/*
 * traceroute implementation that does not require root privilege.
 *
 * This requires a Linux 2.4 kernel.
 *
 * Copyright (C) 2000-2005, Olaf Kirch <okir@suse.de>
 *
 * License: GPL 2.0 or later. See COPYING.GPL for details.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <netinet/icmp6.h>
#include <netinet/ip_icmp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <locale.h>
#include <assert.h>
#include "utils.h"
#include "version.h"

/* The kernel wants these */
typedef u_int32_t	__u32;
typedef u_int8_t	__u8;
#include <linux/errqueue.h>

/* These are not available with glibc 2.1.3: */
#ifndef SO_TIMESTAMP
#define SO_TIMESTAMP	29
#endif
#ifndef IPV6_RECVERR
#define IPV6_RECVERR	25
#endif
/* older glibc compatibility
 * From rfc 3542:
 * -  Changed the name of ICMPv6 unreachable code 2 to be "beyond scope
 *    of source address."  ICMP6_DST_UNREACH_NOTNEIGHBOR was removed
 *    with this change.
 */
#if defined(ICMP6_DST_UNREACH_NOTNEIGHBOR) && !defined(ICMP6_DST_UNREACH_BEYONDSCOPE)
# define ICMP6_DST_UNREACH_BEYONDSCOPE ICMP6_DST_UNREACH_NOTNEIGHBOR
#endif

/* Number of entries in the static hops[] table */
#define MAXHOPS		255
/* Number of probe slots in each struct hop (size of hop->probe[]) */
#define MAXPROBES	6
/* Maximum number of -g gateways (size of opt_gateway[] / opt_gateway_name[]) */
#define MAXGATEWAYS	9

/*
 * State for one hop (one TTL value) of the trace, together with the
 * probes sent for it.
 */
struct hop {
	unsigned int	ttl;		/* TTL / hop limit set on this hop's probe sockets */
	unsigned int	sent;		/* number of probes transmitted so far (hop_xmit) */
	unsigned int	recvd;		/* number of leading probes marked done (hop_print) */
	unsigned int	final;		/* set by probe_recv when the response ends the trace */

	/* Previously printed address */
	sockaddr_any	prev_addr;

	/* One entry per probe; entries [0, sent) are in use */
	struct probe {
	    struct hop *	hop;		/* owning hop */
	    int			fd;		/* probe socket; -1 once probe_recverr closed it */
	    struct sock_extended_err ee;	/* extended error read from the error queue */
	    struct timeval	sent_time;	/* taken right after the packet was sent */
	    unsigned long	timeout;	/* deadline in ms, on the same clock as `now` */
	    struct timeval	recvd_time;	/* receive time (SO_TIMESTAMP value if delivered) */
	    sockaddr_any	responder;	/* sender of the ICMP error; family 0 if none */
	    const char *	err_ind;	/* annotation printed after the address, or NULL */
	    unsigned int	done : 1,	/* answered or timed out */
	    			printed : 1;	/* already output by hop_print */
	}		probe[MAXPROBES];
};

/* Hop table; hops[first_hop] .. hops[last_hop - 1] are currently in flight */
static struct hop	hops[MAXHOPS];
static unsigned int	first_hop, last_hop;
/* -w timeout (seconds), -q probes per hop, -N hops probed at once, -m max hops */
static unsigned int	opt_timeout = 5,
			opt_retries = 3,
			concurrent_hops = 6,
			max_hops = 30;
/* Set by probe_recv() together with hop->final */
static unsigned int	have_final_response;
/* Current time in milliseconds, refreshed by set_now() */
static unsigned long	now;

/* Address family; -1 until chosen by option, program name or destination */
static int		af = -1;
//static size_t		af_len = sizeof(struct sockaddr_in);
/* -S source address (as given / resolved) and -I interface name */
static char *		src_addr_name;
static sockaddr_any	src_addr;
static const char *	src_device;
/* Next local port to try when binding a probe socket */
static u_int16_t	src_port = 64000;
/* Destination as given on the command line / resolved */
static const char *	dst_name;
static sockaddr_any	dst_addr;
/* Destination port of the next probe (-p); incremented after every probe */
static u_int16_t	dst_port = 33434;
/* -t: value passed to IP_TOS (only set on IPv4 sockets) */
static u_int8_t		dst_tos;

/* Command line flags: -D, -n, -F, -r, -R */
static int		opt_debug = 0;
static int		opt_noresolver = 0;
static int		opt_dontfrag = IP_PMTUDISC_DONT;
static int		opt_dontroute = 0;
static int		opt_rr = 0;
/* -g gateways (as given / resolved); ngateways entries are valid */
static char *		opt_gateway_name[MAXGATEWAYS];
static sockaddr_any	opt_gateway[MAXGATEWAYS];
static unsigned int	ngateways;

/* Probe payload; the first packetsize bytes are sent */
static unsigned char	packet[65536];
static unsigned int	packetsize = 40;
/* IPv4 option block built by ipopt_init(); ipoptions_len == 0 means none */
static unsigned char	ipoptions[40];
static unsigned int	ipoptions_len = 0;

/* Forward declarations of the file-local helpers defined after main() */
static void		set_now(void);
static void		hop_next(void);
static void		hop_xmit(struct hop *);
static int		hop_sendmsg(int);
static void		hop_print(struct hop *);
static void		probe_recv(struct probe *);
static void		probe_timeout(struct probe *);
static int		probe_recverr(struct probe *);
static int		probe_init(struct hop *, struct probe *);
static void		ipopt_init(void);
static void		usage(int exval);
static void		show_version(void);

int
main(int argc, char **argv)
{
	unsigned int	n;
	int		c;

	/* Set the locale - needed for international domain names */
	setlocale (LC_ALL, "");

	while ((c = getopt(argc, argv, "46DFVf:g:I:m:N:np:t:w:q:rRS:")) != -1) {
		switch (c) {
		case '4':
			af = AF_INET;
			break;
		case '6':
			af = AF_INET6;
			break;
		case 'D':
			opt_debug++;
			break;

		case 'F':
			opt_dontfrag = IP_PMTUDISC_DO;
			break;

		case 'f':
			first_hop = getnum("hop", optarg, 1, 255);
			break;

		case 'g':
			/* fatal("option -g not yet supported"); */
			if (ngateways >= MAXGATEWAYS)
				fatal("too many gateways");
			opt_gateway_name[ngateways++] = optarg;
			break;

		case 'I':
			src_device = optarg;
			break;

		case 'm':
			max_hops = getnum("hop", optarg, 1, 255);
			break;

		case 'N':
			concurrent_hops = getnum("concurrent hop", optarg, 1, 16);
			break;

		case 'n':
			opt_noresolver = 1;
			break;

		case 'p':
			dst_port = getnum("port", optarg, 1, 65535);
			break;

		case 't':
			dst_tos = getnum("TOS", optarg, 0, 255);
			break;

		case 'w':
			opt_timeout = getnum("timeout", optarg, 1, 256);
			break;

		case 'q':
			opt_retries = getnum("retry", optarg, 1, 1024);
			break;

		case 'r':
			opt_dontroute = 1;
			break;

		case 'R':
			opt_rr = 1;
			break;

		case 'S':
			src_addr_name = optarg;
			break;

		case 'V':
			show_version();
			break;
		default:
			usage(1);
		}
	}

	if (optind == argc - 2) {
		packetsize = getnum("packet length", argv[optind+1],
					1, 65536);
	} else
	if (optind != argc - 1)
		usage(1);

	/* Check the program name. If we've called as traceroute6,
	 * default to IPv6 */
	if (af == -1) {
		char	*s;

		if ((s = strrchr(argv[0], '/')) != 0)
			s++;
		else
			s = argv[0];
		if (!strcmp(s, "traceroute6"))
			af = AF_INET6;
	}

	dst_name = argv[optind];
	if (!getaddr(dst_name, af, &dst_addr))
		return 1;
	af = dst_addr.any.sa_family;

	/* Resolve gateways if given */
	for (n = 0; n < ngateways; n++) {
		if (!getaddr(opt_gateway_name[n], af, opt_gateway + n))
			return 1;
	}

	/* Resolve source address if given */
	if (src_addr_name && !getaddr(src_addr_name, af, &src_addr)) {
		return 1;
	}

	if (first_hop >= max_hops) {
		fprintf(stderr, "first hop %u larger than max hops %u\n",
			first_hop, max_hops);
		return 1;
	}

	/* Initialize packet */
	for (n = 0; n < packetsize; n++)
		packet[n] = 0x40 + (n & 0x3f);

	/* Initialize IP options */
	ipopt_init();

	/* Start by sending the first packet */
	last_hop = first_hop;

	printf("traceroute to %s (%s), %u hops max, %u byte packets\n",
		dst_name, straddr(&dst_addr),
		max_hops, packetsize);

	while (1) {
		struct probe	*probe[MAXHOPS*MAXPROBES], *pb;
		struct pollfd	pfd[MAXHOPS*MAXPROBES];
		unsigned long	timeout;
		struct hop	*hop;
		unsigned int	m, n, wait_some = 0;

		while (last_hop < max_hops
		    && last_hop < first_hop + concurrent_hops)
			hop_next();

		set_now();
		timeout = now + 5000;
		memset(pfd, 0, sizeof(pfd));
		for (m = 0, hop = hops + first_hop; hop < hops + last_hop; hop++) {
			unsigned int	num;

			for (num = hop->recvd; num < opt_retries; num++, m++) {
				pb = hop->probe + num;

				if (num >= hop->sent) {
					hop_xmit(hop);
					wait_some = 1;
				}

				pfd[m].events = POLLERR;
				pfd[m].fd = pb->fd;
				if (pb->timeout < timeout)
					timeout = pb->timeout;
				probe[m] = pb;

				/* If we've already transmitted some
				 * packets, wait for 1ms to receive
				 * something. This helps us give much more
				 * accurate RTTs on fast links */
				if (wait_some) {
					timeout = now + 1;
					goto wait_for_some;
				}
			}
		}

		if (m == 0)
			break;

wait_for_some:
		if (opt_debug > 1) {
			fprintf(stderr, "poll %d fds, timeout %lu ms\n",
				m, timeout - now);
		}

		poll(pfd, m, (now < timeout)? (timeout - now) : 0);

		set_now(); /* time has passed - catch up */

		/* Receive any pending ICMP errors */
		for (n = 0; n < m; n++) {
			pb = probe[n];
			if (pfd[n].revents & POLLERR) {
				probe_recv(pb);
			} else if (pb->timeout <= now) {
				probe_timeout(pb);
			}
		}

		/* FIXME: we really shouldn't start printing packets
		 * and resoving addresses while we still have packets
		 * in flight, especially on low RTT links.
		 * We should either skip the output part while that's
		 * the case, or fork and do the printing in a separate
		 * process */

		/* Now loop over all hop structures and see whether we can
		 * add more probes, or wrap up the timed out ones */
		while (first_hop < last_hop) {
			hop = hops + first_hop;
			hop_print(hop);
			if (hop->recvd < hop->sent)
				break;

			/* This one is complete */
			printf("\n");
			first_hop++;
			if (hop->final)
				goto done;
		}
	}

done:	return 0;
}

/*
 * Print the command line synopsis to stderr and terminate the process.
 *
 * exval - exit status handed to exit()
 */
static void
usage(int exval)
{
	fprintf(stderr,
	"usage: traceroute [-46DFnrRV] [-f first_ttl] [-m max_hops] [-N concurrent_hops]\n"
	"           [-p port] [-S source_addr] [-I interface] [-g gateway]\n"
	"           [-t tos] [-w timeout] [-q nqueries] host [packetlen]\n");
	exit(exval);
}

/*
 * Write the version banner and copyright line to stdout, then exit
 * with status 0.
 */
static void
show_version(void)
{
	fprintf(stdout, "This is traceroute %s\n", TRACEROUTE_VERSION);
	fputs("Copyright (c) 2002 Olaf Kirch\n", stdout);
	exit(0);
}

/*
 * Set our idea of the current time: store the wall-clock time, in
 * milliseconds, in the global `now`.
 */
static void
set_now(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	/* Convert before multiplying: where tv_sec is a 32-bit signed type,
	 * tv_sec * 1000 would overflow (undefined behaviour), whereas
	 * unsigned arithmetic wraps in a defined way. */
	now = (unsigned long) tv.tv_sec * 1000UL
	    + (unsigned long) tv.tv_usec / 1000UL;
}

static void
hop_next(void)
{
	struct hop	*hop = hops + last_hop++;

	hop->ttl = last_hop;
}

/*
 * Send the next probe for `hop`: claim the next probe slot, set up its
 * socket, transmit the packet and record the send time.  A failed send
 * is reported with perror() but otherwise ignored.
 */
static void
hop_xmit(struct hop *hop)
{
	unsigned int	seq = hop->sent;
	struct probe	*slot = &hop->probe[seq];

	if (opt_debug) {
		fprintf(stderr, "sending hop %ld packet %d ttl=%d\n",
				(long)(hop - hops + 1), seq, hop->ttl);
	}

	/* Init probe and send packet */
	hop->sent = seq + 1;
	probe_init(hop, slot);

	if (hop_sendmsg(slot->fd) < 0)
		perror("send failed");

	gettimeofday(&slot->sent_time, NULL);
}

/*
 * Print the results of `hop` that are ready and not yet printed.
 *
 * First advances hop->recvd over the leading probes that are done, then
 * prints the probes in order, stopping at the first one still pending.
 * Probes without a responder are shown as " *"; otherwise the responder
 * (only when it differs from the previously printed address), an
 * optional error annotation and the round trip time are shown.
 * stdout is always flushed before returning.
 */
static void
hop_print(struct hop *hop)
{
	struct probe	*p = hop->probe;
	struct timeval	delta;
	unsigned int	nr;

	/* Count the leading probes that were answered or timed out */
	while (hop->recvd < hop->sent && hop->probe[hop->recvd].done)
		hop->recvd++;

	for (nr = 0; nr < hop->sent; nr++, p++) {
		/* Keep the output in order: stop at the first pending probe,
		 * but still fall through to the flush below. */
		if (!p->done)
			break;
		if (p->printed)
			continue;
		if (nr == 0)
			printf("%2lu ", (unsigned long) (hop - hops) + 1UL);
		if (p->responder.sin.sin_family == 0) {
			/* No response was recorded for this probe */
			printf(" *");
		} else {
			printf(" ");
			timersub(&p->recvd_time, &p->sent_time, &delta);
			/* sent_time is taken after sending, so the difference
			 * can come out negative; clamp it to zero */
			if (delta.tv_sec < 0)
				timerclear(&delta);
			if (!sameaddr(&p->responder, &hop->prev_addr))
				printaddr(&p->responder, !opt_noresolver);
			hop->prev_addr = p->responder;
			if (p->err_ind)
				printf("(%s)", p->err_ind);
			/* The timeval members are signed; cast to match %lu */
			printf("  %lu.%03lu ms",
				(unsigned long) (1000 * delta.tv_sec
						+ delta.tv_usec / 1000),
				(unsigned long) (delta.tv_usec % 1000));
		}
		p->printed = 1;
	}
	fflush(stdout);
}

static int
probe_init(struct hop *hop, struct probe *pb)
{
	struct sockaddr_in	sin;
	struct sockaddr_in6	six;
	struct sockaddr		*ap;
	socklen_t		alen;
	int			fd, val, len;

	if ((fd = socket(af, SOCK_DGRAM, 0)) < 0)
		fatal("unable to create UDP socket: %m");

	len = sizeof(val);
	if (af == AF_INET) {
		val = 1;
		if (setsockopt(fd, SOL_IP, IP_RECVERR, &val, len) < 0)
			fatal("unable to set SO_RECVERR: %m");
		val = hop->ttl;
		if (setsockopt(fd, SOL_IP, IP_TTL, &val, len) < 0)
			fatal("unable to set TTL: %m");
		val = dst_tos;
		if (setsockopt(fd, SOL_IP, IP_TOS, &val, len) < 0)
			fatal("unable to set TOS: %m");
		val = opt_dontfrag;
		if (setsockopt(fd, SOL_IP, IP_MTU_DISCOVER, &val, len) < 0)
			fatal("unable to set MTU_DISCOVER: %m");
	} else {
		val = 1;
		if (setsockopt(fd, SOL_IPV6, IPV6_RECVERR, &val, len) < 0)
			fatal("unable to set SO_RECVERR: %m");
		val = hop->ttl;
		if (setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &val, len) < 0)
			fatal("unable to set IPV6_UNICAST_HOPS: %m");
	}

	val = opt_dontroute;
	if (setsockopt(fd, SOL_SOCKET, SO_DONTROUTE, &val, len) < 0)
		fatal("unable to set SO_DONTROUTE: %m");
	val = 1;
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &val, len) < 0)
		fatal("unable to set SO_TIMESTAMP: %m");

	if (src_device) {
		len = strlen(src_device)+1;
		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src_device, len) < 0)
			fatal("Unable to bind to network interface %s: %m",
				src_device);
	}

	if (af == AF_INET) {
		ap = (struct sockaddr *) &sin;
		alen = sizeof(sin);
	} else {
		ap = (struct sockaddr *) &six;
		alen = sizeof(six);
	}
	if (src_addr.any.sa_family == af) {
		memcpy(ap, &src_addr, alen);
	} else {
		memset(ap, 0, alen);
		ap->sa_family = af;
	}

	while (1) {
		if (af == AF_INET)
			sin.sin_port = htons(src_port);
		else
			six.sin6_port = htons(src_port);
		src_port++;

		if (bind(fd, ap, alen) >= 0)
			break;
		if (errno != EADDRINUSE)
			fatal("unable to bind socket: %m");
	}

	memcpy(ap, &dst_addr, alen);
	if (af == AF_INET) {
		sin.sin_port = htons(dst_port);
	} else {
		six.sin6_port = htons(dst_port);
	}
	dst_port++;

	if (connect(fd, ap, alen) < 0)
		fatal("Unable to connect to %s: %m", straddr(&dst_addr));

	pb->timeout = now + opt_timeout * 1000;
	pb->hop = hop;
	pb->fd = fd;
	return 0;
}

/*
 * Build one SOL_IP level control message at `cm`.
 *
 * cm   - where the message header is written; the caller must provide
 *        room for CMSG_SPACE(len) bytes
 * type - value stored in cmsg_type
 * data - payload, copied into the message
 * len  - payload length in bytes
 *
 * Returns the position at which the next control message starts.
 */
static inline struct cmsghdr *
cmsg_put(struct cmsghdr *cm, int type, void *data, size_t len)
{
	unsigned char	*payload;

	cm->cmsg_level = SOL_IP;
	cm->cmsg_type  = type;
	cm->cmsg_len   = CMSG_LEN(len);

	payload = CMSG_DATA(cm);
	memcpy(payload, data, len);
	/* Do not leave the alignment padding uninitialised */
	memset(payload + len, 0, CMSG_SPACE(len) - CMSG_LEN(len));

	/* Advance by CMSG_SPACE rather than cmsg_len: consecutive control
	 * messages must start on an aligned boundary, and cmsg_len does
	 * not include the trailing padding. */
	return (struct cmsghdr *) ((unsigned char *) cm + CMSG_SPACE(len));
}

/*
 * Send one probe packet (the first packetsize bytes of `packet`) on the
 * connected socket `fd`.
 *
 * The IP options built by ipopt_init() and, if a source address was
 * given, a matching packet-info control message are attached as
 * ancillary data.
 *
 * Returns the result of sendmsg(): the number of bytes sent, or -1 with
 * errno set.
 */
static int
hop_sendmsg(int fd)
{
	/* The union gives the byte buffer the alignment of a cmsghdr */
	union {
		unsigned char	buf[1024];
		struct cmsghdr	align;
	}		control;
	struct msghdr	msg;
	struct cmsghdr	*cm;
	struct iovec	iov;

	memset(&msg, 0, sizeof(msg));
	memset(&control, 0, sizeof(control));
	iov.iov_base	= packet;
	iov.iov_len	= packetsize;

	msg.msg_iov	= &iov;
	msg.msg_iovlen	= 1;

	/* Copy IP options, if specified.  */
	cm = (struct cmsghdr *) control.buf;
	if (ipoptions_len) {
		cm = cmsg_put(cm, IP_RETOPTS, ipoptions, ipoptions_len);
	}

	/* Set source address, if specified */
	if (src_addr.sin.sin_family) {
		if (src_addr.sin.sin_family == AF_INET) {
			struct in_pktinfo info;

			memset(&info, 0, sizeof(info));
			info.ipi_spec_dst = ((struct sockaddr_in *) &src_addr)->sin_addr;

			cm = cmsg_put(cm, IP_PKTINFO, &info, sizeof(info));
		} else {
			struct in6_pktinfo info;
			struct cmsghdr	*hdr = cm;

			memset(&info, 0, sizeof(info));
			info.ipi6_addr = ((struct sockaddr_in6 *) &src_addr)->sin6_addr;

			cm = cmsg_put(cm, IPV6_PKTINFO, &info, sizeof(info));
			/* cmsg_put() stamps every message with SOL_IP, but
			 * IPV6_PKTINFO is an IPv6 level message; with the
			 * wrong level it is not treated as one. */
			hdr->cmsg_level = SOL_IPV6;
		}
	}

	/* Only hand over a control buffer if something was put into it */
	msg.msg_controllen = ((unsigned char *) cm) - control.buf;
	if (msg.msg_controllen)
		msg.msg_control	= control.buf;

	return sendmsg(fd, &msg, 0);
}

/*
 * Handle a pending error on the socket of probe `p`.
 *
 * Reads the queued error through probe_recverr() and classifies it.
 * "Time exceeded" answers (for ICMPv6: only code "in transit") are
 * ordinary intermediate hops and need nothing further.  Every other
 * answer ends the trace: the hop is marked final and p->err_ind is set
 * to a short annotation ("N!", "H!", ...), to NULL when the destination
 * itself answered with "port unreachable", or to "icmp-<type>-<code>"
 * for errors that are not classified here.
 */
static void
probe_recv(struct probe *p)
{
	enum { ERRBUF_LEN = 32 };
	/* Sentinel meaning "no case below classified the error" */
	static const char unclassified[] = "";
	struct hop	*hop = p->hop;
	const char	*errstring;

	/* recv errmsg */
	if (probe_recverr(p) < 0)
		return;

	if (opt_debug)
		fprintf(stderr, "received hop %u response\n", hop->ttl);

	/* Handle error codes */
	errstring = unclassified;
	if (p->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
		switch (p->ee.ee_type) {
		case ICMP_TIME_EXCEEDED:
			return;

		case ICMP_DEST_UNREACH:
			switch (p->ee.ee_code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_TOSNET:
			case ICMP_UNREACH_FILTER_PROHIB:
				errstring = "N!";
				break;

			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_TOSHOST:
			case ICMP_UNREACH_HOST_PRECEDENCE:
				errstring = "H!";
				break;

			case ICMP_UNREACH_PORT:
				/* we've reached the destination host */
				errstring = 0;
				break;

			case ICMP_UNREACH_PROTOCOL:
				errstring = "P!";
				break;

			case ICMP_UNREACH_NEEDFRAG:
				errstring = "F!";
				break;

			case ICMP_UNREACH_SRCFAIL:
				errstring = "S!";
				break;

			default:
				errstring = "!!";
				break;
			}
			break;
		}
	} else
	if (p->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
		switch (p->ee.ee_type) {
		case ICMP6_TIME_EXCEEDED:
			if (p->ee.ee_code == ICMP6_TIME_EXCEED_TRANSIT)
				return;
			break;

		case ICMP6_DST_UNREACH:
			switch (p->ee.ee_code) {
			case ICMP6_DST_UNREACH_NOROUTE:
			case ICMP6_DST_UNREACH_ADMIN:
				errstring = "N!";
				break;

			case ICMP6_DST_UNREACH_BEYONDSCOPE:
			case ICMP6_DST_UNREACH_ADDR:
				errstring = "H!";
				break;

			case ICMP6_DST_UNREACH_NOPORT:
				/* we've reached the destination host */
				errstring = 0;
				break;

			default:
				errstring = "!!";
				break;
			}
			break;
		}
	}

	if (errstring == unclassified) {
		/* Each probe gets its own copy of the text.  A buffer shared
		 * by all probes would be overwritten by the next
		 * unclassified error before this one has been printed.  The
		 * copy is never freed: the probe lives in the static hop
		 * table until the program exits. */
		char	*text = malloc(ERRBUF_LEN);

		if (text) {
			snprintf(text, ERRBUF_LEN, "icmp-%d-%d",
					p->ee.ee_type, p->ee.ee_code);
			errstring = text;
		} else {
			/* Out of memory: fall back to the generic marker */
			errstring = "!!";
		}
	}

	p->err_ind = errstring;
	have_final_response = 1;
	hop->final = 1;
}

/*
 * Fetch the queued error of probe `p` from its socket's error queue.
 *
 * p->ee and p->responder are cleared and p->recvd_time is set to the
 * current time before the read.  On success the extended error and the
 * address of the node that reported it are stored in `p`, recvd_time is
 * replaced by the SO_TIMESTAMP value if one was delivered, the probe is
 * marked done and its socket is closed (fd set to -1).
 *
 * Returns 0 on success, -1 if recvmsg() failed (the socket is left
 * open and the probe is not marked done).
 */
static int
probe_recverr(struct probe *p)
{
	/* Large enough for either family; the address itself is not used */
	struct sockaddr_storage peer;
	/* The union gives the byte buffer the alignment of a cmsghdr */
	union {
		unsigned char	buf[1024];
		struct cmsghdr	align;
	}		control;
	struct msghdr	msg;
	struct cmsghdr	*cm;

	memset(&msg, 0, sizeof(msg));
	memset(&peer, 0, sizeof(peer));
	memset(&p->ee, 0, sizeof(p->ee));
	gettimeofday(&p->recvd_time, NULL);
	memset(&p->responder, 0, sizeof(p->responder));

	msg.msg_name	= &peer;
	msg.msg_namelen	= sizeof(peer);
	msg.msg_control	= control.buf;
	msg.msg_controllen = sizeof(control.buf);

	if (recvmsg(p->fd, &msg, MSG_ERRQUEUE) < 0)
		return -1;

	for (cm = CMSG_FIRSTHDR(&msg);
	     cm;
	     cm = CMSG_NXTHDR(&msg, cm)) {
		struct sock_extended_err *ep;
		size_t	payload, addrlen;

		/* Number of data bytes this control message really carries */
		if (cm->cmsg_len < CMSG_LEN(0))
			continue;
		payload = cm->cmsg_len - CMSG_LEN(0);

		if (cm->cmsg_level == SOL_SOCKET
		 && cm->cmsg_type == SO_TIMESTAMP) {
			if (payload >= sizeof(struct timeval))
				memcpy(&p->recvd_time, CMSG_DATA(cm),
						sizeof(struct timeval));
			continue;
		}

		if (!(cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR)
		 && !(cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR))
			continue;

		if (payload < sizeof(p->ee))
			continue;

		ep = (struct sock_extended_err *) CMSG_DATA(cm);
		memcpy(&p->ee, ep, sizeof(p->ee));

		/* The offender address follows the extended error.  Copy
		 * only what the message contains: an IPv4 address is
		 * shorter than sockaddr_any, and the rest of p->responder
		 * was zeroed above. */
		addrlen = payload - sizeof(p->ee);
		if (addrlen > sizeof(p->responder))
			addrlen = sizeof(p->responder);
		memcpy(&p->responder, SO_EE_OFFENDER(ep), addrlen);
	}

	p->done = 1;
	close(p->fd);
	p->fd = -1;
	return 0;
}

/*
 * Give up on probe `pb`: mark it done and release its socket.
 *
 * main() calls this on every pass for a probe whose deadline has
 * passed, so the socket is closed only once; poll() ignores the
 * resulting negative descriptor.  A reply arriving after the deadline
 * is therefore no longer picked up.
 */
static void
probe_timeout(struct probe *pb)
{
	pb->done = 1;

	/* Without this every timed out probe would keep its socket open
	 * until the program exits. */
	if (pb->fd >= 0) {
		close(pb->fd);
		pb->fd = -1;
	}
}

/*
 * Build the IPv4 option block that hop_sendmsg() attaches to every
 * probe.
 *
 * With -g gateways a loose source route (LSRR) listing the gateways is
 * built; otherwise, with -R, an empty record route option.  Nothing is
 * built for IPv6; a notice is printed if gateways were requested.
 */
static void
ipopt_init(void)
{
	unsigned int	nr;

	if (af == AF_INET6) {
		if (ngateways)
			fprintf(stderr,
				"Sorry, this traceroute implementation does "
				"not support source routing for IPv6.\n");
		return;
	}

	if (ngateways) {
		ipoptions[0] = IPOPT_NOP; /* pad */
		ipoptions[1] = IPOPT_LSRR;
		/* Option length: type, length and pointer bytes plus one
		 * address per gateway.  A fixed length of 39 would turn
		 * the unused, zeroed slots into 0.0.0.0 route entries. */
		ipoptions[2] = 3 + 4 * ngateways;
		ipoptions[3] = 4;
		for (nr = 0; nr < ngateways; nr++) {
			struct sockaddr_in	*sin;

			sin = (struct sockaddr_in *) &opt_gateway[nr];
			memcpy(ipoptions + ((nr + 1) << 2),
				&sin->sin_addr, 4);
		}
		/* The whole buffer is still handed over; the bytes after
		 * the option are zero, i.e. end-of-option-list padding. */
		ipoptions_len = 40;
	} else
	if (opt_rr) {
		/* Record route: room for the maximum of nine addresses */
		ipoptions[0] = IPOPT_NOP; /* pad */
		ipoptions[1] = IPOPT_RR;
		ipoptions[2] = 39;
		ipoptions[3] = 4;
		ipoptions_len = 40;
	}
}
