/*
** util.c - written in milano by vesely on 6mar2013
** collected mail parsing utilities
*/
/*
* zdkimfilter - Sign outgoing, verify incoming mail messages

Copyright (C) 2013-2020 Alessandro Vesely

This file is part of zdkimfilter

zdkimfilter is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

zdkimfilter is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License version 3
along with zdkimfilter.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPLv3 section 7:

If you modify zdkimfilter, or any covered part of it, by linking or combining
it with OpenSSL, OpenDKIM, Sendmail, or any software developed by The Trusted
Domain Project or Sendmail Inc., containing parts covered by the applicable
licence, the licensor of zdkimfilter grants you additional permission to convey
the resulting work.
*/

#include <config.h>
#if !ZDKIMFILTER_DEBUG
#define NDEBUG
#endif

#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#ifndef NO_IDN2
#include <idn2.h>
#endif
#include <arpa/nameser.h>
#include "util.h"
#include <libopendkim/dkim-mailparse.h>
#include <assert.h>

char *hdrval(const char *a, const char *b)
/*
* Check whether header line a begins with the field name b.
*
* b is the bare field name, without the trailing ':'.  The comparison is
* case-insensitive.  White space is allowed between the name and the colon.
*
* Return a pointer to the character following the colon if the header
* matches, NULL otherwise.
*/
{
	assert(a && b && strchr(b, ':') == NULL);

	unsigned char const *hdr = (unsigned char const*)a;
	unsigned char const *name = (unsigned char const*)b;

	// the whole field name must be matched, ignoring case
	for (; *name != 0; ++hdr, ++name)
	{
		if (*hdr == 0)
			return NULL; // header is shorter than the name

		if (*hdr != *name && tolower(*hdr) != tolower(*name))
			return NULL;
	}

	// only white space may precede the colon; NUL is not white space,
	// hence a header ending here is rejected as well
	for (; *hdr != ':'; ++hdr)
	{
		if (!isspace(*hdr))
			return NULL;
	}

	return (char*)(hdr + 1);
}

unsigned int longest_substring(char const *a, char const *b)
/*
* Return the length of the longest run of consecutive characters that
* occurs in both a and b (longest common substring).  The comparison is
* case-sensitive.  Return 0 if either string is empty.
*
* The working array is allocated on the stack and has one element per
* character of b.
*/
{
	assert(a);
	assert(b);

	size_t const alen = strlen(a);
	size_t const blen = strlen(b);

	// An empty string has nothing in common with anything.  Bailing out
	// here also avoids declaring a zero-length array below.
	if (alen == 0 || blen == 0)
		return 0;

	// run[j] is the length of the common run ending at a[i] and b[j]
	unsigned int run[blen];
	memset(run, 0, sizeof run);

	unsigned int longest = 0;
	for (size_t i = 0; i < alen; ++i)
	{
		// Scan b right to left, so that run[j-1] still holds the value
		// computed for a[i-1] when run[j] is updated.
		for (size_t j = blen; j-- > 0; )
		{
			if (a[i] == b[j])
			{
				run[j] = j > 0? run[j-1] + 1: 1;
				if (run[j] > longest)
					longest = run[j];
			}
			else
				run[j] = 0;
		}
	}

	return longest;
}

int utf8length(unsigned int first)
/*
* Return the total length in bytes (2, 3, or 4) of the UTF-8 sequence
* introduced by the lead byte first.  Return -1 if first cannot start a
* multi-byte sequence, that is, if it is an ASCII character, a
* continuation byte (0x80-0xbf), or 0xf8 and above.
*/
{
	if (first < 0xc0U) return -1; // 0xxxxxxx ASCII, 10xxxxxx continuation
	if (first < 0xe0U) return 2;  // 110xxxxx
	if (first < 0xf0U) return 3;  // 1110xxxx
	if (first < 0xf8U) return 4;  // 11110xxx
	return -1;
}

uint16_t my_get16(const unsigned char *src)
/*
* Function wrapper around the NS_GET16 macro of <arpa/nameser.h>:
* return the 16-bit value stored in the two bytes at src.
*/
{
	// NS_GET16 advances the pointer it is given; work on a local copy
	const unsigned char *cursor = src;
	uint16_t value;

	NS_GET16(value, cursor);
	return value;
}


#if !TEST_UTIL  && !TEST_LONGEST_SUBSTRING
#ifndef NO_IDN2
// these require more linking
static int is_ldh(char const *s)
/*
* Return 1 if s consists of letters, digits, and hyphens (LDH), with dots
* as separators, 0 otherwise.  Two consecutive dots or a trailing dot are
* rejected.  An empty string is accepted.
*
* If CONSIDER_UTF8_STRINGS is enabled, well-formed multi-byte UTF-8
* sequences are accepted too.
*/
{
	int dot = 0; // 1 if the previous character was a dot
	int ch;
	while ((ch = *(unsigned const char*)s++) != 0)
		if (isalnum(ch) || ch == '-')
			dot = 0;
		else if (ch == '.' && dot == 0)
			++dot;
		else
		{
#if CONSIDER_UTF8_STRINGS
			// NOTE(review): dot is not reset after a multi-byte character,
			// so a label made only of such characters is not seen as
			// separating two dots -- confirm if this option gets enabled.
			int len = utf8length(ch);
			if (len > 0)
				while (--len > 0 &&
					((ch = *(unsigned const char*)s++) & 0xc0) == 0x80)
						continue;
			if (len != 0)
#endif // CONSIDER_UTF8_STRINGS
				return 0;
		}
	return dot == 0;
}

static int check_domain(char *domain, char** out)
/*
* Check and normalize domain.  Return 0 if ok, 1 if bad, -1 fatal.
*
* The domain is converted to its ASCII form, checked to be LDH, and then
* converted back to UTF-8.  On success *out is the normalized domain,
* allocated by libidn2; the caller frees it.  Otherwise *out is NULL,
* unless the final conversion left something there on failure.
*/
{
	assert(domain);
	assert(out);

	*out = NULL;

	char *ascii = NULL;
	int const to_ascii = idn2_to_ascii_8z(domain, &ascii,
		IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL);

	if (to_ascii == IDN2_MALLOC)
		return -1;

	if (to_ascii != IDN2_OK)
	{
//		(*do_report)(LOG_WARNING, "cannot convert normalize %s: %s (%s)\n",
//			domain, idn2_strerror (rtc), idn2_strerror_name(rtc));
		return 1;
	}

	int verdict = 1; // bad, unless the checks below succeed
	if (is_ldh(ascii))
	{
		int const to_unicode = idn2_to_unicode_8z8z(ascii, out, 0);
		if (to_unicode == IDN2_OK)
			verdict = 0;
		else if (to_unicode == IDN2_MALLOC)
			verdict = -1;
	}

	free(ascii);
	return verdict;
}

static inline int x2i(int ch)
/*
* Return the value (0-15) of the hexadecimal digit ch, which can be
* upper or lower case.
*/
{
	assert(isxdigit(ch));

	if (ch > 'F') // lower case letter
		return 10 + (ch - 'a');

	if (ch > '9') // upper case letter
		return 10 + (ch - 'A');

	return ch - '0';
}

static void urldecode_inplace(char *s)
/*
* Replace each %XX escape in s, where XX are two hexadecimal digits, with
* the byte it encodes.  The result is never longer than the original, so
* it is written over it.  A '%' not followed by two hexadecimal digits is
* copied as is.
*/
{
	assert(s);

	unsigned char const *in = (unsigned char const*)s;
	char *wr = s; // never gets ahead of in

	while (*in != 0)
	{
		// in[2] is only looked at if in[1] is a digit, hence not NUL
		if (in[0] == '%' && isxdigit(in[1]) && isxdigit(in[2]))
		{
			*wr++ = 16*x2i(in[1]) + x2i(in[2]);
			in += 3;
		}
		else // silently pretend '%' is valid
			*wr++ = *in++;
	}
	*wr = 0;
}

int mailto_domain(char const *s, char**out)
/*
* Find the first valid domain in a mailto: link of a List- header field.
* Return 0 if found, and out is the normalized domain, which the caller
* has to free.  Return 1 if no valid domain is found, -1 if a hard error
* occurred.
*
* Only links enclosed in angle brackets, "<mailto:...>", are considered.
* The query part of a link, if any, is discarded, and each of the
* comma-separated addresses is tried in turn.
*
* mailto url: https://datatracker.ietf.org/doc/html/rfc6068#section-2
* List-* fields: https://datatracker.ietf.org/doc/html/rfc2369#section-3
*/
{
	assert(s);
	assert(out);

	// work on a copy, as links are cut in place
	char *scrap = strdup(s);
	if (scrap == NULL)
		return -1;

	static char const opener[] = "<mailto:";

	*out = NULL;
	int rtc = 1;
	char *cursor = scrap;
	while (rtc == 1)
	{
		char *url = strstr(cursor, opener);
		if (url == NULL)
			break;

		url += sizeof opener - 1;
		cursor = url; // in case the bracket is not closed, resume from here

		char *closing = (char*)
			my_mail_matching_paren(url, url + strlen(url), '<', '>');
		if (closing == NULL || *closing != '>')
			continue;

		*closing = 0;
		cursor = closing + 1;

		// drop the header fields of the url, keep the addresses
		char *query = strchr(url, '?');
		if (query)
			*query = 0;

		char *following;
		for (char *addr = url; addr; addr = following)
		{
			following = strchr(addr, ',');
			if (following)
				*following++ = 0;

			char *at = strchr(addr, '@');
			if (at == NULL)
				continue;

			char *domain = at + 1;
			urldecode_inplace(domain);
			rtc = check_domain(domain, out);
			if (rtc != 1) // either found or failed
				break;
		}
	}

	free(scrap);
	return rtc;
}
#endif // NO_IDN2

#if TEST_MAILTO
#include <stdio.h>

int main(int argc, char *argv[])
/*
* Test driver: print the result of mailto_domain() for each argument, one
* per line, as the return code followed by the domain found, or "/" if
* none.  With the sole argument --version, print the version instead.
*/
{
	if (argc == 2 && strcmp(argv[1], "--version") == 0)
	{
		printf("%s version 1\n", argv[0]);
		return 0;
	}

	for (int i = 1; i < argc; ++i)
	{
		char *domain;
		int rtc = mailto_domain(argv[i], &domain);
		if (rtc == 0)
		{
			printf("%d %s\n", rtc, domain);
			free(domain);
		}
		else
			printf("%d %s\n", rtc, "/");
	}

	return 0;
}

#endif // TEST_MAILTO
#endif // !TEST_UTIL  && !TEST_LONGEST_SUBSTRING

char const *skip_token(char const *s)
/*
* Return a pointer to the first character of s that cannot be part of a
* token: a space or control character (32 or less, which includes the
* terminating NUL), a non-ASCII byte (128 or more), or one of the
* special characters listed below.
*/
{
	static char const specials[] = "()<>@,;:\\\"/[]?=";

	for (;; ++s)
	{
		int const ch = *(unsigned char const*)s;
		if (ch <= 32 || ch >= 128 || strchr(specials, ch) != NULL)
			return s;
	}
}

// moved from myadsp.c 17 Feb 2021
char *skip_fws(char const *s)
/*
* Skip leading white space.  Return a pointer to the first character of s
* which is not white space, or NULL if there is none, or if s is NULL.
*/
{
	if (s == NULL)
		return NULL;

	unsigned char const *p = (unsigned char const*)s;
	while (isspace(*p))
		++p;

	return *p != 0? (char*)p: NULL;
}

#if defined TEST_LONGEST_SUBSTRING
#include <stdio.h>
int main(int argc, char *argv[])
/*
* Test driver: print the length of the longest common substring of the
* two arguments.  Return 1, after printing a usage line, if the number
* of arguments is wrong.
*/
{
	if (argc != 3) {
		printf("usage: longest-common-substring string1 string2\n");
		return 1;
	}

	// longest_substring() returns unsigned int, hence %u
	printf("longest = %u\n", longest_substring(argv[1], argv[2]));

	return 0;
}
#endif // TEST_LONGEST_SUBSTRING
