/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* enchant
 * Copyright (C) 2003 Dom Lachowicz
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * the non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two.  You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers. If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 */

/**
 * This is a rough approximation of an "ispell compatibility mode"
 * for Enchant.
 */

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>

#include "enchant.h"

/* word has to be bigger than this to be checked */
#define MIN_WORD_LENGTH 1

typedef enum 
	{
		MODE_NONE,
		MODE_VERSION,
		MODE_A,
		MODE_L,
		MODE_FILE
	} IspellMode_t;

static void 
print_version (FILE * to)
{
	fprintf (to, "@(#) International Ispell Version 3.1.20 (but really Enchant %s)\n", VERSION);
}

static void
print_help (FILE * to, const char * prog)
{
	fprintf (to, "Usage: %s [options] -a|-l|-L|-v[v]|<file>\n", prog);
	fprintf (to, "\t-a lists alternatives.\n", prog);
	fprintf (to, "\t-l lists misspelings.\n", prog);
	fprintf (to, "\t-L displays line numbers.\n", prog);
	fprintf (to, "\t-v displays program version.\n", prog);
}

static gboolean
consume_line (FILE * in, GString * str)
{
	int ch;
	gsize bytes_read, bytes_written;
	gchar * utf;
	gboolean ret = TRUE;

	g_string_truncate (str, 0);

	while (ret && (ch = fgetc (in)) != EOF) {
		if (ch == '\r')
			continue;
		else {
			g_string_append_c (str, ch);
			if (ch == '\n')
				ret = FALSE;
		}
	}

	if (str->len) {
		utf = g_locale_to_utf8 (str->str, str->len, &bytes_read, &bytes_written, NULL);

		if (utf) {
			g_string_assign (str, utf);
			g_free (utf);
		} 
		/* else str->str stays the same. we'll assume that it's 
		   already utf8 and glib is just being stupid */
	}

	return ret;
}

static void
print_utf (FILE * out, const char * str)
{
	gsize bytes_read, bytes_written;
	gchar * native;

	native = g_locale_from_utf8 (str, -1, &bytes_read, &bytes_written, NULL);
	if (native) {
		fwrite (native, 1, bytes_written, out);
		g_free (native);
	} else {
		fwrite (str, 1, strlen (str), out);
	}
}

static void
do_mode_a (FILE * out, EnchantDict * dict, GString * word, size_t start_pos, size_t lineCount)
{
	size_t n_suggs;
	char ** suggs;	

	if (word->len <= MIN_WORD_LENGTH || enchant_dict_check (dict, word->str, word->len) == 0)
		if (lineCount)
			fprintf (out, "* %ld\n", lineCount);
		else
			fwrite ("*\n", 1, 2, out);
	else {
		suggs = enchant_dict_suggest (dict, word->str, 
					      word->len, &n_suggs);
		if (!n_suggs || !suggs) {
			fwrite ("# ", 1, 2, out);
			if (lineCount)
				fprintf (out, "%ld ", lineCount);
			print_utf (out, word->str);
			fprintf (out, " %ld\n", start_pos+1);
		}
		else {
			size_t i = 0;
			
			fwrite ("& ", 1, 2, out);
			if (lineCount)
				fprintf (out, "%ld ", lineCount);
			print_utf (out, word->str);
			fprintf (out, " %ld %ld:", n_suggs, start_pos);
			
			for (i = 0; i < n_suggs; i++) {
				fprintf (out, " ");
				print_utf (out, suggs[i]);

				if (i != (n_suggs - 1))
					fwrite (",", 1, 1, out);
				else
					fwrite ("\n", 1, 1, out);
			}
		}
	}
}

static void
do_mode_l (FILE * out, EnchantDict * dict, GString * word, size_t lineCount)
{
	if (enchant_dict_check (dict, word->str, word->len) != 0) {
		if (lineCount)
			fprintf (out, "%ld ", lineCount);
		print_utf (out, word->str);
		fwrite ("\n", 1, 1, out);
	}
}

/* splits a line into a set of (word,word_position) touples */
static GSList *
tokenize_line (GString * line)
{
	GSList * tokens = NULL;
	size_t start_pos, cur_pos;
	char *utf = (char *) line->str;

	GString * word;
	
	gunichar uc;
	
	start_pos = cur_pos = 0;
	word = g_string_new (NULL);

	while (cur_pos < line->len && *utf) {
		uc = g_utf8_get_char (utf); 
		
		switch (g_unichar_type(uc)) {
		case G_UNICODE_MODIFIER_LETTER:
		case G_UNICODE_LOWERCASE_LETTER:
		case G_UNICODE_TITLECASE_LETTER:
		case G_UNICODE_UPPERCASE_LETTER:
		case G_UNICODE_OTHER_LETTER:
		case G_UNICODE_COMBINING_MARK:
		case G_UNICODE_ENCLOSING_MARK:
		case G_UNICODE_NON_SPACING_MARK:
		case G_UNICODE_DECIMAL_NUMBER:
		case G_UNICODE_LETTER_NUMBER:
		case G_UNICODE_OTHER_NUMBER:
		case G_UNICODE_CONNECT_PUNCTUATION:
			g_string_append_unichar (word, uc);
			cur_pos++;
			break;
		case G_UNICODE_OTHER_PUNCTUATION:
			if (uc == '\'') {
				g_string_append_unichar (word, uc);
				cur_pos++;
				break;
			}
			/* else fall through */
		default: /* some sort of non-word character */
			if (word->len) {
				tokens = g_slist_append (tokens,
							 g_string_new_len (word->str, word->len));
				tokens = g_slist_append (tokens,
							 GINT_TO_POINTER(start_pos));
				g_string_truncate (word, 0);
				start_pos = ++cur_pos;
			}
			break;
		}
		utf = g_utf8_next_char (utf);
	}

	g_string_free (word, TRUE);

	return tokens;
}

static int
parse_file (FILE * in, FILE * out, IspellMode_t mode, int countLines)
{
	EnchantBroker * broker;
	EnchantDict * dict;
	
	GString * str, * word = NULL;
	GSList * tokens, *token_ptr;
	gchar * lang, *lang_punct;
	size_t pos, lineCount = 0;

	gboolean was_last_line = FALSE, corrected_something = FALSE;

	if (mode == MODE_A)
		print_version (out);

	lang = g_strdup (g_getenv ("LANG"));
	if (!lang || !strcmp (lang, "C"))
		lang = g_strdup ("en");
	else {
		/* get rid of useless trailing garbage like de_DE@euro or de_DE.ISO-8859-15 */
		if ((lang_punct = strrchr (lang, '.')) != NULL)
			*lang_punct = '\0';
		if ((lang_punct = strrchr (lang, '@')) != NULL)
			*lang_punct = '\0';
	}
	
	broker = enchant_broker_init ();
	dict = enchant_broker_request_dict (broker, lang);

	if (!dict) {
		fprintf (stderr, "Couldn't create a dictionary for %s\n", lang);
		g_free (lang);
		enchant_broker_free (broker);
		return 1;
	}

	g_free (lang);

	str = g_string_new (NULL);
	
	while (!was_last_line) {
		was_last_line = consume_line (in, str);

		if (countLines)
			lineCount++;

		if (str->len) {

			corrected_something = FALSE;
			token_ptr = tokens = tokenize_line (str);
			while (tokens != NULL) {
				corrected_something = TRUE;

				word = (GString *)tokens->data;
				tokens = tokens->next;
				pos = GPOINTER_TO_INT(tokens->data);
				tokens = tokens->next;

				if (mode == MODE_A)
					do_mode_a (out, dict, word, pos, lineCount);
				else if (mode == MODE_L)
					do_mode_l (out, dict, word, lineCount);
			}

			if (token_ptr)
				g_slist_free (token_ptr);
		} 
		
		if (mode == MODE_A && corrected_something)
			fwrite ("\n", 1, 1, out);
		
		g_string_truncate (str, 0);
	}
	
	enchant_broker_free_dict (broker, dict);
	enchant_broker_free (broker);

	if (word)
		g_string_free (word, TRUE);
	g_string_free (str, TRUE);

	return 0;
}

int main (int argc, char ** argv)
{
	IspellMode_t mode = MODE_NONE;
	
	char * arg, * file = NULL;
	int i, rval = 0;
	
	FILE * fp = stdin;

	int countLines = 0;
	
	for (i = 1; i < argc; i++) {
		arg = argv[i];
		if (arg[0] == '-') {
			if (strlen (arg) == 2) {
				if (arg[1] == 'a')
					mode = MODE_A;
				else if (arg[1] == 'l')
					mode = MODE_L;
				else if (arg[1] == 'v')
					mode = MODE_VERSION;
				else if (arg[1] == 'L')
					countLines = 1;
			} 
			else if (strlen (arg) > 2) {
				fprintf (stderr, "-%c does not take any parameters.\n", arg[1]);
				exit(1);
			} 
			else
				file = arg;
		} 
		else
			file = arg;
	}
	
	if (mode == MODE_VERSION) {
		print_version (stdout);
	} 
	else if (mode == MODE_NONE && !file) {
		print_help (stdout, argv[0]);
	}
	else {
		setlocale (LC_ALL, "");

		if (file) {
			fp = fopen (file, "r");
			if (!fp) {
				fprintf (stderr, "Couldn't open '%s' to spellcheck\n", file);
				exit (1);
			}
		}
		
		rval = parse_file (fp, stdout, mode, countLines);
		
		if (file)
			fclose (fp);
	}
	
	return rval;
}