/* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */

/* AbiWord: ie_imp_HTML - plugin for Multipart [X]HTML
 * 
 * Copyright (C) 2001-2003 Francis James Franklin <fjf@alinameridon.com>
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
 * 02111-1307, USA.
 */

#include "ut_html.h"

#include "ie_FileInfo.h"
#include "ie_imp_HTML.h"

IE_Imp_HTML_Sniffer::IE_Imp_HTML_Sniffer ()
#ifdef XHTML_NAMED_CONSTRUCTORS
	: IE_ImpSniffer("AbiXHTML::HTML libxml2")
#endif
{
	// 
}

#ifdef XHTML_NAMED_CONSTRUCTORS

UT_Confidence_t IE_Imp_HTML_Sniffer::supportsMIME (const char * szMIME)
{
	if (UT_strcmp (IE_FileInfo::mapAlias (szMIME), IE_MIME_HTML) == 0)
		{
			return UT_CONFIDENCE_GOOD;
		}
	return UT_CONFIDENCE_ZILCH;
}

#endif /* XHTML_NAMED_CONSTRUCTORS */

UT_Confidence_t IE_Imp_HTML_Sniffer::recognizeContents (const char * szBuf, UT_uint32 iNumbytes)
{
	UT_uint32 iLinesToRead = 6 ;  // Only examine the first few lines of the file
	UT_uint32 iBytesScanned = 0 ;
	const char *p ;
	char *magic ;
	p = szBuf ;
	while( iLinesToRead-- )
	{
		magic = "<html" ;
		if ( (iNumbytes - iBytesScanned) < strlen(magic) ) return(UT_CONFIDENCE_ZILCH);
		if ( strncmp(p, magic, strlen(magic)) == 0 ) return(UT_CONFIDENCE_PERFECT);
		magic = "<!DOCTYPE html" ;
		if ( (iNumbytes - iBytesScanned) < strlen(magic) ) return(UT_CONFIDENCE_ZILCH);
		if ( strncmp(p, magic, strlen(magic)) == 0 ) return(UT_CONFIDENCE_PERFECT);

		magic = "<!DOCTYPE HTML" ;
		if ( (iNumbytes - iBytesScanned) < strlen(magic) ) return(UT_CONFIDENCE_ZILCH);
		if ( strncmp(p, magic, strlen(magic)) == 0 ) return(UT_CONFIDENCE_PERFECT);
		/*  Seek to the next newline:  */
		while ( *p != '\n' && *p != '\r' )
		{
			iBytesScanned++ ; p++ ;
			if( iBytesScanned+2 >= iNumbytes ) return(UT_CONFIDENCE_ZILCH);
		}
		/*  Seek past the next newline:  */
		if ( *p == '\n' || *p == '\r' )
		{
			iBytesScanned++ ; p++ ;
			if ( *p == '\n' || *p == '\r' )
			{
				iBytesScanned++ ; p++ ;
			}
		}
	}
	return UT_CONFIDENCE_ZILCH;
}

UT_Confidence_t IE_Imp_HTML_Sniffer::recognizeSuffix (const char * szSuffix)
{
	if (!(UT_stricmp(szSuffix,".html")) || !(UT_stricmp(szSuffix,".htm")))
		return UT_CONFIDENCE_PERFECT;
	return UT_CONFIDENCE_ZILCH;
}

UT_Error IE_Imp_HTML_Sniffer::constructImporter (PD_Document * pDocument, IE_Imp ** ppie)
{
	IE_Imp_XHTML * p = new IE_Imp_HTML(pDocument);
	*ppie = p;
	return UT_OK;
}

bool IE_Imp_HTML_Sniffer::getDlgLabels (const char ** pszDesc,
										const char ** pszSuffixList, 
										IEFileType * ft)
{
	*pszDesc = "HTML [via libxml2] (.html, .htm)";
	*pszSuffixList = "*.html; *.htm";
	*ft = getFileType();
	return true;
}

IE_Imp_HTML::IE_Imp_HTML (PD_Document * pDocument) :
	IE_Imp_XHTML(pDocument)
{
	// 
}

IE_Imp_HTML::~IE_Imp_HTML ()
{
	// 
}

UT_Error IE_Imp_HTML::importFile (const char * szFilename)
{
	UT_HTML parser;
	setParser (&parser);

	UT_Error e = IE_Imp_XHTML::importFile (szFilename);

	setParser (0);

	return e;
}