/*
 * AbiSource Program Utilities
 * Copyright (C) 2001 Dom Lachowicz <doml@appligent.com>
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
 * 02111-1307, USA.
 */

// zipios++ stuff for handling ZIP files
#include "zipios++/zipios-config.h"
#include "zipios++/zipfile.h"
#include "zipios++/meta-iostreams.h"
#include "zipios++/fcollexceptions.h"
#include "zipios++/zipinputstream.h"
#include "zipios++/zipoutputstream.h"

// abiword stuff
#include "ut_xml.h"
#include "ut_string.h"
#include "ut_string_class.h"
#include "ut_bytebuf.h"
#include "xap_Module.h"
#include "ie_imp.h"
#include "pd_Document.h"

#include "xap_EncodingManager.h"

#include "ut_assert.h"
#include "ut_debugmsg.h"

using namespace zipios ;

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class used to import OpenWriter documents
 *
 * importFile() opens the file as a ZIP archive and passes the
 * "meta.xml", "settings.xml", "styles.xml" and "content.xml" streams,
 * in that order, to the matching _handle*Stream() method.
 */
class ABI_EXPORT IE_Imp_OpenWriter : public IE_Imp
{
public:
  // Builds an importer that fills pDocument (forwarded to IE_Imp)
  IE_Imp_OpenWriter (PD_Document * pDocument);
  virtual ~IE_Imp_OpenWriter ();

  // Imports the archive named by szFilename; returns UT_OK or an error code
  virtual UT_Error importFile(const char * szFilename);

  // Public accessor for the document being filled (returns getDoc());
  // used by the stream listeners defined later in this file
  PD_Document * getDocument ();

private:
  // Reads the archive entry named `stream` out of `oo` into `bytebuf`;
  // bytebuf is truncated before anything is read
  static UT_Error readStreamIntoByteBuf ( ZipFile & oo, const char * stream,
					  UT_ByteBuf & bytebuf );

  // Each handler runs a UT_XML parser over the buffer with the
  // stream-specific listener attached and returns the parser's result
  UT_Error _handleMetaStream ( UT_ByteBuf &stm );
  UT_Error _handleSettingsStream ( UT_ByteBuf &stm );
  UT_Error _handleStylesStream ( UT_ByteBuf &stm );
  UT_Error _handleContentStream ( UT_ByteBuf &stm );
};

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Sniffer for OpenWriter documents: rates suffixes and file contents
 * and constructs the IE_Imp_OpenWriter importer on request
 */
class ABI_EXPORT IE_Imp_OpenWriter_Sniffer : public IE_ImpSniffer
{
  
public:

  /*!
   * Recognize the well-known suffixes, if any
   * \param szSuffix suffix to test (compared against ".sxw" with UT_stricmp)
   * \return UT_CONFIDENCE_PERFECT on a match, UT_CONFIDENCE_ZILCH otherwise
   *         (including when szSuffix is NULL)
   */
  virtual UT_Confidence_t recognizeSuffix (const char * szSuffix)
  {
    if (szSuffix && !UT_stricmp(szSuffix, ".sxw"))
      return UT_CONFIDENCE_PERFECT;
    return UT_CONFIDENCE_ZILCH;
  }
  
  /*!
   * Recognize the contents as best we can
   * \param szBuf first bytes of the file
   * \param iNumbytes number of valid bytes in szBuf
   * \return UT_CONFIDENCE_SOSO when the buffer starts with the ZIP
   *         local-file-header signature, UT_CONFIDENCE_ZILCH otherwise
   */
  virtual UT_Confidence_t recognizeContents (const char * szBuf, 
				  UT_uint32 iNumbytes)
  {
    // ZIP local file header signature: 'P' 'K' 0x03 0x04.
    // This still identifies *any* zip file, not only OpenWriter ones,
    // which is why a match is only rated SOSO.
    static const char magic[] = "PK\003\004";
    const UT_uint32 magic_len = sizeof (magic) - 1;	// without the trailing NUL
    
    if (!szBuf || iNumbytes < magic_len)
      return UT_CONFIDENCE_ZILCH;
    
    // the signature holds no NUL byte, so strncmp compares all of it
    if ( !strncmp (szBuf, magic, magic_len) )
      return UT_CONFIDENCE_SOSO;
    return UT_CONFIDENCE_ZILCH;
  }
  
  /*!
   * Construct an importer for ourselves
   * \param pDocument document the importer will fill
   * \param ppie receives the newly allocated IE_Imp_OpenWriter
   * \return UT_OK
   */
  virtual UT_Error constructImporter (PD_Document * pDocument,
				      IE_Imp ** ppie)
  {
    IE_Imp_OpenWriter * p = new IE_Imp_OpenWriter(pDocument);
    *ppie = p;
    return UT_OK;
  }
  
  /*!
   * Get the dialog labels
   * \param szDesc receives the human-readable description
   * \param szSuffixList receives the suffix glob
   * \param ft receives this sniffer's file type (from getFileType())
   * \return always true
   */
  bool getDlgLabels (const char ** szDesc,
		     const char ** szSuffixList,
		     IEFileType * ft)
  {
    *szDesc = "OpenWriter Documents (.sxw)";
    *szSuffixList = "*.sxw";
    *ft = getFileType();
    return true;
  }

};

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Create a new OpenWriter importer object
 * \param pDocument document to import into; forwarded unchanged to the
 *        IE_Imp base-class constructor
 */
IE_Imp_OpenWriter::IE_Imp_OpenWriter (PD_Document * pDocument)
  : IE_Imp ( pDocument )
{
}

/*!
 * Destroy an OpenWriter importer object
 * (nothing to release here: the class declares no data members of its own)
 */
IE_Imp_OpenWriter::~IE_Imp_OpenWriter ()
{
}

/*!
 * Import the given file
 *
 * Opens szFilename as a ZIP archive and processes its streams in the
 * order meta.xml, settings.xml, styles.xml, content.xml.
 * The first three streams are optional: when one cannot be read it is
 * skipped. content.xml carries the document text, so failing to read it
 * is reported as an error instead of yielding an empty "successful" import.
 *
 * \param szFilename path of the .sxw file
 * \return UT_OK on success; the failing handler's or reader's error code,
 *         or UT_ERROR when szFilename is NULL or anything throws
 */
UT_Error IE_Imp_OpenWriter::importFile(const char * szFilename)
{
  // a NULL name must never reach the ZipFile constructor
  if (!szFilename)
    return UT_ERROR;

  try
    {
      ZipFile oo ( szFilename );
      
      UT_ByteBuf stm;
      UT_Error error = UT_OK;
      
      // optional streams: only handled when they could be read
      if ( readStreamIntoByteBuf ( oo, "meta.xml", stm ) == UT_OK )
	error = _handleMetaStream ( stm );
      if ( error != UT_OK ) return error;
      
      if ( readStreamIntoByteBuf ( oo, "settings.xml", stm ) == UT_OK )
	error = _handleSettingsStream ( stm );
      if ( error != UT_OK ) return error;
      
      if ( readStreamIntoByteBuf ( oo, "styles.xml", stm ) == UT_OK )
	error = _handleStylesStream ( stm );
      if ( error != UT_OK ) return error;
      
      // mandatory stream: a read failure aborts the import
      error = readStreamIntoByteBuf ( oo, "content.xml", stm );
      if ( error != UT_OK ) return error;
      
      return _handleContentStream ( stm );
    }
  catch (...)
    {
      // zipios++ reports failures through exceptions
      return UT_ERROR;
    }
}

/*!
 * Public accessor for the document this importer fills
 * \return whatever getDoc() returns
 */
PD_Document * IE_Imp_OpenWriter::getDocument () 
{
  return getDoc ();
}

/*!
 * Static utility method to read a file/stream embedded inside of the
 * zipfile into the byte-buffer
 *
 * \param oo the opened zip archive
 * \param stream name of the entry to read
 * \param bytebuf receives the entry's bytes; it is emptied first, so on
 *        failure it holds whatever was read before the failure
 * \return UT_OK when the entry was read, UT_ERROR when the entry could
 *         not be opened or reading threw
 */
UT_Error IE_Imp_OpenWriter::readStreamIntoByteBuf ( ZipFile & oo, 
						    const char * stream,
						    UT_ByteBuf & bytebuf )
{
  bytebuf.truncate (0);
  
  // declared outside the try block so the handler can release it too
  istream * pstream = 0;

  try 
    {
      pstream = oo.getInputStream (stream);
      if (!pstream)
	return UT_ERROR;	// no such entry: report a real error code

      char buf[4096];
      UT_sint32 nread = 0;
      
      // pull raw bytes straight from the stream buffer, one chunk at a time
      while ((nread = pstream->rdbuf()->sgetn(buf, sizeof(buf))) > 0)
	{
	  bytebuf.append ( (const UT_Byte *)buf, nread );
	}

      delete pstream;
      return UT_OK;
    }
  catch (...)
    {
      // do not leak the stream when reading or appending throws
      delete pstream;
      return UT_ERROR;
    }
}

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class whose responsibility is to turn UTF-8 strings into UCS-2 ones
 * This class should probably be moved into some more general
 * place inside of the AbiWord tree, because it's really quite useful
 * and duplicate code is used in a lot of importers/exporters and in other
 * places. Note: does not depend on a working iconv implementation
 */
class ABI_EXPORT UTF8_To_UCS2_Manager
{
private:
  XML_Char		m_charDataSeen[4];
  UT_uint32		m_lenCharDataSeen;
  UT_uint32		m_lenCharDataExpected;
  bool			m_bSeenCR;
  
public:
  
  UTF8_To_UCS2_Manager ()
    : m_lenCharDataSeen(0), m_lenCharDataExpected(0), m_bSeenCR(false)
  {
  }
  
  ~UTF8_To_UCS2_Manager ()
  {
  }
  
  /*!
   * Convert UCS2 to UTF8
   */
  UT_Error toutf8 (const UT_UCS2String &in, UT_String &out)
  {
    return toutf8 ((const UT_UCSChar *)in.ucs_str(), in.size(), out);
  }

  /*!
   * Convert UCS2 to UTF8
   */
  UT_Error toutf8 (const UT_UCSChar * data, UT_uint32 length, UT_String &out)
  {
	const UT_UCSChar * pData = 0;

	UT_ASSERT(sizeof(UT_Byte) == sizeof(char));
	
	for (pData=data; (pData<data+length); /**/)
	  {
		if (*pData > 0x007f)
		  {
		    if(XAP_EncodingManager::get_instance()->isUnicodeLocale() || 
		       (XAP_EncodingManager::get_instance()->try_nativeToU(0xa1) == 0xa1))
		      
		      {
			XML_Char * pszUTF8 = UT_encodeUTF8char(*pData++);
			while (*pszUTF8)
			  {
			    out += (char)*pszUTF8;
			    pszUTF8++;
			  }
		      }
		    else
		      {
			/*
			  Try to convert to native encoding and if
			  character fits into byte, output raw byte. This 
			  is somewhat essential for single-byte non-latin
			  languages like russian or polish - since
			  tools like grep and sed can be used then for
			  these files without any problem.
			  Networks and mail transfers are 8bit clean
			  these days.  - VH
			*/
			UT_UCSChar c = XAP_EncodingManager::get_instance()->try_UToNative(*pData);
			if (c==0 || c>255)
			  {
			    char localBuf[20];
			    char * plocal = localBuf;
			    sprintf(localBuf,"&#x%x;",*pData++);
			    out += plocal;
			  }
			else
			  {
			    out += (char)c;
			    pData++;
			  }
		      }
		  }
		else
		  {
		    out += (char)*pData++;
		  }
		break;
	  }

	return UT_OK;
  }

  /*!
   * Convert UTF8 to UCS2
   */
  UT_Error toucs2 (const UT_String & in, UT_UCS2String & out)
  {
    return toucs2 ((const XML_Char *)in.c_str(), in.size(), out);
  }

  /*!
   * Convert UTF8 to UCS2
   */
  UT_Error toucs2 (const XML_Char *s, int len, UT_UCS2String &buf)
  {
    UT_ASSERT(sizeof(XML_Char) == sizeof(UT_Byte));
    UT_ASSERT(sizeof(XML_Char) != sizeof(UT_UCSChar));
    
    // parse UTF-8 text and convert to Unicode.
    // also take care of some white-space issues:
    //    [] convert CRLF to SP.
    //    [] convert CR to SP.
    //    [] convert LF to SP.
    // ignored words processing doesn't care about the 
    // white-space stuff, but it does no harm
    
    UT_Byte * ss = (UT_Byte *)s;
    UT_Byte currentChar;
    
    for (int k=0; k<len; k++)
      {
	currentChar = ss[k];
	
	if ((ss[k] < 0x80) && (m_lenCharDataSeen > 0))
	  {
	    // is it us-ascii and we are in a UTF-8
	    // multi-byte sequence.  puke.
	    UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
	    return UT_ERROR;
	  }
      
	if (currentChar == UCS_CR)
	  {
	    buf += UCS_LF;
	    m_bSeenCR = true;
	    continue;
	  }
	
	if (currentChar == UCS_LF)	// LF
	  {
	    buf += UCS_LF;
	    m_bSeenCR = false;
	    continue;
	  }
	
	m_bSeenCR = false;
	
	if (currentChar < 0x80)			// plain us-ascii part of latin-1
	  {
	    buf += ss[k];		// copy as is.
	  }
	else if ((currentChar & 0xf0) == 0xf0)	// lead byte in 4-byte surrogate pair
	  {
	    // surrogate pairs are defined in section 3.7 of the
	    // unicode standard version 2.0 as an extension
	    // mechanism for rare characters in future extensions
	    // of the unicode standard.
	    UT_ASSERT(m_lenCharDataSeen == 0);
	    UT_ASSERT(UT_NOT_IMPLEMENTED);
	    return UT_ERROR;
	  }
	else if ((currentChar & 0xe0) == 0xe0)  // lead byte in 3-byte sequence
	  {
	    UT_ASSERT(m_lenCharDataSeen == 0);
	    m_lenCharDataExpected = 3;
	    m_charDataSeen[m_lenCharDataSeen++] = currentChar;
	  }
	else if ((currentChar & 0xc0) == 0xc0)	// lead byte in 2-byte sequence
	  {
	    UT_ASSERT(m_lenCharDataSeen == 0);
	    m_lenCharDataExpected = 2;
	    m_charDataSeen[m_lenCharDataSeen++] = currentChar;
	  }
	else if ((currentChar & 0x80) == 0x80)		// trailing byte in multi-byte sequence
	  {
	    UT_ASSERT(m_lenCharDataSeen > 0);
	    m_charDataSeen[m_lenCharDataSeen++] = currentChar;
	    if (m_lenCharDataSeen == m_lenCharDataExpected)
	      {
		buf += UT_decodeUTF8char(m_charDataSeen,m_lenCharDataSeen);
		m_lenCharDataSeen = 0;
	      }
	  }
      }
    return UT_OK;
  }
};

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Baseclass for all OpenWriter listeners, basically a shim class
 * to expose a getDocument() and a getImporter() method to the
 * stream-specific listeners derived from it
 */
class ABI_EXPORT OpenWriter_Stream_Listener : public virtual UT_XML::Listener
{
private:
  // importer this listener works for; stored as given, never deleted here
  IE_Imp_OpenWriter * m_pImporter;
  
protected:
  // the importer passed to the constructor
  inline IE_Imp_OpenWriter * getImporter () { return m_pImporter; }
  // the importer's document; dereferences m_pImporter without a NULL check
  inline PD_Document * getDocument() { return m_pImporter->getDocument(); }
  
public:
  OpenWriter_Stream_Listener ( IE_Imp_OpenWriter * importer )
    : m_pImporter ( importer )
  {
  }
  
  virtual ~OpenWriter_Stream_Listener ()
  {
  }
};

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class to handle meta-streams
 * Currently a stub: all three parser callbacks are empty, so the
 * stream is parsed but nothing from it reaches the document
 */
class ABI_EXPORT OpenWriter_MetaStream_Listener : public OpenWriter_Stream_Listener
{
public:
  OpenWriter_MetaStream_Listener ( IE_Imp_OpenWriter * importer )
    : OpenWriter_Stream_Listener ( importer )
  {
  }
  
  virtual ~OpenWriter_MetaStream_Listener ()
  {
  }
  
  // element start callback: intentionally does nothing yet
  virtual void startElement (const XML_Char * name, const XML_Char ** atts) 
  {
  }
  
  // element end callback: intentionally does nothing yet
  virtual void endElement (const XML_Char * name)
  {
  }
  
  // character data callback: intentionally does nothing yet
  virtual void charData (const XML_Char * buffer, int length)
  {
  }
  
private:
};

/*!
 * Handle the meta-stream
 * Runs a UT_XML parser over the buffer with an
 * OpenWriter_MetaStream_Listener attached
 * \param stm contents of the meta stream
 * \return the result reported by the parser
 */
UT_Error IE_Imp_OpenWriter::_handleMetaStream ( UT_ByteBuf & stm )
{
  UT_XML xmlParser;
  OpenWriter_MetaStream_Listener metaListener ( this );

  xmlParser.setListener ( &metaListener );

  const UT_Error status = xmlParser.parse ( &stm );
  return status;
}

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class to handle the settings stream
 * Currently a stub: all three parser callbacks are empty, so the
 * stream is parsed but nothing from it reaches the document
 */
class ABI_EXPORT OpenWriter_SettingsStream_Listener : public OpenWriter_Stream_Listener
{
public:
  OpenWriter_SettingsStream_Listener ( IE_Imp_OpenWriter * importer )
    : OpenWriter_Stream_Listener ( importer )
  {
  }

  virtual ~OpenWriter_SettingsStream_Listener ()
  {
  }

  // element start callback: intentionally does nothing yet
  virtual void startElement (const XML_Char * name, const XML_Char ** atts) 
  {
  }

  // element end callback: intentionally does nothing yet
  virtual void endElement (const XML_Char * name)
  {
  }

  // character data callback: intentionally does nothing yet
  virtual void charData (const XML_Char * buffer, int length)
  {
  }

private:
};

/*!
 * Handle the settings-stream
 * Runs a UT_XML parser over the buffer with an
 * OpenWriter_SettingsStream_Listener attached
 * \param stm contents of the settings stream
 * \return the result reported by the parser
 */
UT_Error IE_Imp_OpenWriter::_handleSettingsStream ( UT_ByteBuf & stm )
{
  UT_XML xmlParser;
  OpenWriter_SettingsStream_Listener settingsListener ( this );

  xmlParser.setListener ( &settingsListener );

  const UT_Error status = xmlParser.parse ( &stm );
  return status;
}

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class to handle the styles stream
 * Currently a stub: all three parser callbacks are empty, so the
 * stream is parsed but nothing from it reaches the document
 */
class ABI_EXPORT OpenWriter_StylesStream_Listener : public OpenWriter_Stream_Listener
{
public:
  OpenWriter_StylesStream_Listener ( IE_Imp_OpenWriter * importer )
    : OpenWriter_Stream_Listener ( importer )
  {
  }
  
  virtual ~OpenWriter_StylesStream_Listener ()
  {
  }
  
  // element start callback: intentionally does nothing yet
  virtual void startElement (const XML_Char * name, const XML_Char ** atts) 
  {
  }
  
  // element end callback: intentionally does nothing yet
  virtual void endElement (const XML_Char * name)
  {
  }
  
  // character data callback: intentionally does nothing yet
  virtual void charData (const XML_Char * buffer, int length)
  {
  }

private:
};

/*!
 * Handle the styles-stream
 * Runs a UT_XML parser over the buffer with an
 * OpenWriter_StylesStream_Listener attached
 * \param stm contents of the styles stream
 * \return the result reported by the parser
 */
UT_Error IE_Imp_OpenWriter::_handleStylesStream ( UT_ByteBuf & stm )
{
  UT_XML xmlParser;
  OpenWriter_StylesStream_Listener stylesListener ( this );

  xmlParser.setListener ( &stylesListener );

  const UT_Error status = xmlParser.parse ( &stm );
  return status;
}

/*****************************************************************************/
/*****************************************************************************/

/*!
 * Class to handle the content stream
 * Every "text:p" element opens a new block in the document; character
 * data seen while inside such an element is converted from UTF-8 and
 * appended as a span
 */
class ABI_EXPORT OpenWriter_ContentStream_Listener : public OpenWriter_Stream_Listener
{
private:
  UTF8_To_UCS2_Manager m_manager;	// stateful UTF-8 decoder
  bool m_bInParagraph;			// true between <text:p> and </text:p>

public:
  OpenWriter_ContentStream_Listener ( IE_Imp_OpenWriter * importer )
    : OpenWriter_Stream_Listener ( importer ), m_bInParagraph(false)
  {
  }

  virtual ~OpenWriter_ContentStream_Listener ()
  {
  }

  /*!
   * A paragraph start appends a block strux and turns text capture on;
   * any other element is ignored
   */
  virtual void startElement (const XML_Char * name, const XML_Char ** atts) 
  {
    if ( UT_strcmp(name, "text:p" ) != 0 )
      return;

    getDocument()->appendStrux(PTX_Block, NULL);
    m_bInParagraph = true;
  }

  /*!
   * A paragraph end turns text capture off again
   */
  virtual void endElement (const XML_Char * name)
  {
    if ( UT_strcmp(name, "text:p" ) == 0 )
      m_bInParagraph = false;
  }

  /*!
   * Append paragraph text to the document; text outside of a
   * paragraph, undecodable text and empty results are dropped
   */
  virtual void charData (const XML_Char * buffer, int length)
  {
    if ( !m_bInParagraph )
      return;

    UT_UCS2String converted;
    if ( m_manager.toucs2 ( buffer, length, converted ) != UT_OK )
      return;

    if ( converted.size () == 0 )
      return;

    UT_DEBUGMSG(("DOM: appending %d chars\n", converted.size()));
    getDocument()->appendSpan ( converted.ucs_str(), converted.size () );
  }

};

/*!
 * Handle the content-stream
 * Appends one section to the document, then runs a UT_XML parser over
 * the buffer with an OpenWriter_ContentStream_Listener attached
 * \param stm contents of the content stream
 * \return the result reported by the parser
 */
UT_Error IE_Imp_OpenWriter::_handleContentStream ( UT_ByteBuf & stm )
{
  UT_XML xmlParser;
  OpenWriter_ContentStream_Listener contentListener ( this );
  xmlParser.setListener ( &contentListener );

  // quick hack: the section is appended up front instead of being
  // derived from tags recognized while parsing the xml document
  PD_Document * const pDoc = getDocument();
  pDoc->appendStrux(PTX_Section, NULL);

  const UT_Error status = xmlParser.parse ( &stm );
  return status;
}

/****************************************************************************/
/****************************************************************************/

#if 0

#include "ie_exp.h"

// start of an OpenWriter Export Plugin

/*!
 * Start of an OpenWriter exporter (currently compiled out).
 * Owns the zip output stream between _openFile() and _closeFile().
 */
class ABI_EXPORT IE_Exp_OpenWriter : public IE_Exp
{
private:
  UTF8_To_UCS2_Manager m_manager;	// UCS-2 -> UTF-8 conversion helper
  ZipOutputStream * m_zos;		// open archive, or 0 when closed

public:
  IE_Exp_OpenWriter (PD_Document * pDocument)
    : IE_Exp (pDocument), m_zos(0)
  {
  }
  
  virtual ~IE_Exp_OpenWriter ()
  {
    _closeFile ();
  }
  
protected:
  
  /*!
   * Writes the following UCS2 string to the open stream
   * \param ucs2 0-terminated string; NULL writes nothing
   * \return number of characters handed to the stream
   */
  UT_sint32 _writeUCS2 (const UT_UCSChar * ucs2)
  {
    if(!ucs2)
      return 0;
    return _writeUCS2 (ucs2, UT_UCS_strlen(ucs2));
  }
  
  /*!
   * Writes the following UCS2 string to the open stream
   * \param ucs2 characters to write
   * \param len number of characters in ucs2
   * \return len, or 0 when there is nothing to write or no open stream
   */
  UT_sint32 _writeUCS2 (const UT_UCSChar * ucs2, UT_uint32 len)
  {
    // also refuse to write when no archive is open
    if(!ucs2 || !len || !m_zos)
      return 0;
    
    UT_String utf8;
    if (UT_OK == m_manager.toutf8 (ucs2, len, utf8))
      {
	m_zos->write (utf8.c_str(), utf8.size());
      }
    
    return len;
  }
  
  /*!
   * Writes the ASCII string to the open stream
   * \param str 0-terminated string; NULL writes nothing
   * \return number of bytes written
   */
  UT_sint32 _write (const char * str)
  {
    if(!str)
      return 0;
    return _write (str, strlen(str));
  }
  
  /*!
   * Writes the ASCII string to the open stream
   * \param str bytes to write
   * \param len number of bytes in str
   * \return len, or 0 when there is nothing to write or no open stream
   */
  UT_uint32 _write (const char * str, UT_uint32 len)
  {
    // also refuse to write when no archive is open
    if(!str || !len || !m_zos)
      return 0;
    
    m_zos->write(str, len);
    return len;
  }
  
  /*!
   * Create a substream inside of the current zip file
   * \param szFilename name of the new archive entry
   * \return UT_OK, or UT_ERROR when no archive is open or zipios++ throws
   */
  UT_Error _createSubStream ( const char * szFilename )
  {
    if (!m_zos || !szFilename)
      return UT_ERROR;

    try
      {
	// the next thing that we write to will be this file
	m_zos->putNextEntry( ZipCDirEntry( szFilename ) ) ;
	return UT_OK;
      }
    catch (...)
      {
	return UT_ERROR;
      }
  }
  
  /*!
   * Open the zip file
   * \param szFilename path of the archive to create
   * \return true when the output stream could be created
   */
  virtual bool _openFile(const char * szFilename)
  {
    if (!szFilename)
      return false;

    // release a stream left over from an earlier open instead of leaking it
    DELETEP(m_zos);

    try 
      {
	m_zos = new ZipOutputStream ( szFilename );
	return true;
      }
    catch (...)
      {
	return false;
      }
  }

  /*!
   * Close the zip file
   * \return always true
   */
  virtual bool _closeFile ()
  {
    DELETEP(m_zos);
    return true;
  }
  
  /*!
   * Actually create a listener capable of handling the document's
   * callbacks and write out the document to disk
   * \return UT_OK on success, UT_IE_NOMEMORY when the listener could not
   *         be created, UT_ERROR when the document rejects the listener
   */
  virtual UT_Error _writeDocument(void)
  {
    OpenWriter_OutputDevice * pListener = 
      new OpenWriter_OutputDevice(getDoc(),this);
    
    if (!pListener)
      return UT_IE_NOMEMORY;

    // remember the outcome first so the listener is freed on both paths
    const bool bOk = getDoc()->tellListener(static_cast<PL_Listener *>(pListener));
    DELETEP(pListener);
    
    return bOk ? UT_OK : UT_ERROR;
  }

};

/*!
 * Export sniffer (currently compiled out together with the exporter)
 */
class ABI_EXPORT IE_Exp_OpenWriter_Sniffer : public IE_ExpSniffer
{
  /*!
   * Recognize this suffix
   * \return true when szSuffix equals ".sxw" according to UT_stricmp
   */
  bool recognizeSuffix(const char * szSuffix)
  {
    return (!UT_stricmp(szSuffix,".sxw"));
  }

  /*!
   * Construct an exporter for us
   * \param pDocument document the exporter will write out
   * \param ppie receives the newly allocated IE_Exp_OpenWriter
   * \return UT_OK
   */
  UT_Error constructExporter(PD_Document * pDocument,
			     IE_Exp ** ppie)
  {
    IE_Exp_OpenWriter * p = new IE_Exp_OpenWriter(pDocument);
    *ppie = p;
    return UT_OK;
  }
  
  /*!
   * Get the dialog labels
   * \param pszDesc receives the human-readable description
   * \param pszSuffixList receives the suffix glob
   * \param ft receives this sniffer's file type (from getFileType())
   * \return always true
   */
  bool getDlgLabels(const char ** pszDesc,
		    const char ** pszSuffixList,
		    IEFileType * ft)
  {
    *pszDesc = "OpenWriter (.sxw)";
    *pszSuffixList = "*.sxw";
    *ft = getFileType();
    return true;
  }
};

#endif

/****************************************************************************/
/****************************************************************************/

// completely generic C-interface code to allow this to be a plugin

// plugin declaration macro, invoked with the name "OpenWriter"
// NOTE(review): the macro is defined outside this file - confirm what it expands to
ABI_PLUGIN_DECLARE("OpenWriter")
  
// we use a reference-counted sniffer: created by the first
// abi_plugin_register() call, ref()'d by later ones, and the pointer is
// cleared in abi_plugin_unregister() once unref() returns zero
static IE_Imp_OpenWriter_Sniffer * m_sniffer = 0;

/*!
 * Plugin entry point: fill in the module description and register the
 * importer sniffer
 * \param mi module info record to fill in
 * \return always 1
 */
ABI_FAR_CALL
int abi_plugin_register (XAP_ModuleInfo * mi)
{
  // later registrations share the sniffer made by the first one
  if (m_sniffer)
    m_sniffer->ref();
  else
    m_sniffer = new IE_Imp_OpenWriter_Sniffer ();

  UT_ASSERT (m_sniffer);

  // describe ourselves to the host
  mi->usage   = "No Usage";
  mi->author  = "Dom Lachowicz <cinamod@hotmail.com>";
  mi->version = ABI_VERSION_STRING;
  mi->desc    = "Import Sun's OpenWriter documents";
  mi->name    = "OpenWriter Importer";

  IE_Imp::registerImporter (m_sniffer);

  return 1;
}

/*!
 * Plugin exit point: blank the module description, unregister the
 * importer sniffer and drop one reference to it
 * \param mi module info record to clear
 * \return always 1
 */
ABI_FAR_CALL
int abi_plugin_unregister (XAP_ModuleInfo * mi)
{
  // wipe every descriptive field
  mi->usage   = 0;
  mi->author  = 0;
  mi->version = 0;
  mi->desc    = 0;
  mi->name    = 0;

  UT_ASSERT (m_sniffer);

  IE_Imp::unregisterImporter (m_sniffer);

  // forget the pointer once unref() reports zero
  if (m_sniffer->unref() == 0)
    m_sniffer = 0;

  return 1;
}

// Version check entry point: the arguments are ignored and 1 is
// returned unconditionally, i.e. every host version is accepted
ABI_FAR_CALL
int abi_plugin_supports_version (UT_uint32 major, UT_uint32 minor, 
				 UT_uint32 release)
{
  return 1;
}

/****************************************************************************/
/****************************************************************************/