/* AbiWord
 * Copyright (C) 1998 AbiSource, Inc.
 * Copyright (C) 1998-2000 Hubert Figuiere
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  
 * 02111-1307, USA.
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ut_types.h"
#include "ut_assert.h"
#include "ut_debugmsg.h"
#include "ut_string.h"
#include "ie_impexp_MSWrite.h"
#include "ie_imp_MSWrite.h"
#include "pd_Document.h"
#include "ut_growbuf.h"

/*****************************************************************/
/* Generic IO                                                    */
/*****************************************************************/

// Readers for the 16- and 32-bit integers of the file, which are stored
// least-significant byte first.  The bytes are fetched individually and
// assembled with shifts, so the result does not depend on the byte order of
// the machine running the importer and no endianness macro has to be
// defined (the previous hand-written __LITTLE_ENDIAN used an identifier
// reserved for the C library).
//
// 'value' must be an assignable expression.  When the file does not supply
// all the bytes, 'value' is left untouched; callers that care can test
// feof()/ferror() on 'file' afterwards.
#define ReadWord16(file, value) \
    do { \
        unsigned char rw16_bytes_[2]; \
        if (fread(rw16_bytes_, 1, sizeof(rw16_bytes_), (file)) == sizeof(rw16_bytes_)) \
        { \
            (value) = (UT_uint16)(rw16_bytes_[0] | (rw16_bytes_[1] << 8)); \
        } \
    } while (0)

#define ReadWord32(file, value) \
    do { \
        unsigned char rw32_bytes_[4]; \
        if (fread(rw32_bytes_, 1, sizeof(rw32_bytes_), (file)) == sizeof(rw32_bytes_)) \
        { \
            (value) = ((UT_uint32)rw32_bytes_[0]) \
                    | ((UT_uint32)rw32_bytes_[1] << 8) \
                    | ((UT_uint32)rw32_bytes_[2] << 16) \
                    | ((UT_uint32)rw32_bytes_[3] << 24); \
        } \
    } while (0)


/*****************************************************************/
/*****************************************************************/

/*
  Import MS Write file.
*/

// Attribute name placed in slot 0 of the attribute array that
// ReadTextRangeWithFormat passes to the document's appendFmt().
static const char PROPS_XML_ATTR_NAME[] = "PROPS";

// char attributes
//
// Each *template* below contains a single %s placeholder; the companion
// constants are ready-made values.  None of them is referenced elsewhere
// in this file yet -- presumably they are meant to be passed as the
// 'attr' / 'value' arguments of MakeProperties (TODO confirm).

// font family template
static const char FONT_FAMILY[] = "font-family:%s;";

// font style template and its italic value
static const char FONT_STYLE[] = "font-style:%s;";
static const char FONT_STYLE_ITALIC[] = "italic";

// font weight template and its bold value
static const char FONT_WEIGHT[] = "font-weight:%s;";
static const char FONT_WEIGHT_BOLD[] = "bold";

// font size template; the value is followed by "pt" in the output
static const char FONT_SIZE[] = "font-size:%spt;";


/*****************************************************************/
/*****************************************************************/

/*
  Import the file named szFilename into the document.

  Returns UT_IE_FILENOTFOUND when the file cannot be opened, otherwise the
  first non-UT_OK status reported by _writeHeader or _parseFile, or UT_OK
  when both succeed.  The file is always closed before returning.
*/
UT_Error IE_Imp_MSWrite::importFile(const char * szFilename)
{
    // The file holds binary data (16/32-bit words, format pages), so it
    // must be opened in binary mode: in text mode some platforms translate
    // CR/LF pairs and stop at a Ctrl-Z byte, which corrupts every offset.
    FILE *fp = fopen(szFilename, "rb");
    if (!fp)
    {
        UT_DEBUGMSG(("Could not open file %s\n",szFilename));
        return UT_IE_FILENOTFOUND;
    }
    
    // _parseFile only runs when the header step succeeded.
    UT_Error iestatus = _writeHeader(fp);
    if (iestatus == UT_OK)
    {
        iestatus = _parseFile(fp);
    }
    
    fclose(fp);
    return iestatus;
}

/*****************************************************************/
/*****************************************************************/

// Destructor.  The body is empty: no cleanup is performed here.
IE_Imp_MSWrite::~IE_Imp_MSWrite()
{
}

// Constructor.  Hands pDocument to the IE_Imp base class and does no
// further initialization of its own.
IE_Imp_MSWrite::IE_Imp_MSWrite(PD_Document * pDocument)
    : IE_Imp(pDocument)
{
}

/*****************************************************************/
/*****************************************************************/

// Early-return helpers for the functions below (undefined again once they
// are no longer needed).
//   X_ReturnIfFail(exp,ies)   : evaluate exp once; when it is false, return ies.
//   X_ReturnNoMemIfError(exp) : same, returning UT_IE_NOMEMORY.
#define X_ReturnIfFail(exp,ies)		do { UT_Bool b = (exp); if (!b) return (ies); } while (0)
#define X_ReturnNoMemIfError(exp)	X_ReturnIfFail(exp,UT_IE_NOMEMORY)

// Start the document by appending a section strux.  The file handle is not
// used.  Returns UT_IE_NOMEMORY when the strux cannot be appended, UT_OK
// otherwise.
UT_Error IE_Imp_MSWrite::_writeHeader(FILE * /* fp */)
{
    const UT_Bool bSectionAppended = m_pDocument->appendStrux(PTX_Section, NULL);
    if (!bSectionAppended)
    {
        return UT_IE_NOMEMORY;
    }

    return UT_OK;
}

/*
  Read the file descriptor, then walk the format pages that start at
  offset fcMac while feeding the text (which starts one page into the
  file) to ReadTextRangeWithFormat.

  Returns UT_ERROR when the descriptor is rejected, the status of
  ReadTextRangeWithFormat when that call fails, and UT_OK otherwise.

  NOTE(review): pFmt1 is never advanced to the following format page, so
  every call receives the first page -- the per-range formatting logic is
  visibly unfinished.
*/
UT_Error IE_Imp_MSWrite::_parseFile(FILE * fp)
{
    WRI_format_page_t aFormatPage, aFormatPage2;
    WRI_format_page_t *pFmt1 = &aFormatPage;
    WRI_format_page_t *pFmt2 = &aFormatPage2;
    
    WRI_write_file_desc_t fileDesc;
    
    const UT_sint32 err = ReadFileDesc (fp, fileDesc);
    if (err != NO_ERROR)
    {
        return UT_ERROR;
    }
    
    const UT_uint32 sizeToRead = fileDesc.fcMac;
    UT_uint32 rangeEnding = 0;
    
    // A failed read leaves fcFirst untouched; give it a defined value so
    // the loop condition never looks at indeterminate memory.
    aFormatPage.fcFirst = 0;
    aFormatPage2.fcFirst = 0;
    
    int fodPosPointer = fileDesc.fcMac;
    
    // move to the beginning of the text data
    const int textPosPointer = PAGE_SIZE * 1;
    fseek(fp, textPosPointer, SEEK_SET);
    
    ReadFormatPage (fp, fodPosPointer, pFmt1);
    do 
    {
        const int previousFodPos = fodPosPointer;
        ReadFormatPage (fp, fodPosPointer, pFmt2);
        
        // where the next formatting page begins
        rangeEnding = pFmt2->fcFirst;
        
        // Report a failed append to the caller instead of dropping it.
        const UT_Error status = ReadTextRangeWithFormat (fp, sizeToRead, *pFmt1);
        if (status != UT_OK)
        {
            return status;
        }
        
        // ReadFormatPage reports no errors.  When it could not move past
        // its starting offset (end of file), neither fodPosPointer nor
        // pFmt2 can change any more, so looping again would never end.
        if (fodPosPointer <= previousFodPos)
        {
            break;
        }
    }
    while (rangeEnding < (UT_uint32)(fodPosPointer - 1));
    
    // Text that lacks a final CR/LF cannot be flushed from here: the
    // buffer collecting it is local to ReadTextRangeWithFormat.
    
    return UT_OK;
}


/*
  Read text bytes from the current position of fp until the file offset
  reaches 'end' (or the file runs out) and append them to the document.

  A CR, an LF or a CR/LF pair ends a paragraph: a block strux is appended,
  followed by the text collected since the previous break.  Text left over
  after the last break is emitted the same way before returning, so it is
  not lost.

  Returns UT_IE_NOMEMORY when appending to the document or to the text
  buffer fails, UT_OK otherwise.

  TODO: fmtData is not interpreted yet.  An empty PROPS attribute is passed
  to appendFmt after every character read; real properties have to be built
  from the FODs of the format page.
*/
UT_Error
IE_Imp_MSWrite::ReadTextRangeWithFormat (FILE * fp, const UT_uint32 end, 
                                         const WRI_format_page_t & fmtData)
{
    UT_Bool bEatLF = UT_FALSE;
    UT_Byte c;
    UT_GrowBuf gbBlock(1024);
    UT_UCSChar uc;
    
    // NOTE(review): constructed as before but not consulted yet; kept
    // because what the constructor does is not visible from this file.
    WRI_Format_Page formatting (&fmtData);
    
    // Attribute array handed to appendFmt.  The value string lives on the
    // stack and is reset to a valid empty string before each use, so the
    // document never sees uninitialized memory.
    XML_Char propsBuffer[1024];
    const XML_Char* propsArray[3];
    propsArray[0] = PROPS_XML_ATTR_NAME;
    propsArray[1] = propsBuffer;
    propsArray[2] = NULL;
    
    UT_uint32 count = ftell (fp);
    
    while ((count < end) && 
           (fread(&c, 1, sizeof(c), fp) > 0))
    {
        // Each byte is handled exactly once.
        count++;
        
        switch (c)
        {
        case '\r':
        case '\n':
            if ((c == '\n') && bEatLF)
            {
                // second half of a CR/LF pair: already handled
                bEatLF = UT_FALSE;
                break;
            }
            
            if (c == '\r')
            {
                bEatLF = UT_TRUE;
            }
            
            // a paragraph is delimited by a CRLF as specified 
            // by the file format specification
            
            X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Block, NULL));
            if (gbBlock.getLength() > 0)
            {
                X_ReturnNoMemIfError(
                    m_pDocument->appendSpan(gbBlock.getPointer(0), 
                                            gbBlock.getLength()));
                gbBlock.truncate(0);
            }
            break;
        case 0x09:
            // handle tabs: keep the horizontal tab code point itself
            // (0x0009) rather than turning it into a vertical tab
            bEatLF = UT_FALSE;
            uc = (UT_UCSChar) c;
            X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),&uc,1));
            break;
        case 0x0C:
            // handle form feeds
            bEatLF = UT_FALSE;
            uc = UCS_FF;
            X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),&uc,1));
            break;
        default:
            bEatLF = UT_FALSE;
            
            // deal with plain character.
            // this cast is OK.  we have US-ASCII (actually Latin-1) character
            // data, so we can do this.
            
            // TODO consider scanning for CP1252 
            // because actually it is CP1252 and NOT Latin-1
            
            uc = (UT_UCSChar) c;
            X_ReturnNoMemIfError(gbBlock.ins(gbBlock.getLength(),&uc,1));
            break;
        }
        
        // now add the attributes (none are computed yet, see TODO above).
        propsBuffer[0] = 0;
        m_pDocument->appendFmt (propsArray);
    } 
    
    if (gbBlock.getLength() > 0)
    {
        // text left over without a final CR/LF: create a paragraph and
        // emit the text now.
        X_ReturnNoMemIfError(m_pDocument->appendStrux(PTX_Block, NULL));
        X_ReturnNoMemIfError(
            m_pDocument->appendSpan(gbBlock.getPointer(0), 
                                    gbBlock.getLength()));
    }
    
    return UT_OK;
}


#undef X_ReturnNoMemIfError
#undef X_ReturnIfFail


/*****************************************************************/
/*****************************************************************/

//
// Insert the lenData bytes at pData into the document as plain text,
// starting at the (empty) position described by pDocRange.
//
// CR, LF and CR/LF each end a line.  The text of a line is inserted as a
// span when the line ends; a block strux is inserted in front of the first
// character of every line except the first one that contains text.
// FIXME: the original author did not know what this entry point is for.
//
void IE_Imp_MSWrite::pasteFromBuffer(PD_DocumentRange * pDocRange,
                                     unsigned char * pData, UT_uint32 lenData)
{
    UT_ASSERT(m_pDocument == pDocRange->m_pDoc);
    UT_ASSERT(pDocRange->m_pos1 == pDocRange->m_pos2);
    
    UT_GrowBuf pendingText(1024);
    UT_Bool bSkipNextLF = UT_FALSE;     // previous byte was a CR
    UT_Bool bNoTextSeenYet = UT_TRUE;   // no strux before the very first text
    UT_Bool bAtLineStart = UT_TRUE;
    
    PT_DocPosition insertPos = pDocRange->m_pos1;
    
    unsigned char * const pEnd = pData + lenData;
    for (unsigned char * pCur = pData; pCur < pEnd; pCur++)
    {
        const unsigned char ch = *pCur;
        
        if ((ch != '\r') && (ch != '\n'))
        {
            bSkipNextLF = UT_FALSE;
            
            // every line but the first textual one opens a new block
            if (bAtLineStart && !bNoTextSeenYet)
            {
                m_pDocument->insertStrux(insertPos,PTX_Block);
                insertPos++;
            }
            
            // plain character: the byte value is used as the code point
            // (US-ASCII / Latin-1 data).
            UT_UCSChar wide = (UT_UCSChar) ch;
            pendingText.ins(pendingText.getLength(),&wide,1);
            
            bAtLineStart = UT_FALSE;
            bNoTextSeenYet = UT_FALSE;
            continue;
        }
        
        // From here on ch is a CR or an LF.
        if ((ch == '\n') && bSkipNextLF)
        {
            // LF completing a CR/LF pair: the break was already taken
            bSkipNextLF = UT_FALSE;
            continue;
        }
        
        if (ch == '\r')
        {
            bSkipNextLF = UT_TRUE;
        }
        
        // CRLF, CR and LF are all interpreted as a paragraph break.
        if (pendingText.getLength() > 0)
        {
            // flush out what we have
            m_pDocument->insertSpan(insertPos, pendingText.getPointer(0), pendingText.getLength());
            insertPos += pendingText.getLength();
            pendingText.truncate(0);
        }
        bAtLineStart = UT_TRUE;
    } 
    
    if (pendingText.getLength() > 0)
    {
        // text left over without a final CR/LF
        m_pDocument->insertSpan(insertPos, pendingText.getPointer(0), pendingText.getLength());
        insertPos += pendingText.getLength();
    }
}

/*****************************************************************/
/*****************************************************************/

/*
  Tell whether the first bytes of a file look like a file this importer
  handles.

  szBuf     : start of the file contents
  iNumbytes : number of bytes available at szBuf (more than 8 are required)

  Returns UT_TRUE when the buffer starts with the identifier word followed
  by a zero word, UT_FALSE otherwise (including a NULL or too short buffer).

  The identifier word is stored low byte first.  Both variants accepted by
  ReadFileDesc (IDENT and IDENT_OLE) are recognized here, so a file is not
  refused at detection time although the parser would take it.
  NOTE(review): the byte values 0x31 0xBE / 0x32 0xBE are taken from the
  published file format (0xBE31, and 0xBE32 for files containing OLE
  objects); confirm they match the IDENT / IDENT_OLE constants.
*/
UT_Bool IE_Imp_MSWrite::RecognizeContents(const char * szBuf, UT_uint32 iNumbytes)
{
    if ( !szBuf || iNumbytes <= 8 )
    {
        return(UT_FALSE);
    }
    
    // compare as unsigned bytes so that 0xbe needs no cast games
    const unsigned char * sig = (const unsigned char *) szBuf;
    
    if ( (sig[0] == 0x31 || sig[0] == 0x32) && sig[1] == 0xbe &&
         sig[2] == 0 && sig[3] == 0 )
    {
        return(UT_TRUE);
    }
    return(UT_FALSE);
}


// Tell whether szSuffix is the suffix of the files this importer handles:
// UT_stricmp must report it equal to ".wri".
UT_Bool IE_Imp_MSWrite::RecognizeSuffix(const char * szSuffix)
{
    const UT_Bool bIsWriSuffix = (UT_stricmp(szSuffix,".wri") == 0);
    return bIsWriSuffix;
}


// Factory: create an importer bound to pDocument and store it in *ppie.
// Always returns UT_OK.
UT_Error IE_Imp_MSWrite::StaticConstructor(PD_Document * pDocument,
                                           IE_Imp ** ppie)
{
    *ppie = new IE_Imp_MSWrite(pDocument);
    return UT_OK;
}


// Provide the strings describing this file type: the description goes to
// *pszDesc, the suffix pattern to *pszSuffixList and the file type
// identifier to *ft.  Always returns UT_TRUE.
UT_Bool	IE_Imp_MSWrite::GetDlgLabels(const char ** pszDesc,
                                     const char ** pszSuffixList,
                                     IEFileType * ft)
{
    *ft = IEFT_MSWrite;
    *pszSuffixList = "*.wri";
    *pszDesc = "MS-Write (.wri)";

    return UT_TRUE;
}

// Tell whether ft is the file type handled by this importer (IEFT_MSWrite).
UT_Bool IE_Imp_MSWrite::SupportsFileType(IEFileType ft)
{
    const UT_Bool bSupported = (ft == IEFT_MSWrite);
    return bSupported;
}


///////////////////////////////////////////////////////////////////
/*
  Read and validate the file descriptor at the start of 'file' and store
  its fields in 'desc'.  The header is addressed in 16-bit words; the
  numbers in the comments below are word indices.

  Returns BAD_FORMAT when the identifier, the tool word or one of the words
  that must be zero does not match, or when the header cannot be read
  completely; NO_ERROR otherwise.  The file position is left just after
  word 48.
*/
UT_uint32 IE_Imp_MSWrite::ReadFileDesc(FILE * file, 
                                       WRI_write_file_desc_t & desc)
{
    int i;
    // The word readers leave their target untouched on a short read, so
    // everything that is compared starts from a defined value.
    UT_uint16 current = 0;
    desc.wIdent = 0;
    
    if (fseek(file, 0, SEEK_SET) != 0) {
        return BAD_FORMAT;
    }
    //0
    ReadWord16(file, desc.wIdent);
    if ((desc.wIdent != IDENT) && (desc.wIdent != IDENT_OLE)) {
        return BAD_FORMAT;
    }
    //1
    ReadWord16(file, current);
    if (current != 0) {
        return BAD_FORMAT;
    }
    //2
    ReadWord16(file, current);
    if (current != TOOL_WORD) {
        return BAD_FORMAT;
    }
    //3-6
    for (i = 3; i <= 6; i++) {
        ReadWord16(file, current);
        if (current != 0) {
            return BAD_FORMAT;
        }
    }
    
    //7-8
    ReadWord32(file, desc.fcMac);
    //9-13
    ReadWord16(file, desc.pnPara);
    ReadWord16(file, desc.pnFntb);
    ReadWord16(file, desc.pnSep);
    ReadWord16(file, desc.pnSetb);
    ReadWord16(file, desc.pnPgtb);
    //14
    // This word used to be stored in desc.pnFntb a second time, destroying
    // the value read from word 10.  It is now read and dropped.
    // NOTE(review): in the published format word 14 is the font face name
    // table page; store it if the descriptor has a field for it.
    ReadWord16(file, current);
    
    // fseek() clears the end-of-file indicator, so a truncated header has
    // to be detected before seeking again.
    if (feof(file) || ferror(file)) {
        return BAD_FORMAT;
    }
    
    //15-47 reserved for Word
    //48 : word 48 lives at byte offset 48 * 2, not at byte 48
    if (fseek(file, (long)(48 * sizeof(UT_uint16)), SEEK_SET) != 0) {
        return BAD_FORMAT;
    }
    
    ReadWord16(file, desc.pnMac);
    if (feof(file) || ferror(file)) {
        return BAD_FORMAT;
    }
    
    // first page after the text, counting in pages of 128 bytes
    desc.pnChar = (desc.fcMac + 127) / 128;
    
    return NO_ERROR;
}


///////////////////////////////////////////////////////////////////
UT_uint32 
IE_Imp_MSWrite::ReadText(FILE * file, const WRI_write_file_desc_t & desc, 
                                   UT_uint16 pageNumber,/* start at 1 */
                                   UT_Byte * buf, size_t & bufLen)
   /*
     Reads the text at page pageNumber into buf, which has room for bufLen
     bytes.  On return bufLen contains the number of bytes actually read.
     pageNumber 0 is invalid !! It is the header...
     
     At most desc.fcMac - pageNumber * 128 bytes are read.  When the page
     starts at or beyond desc.fcMac nothing is read and bufLen is set to 0.
     Always returns NO_ERROR.
     
     Currently the pictures are read as is and might generate some unwanted
     chars in the text stream.
     Separating pictures will come when decoding paragraph information.
   */
{
    const size_t pageOffset = (size_t)pageNumber * 128;
    
    // Without this test the unsigned subtraction below wraps around to a
    // huge "remaining" count for a page lying past the end of the text.
    if (pageOffset >= desc.fcMac)
    {
        bufLen = 0;
        return NO_ERROR;
    }
    
    const size_t numRemaining = desc.fcMac - pageOffset;
    const size_t sizeToWrite = (numRemaining > bufLen ? bufLen : numRemaining);
    
    fseek(file, PAGE_SIZE * pageNumber, SEEK_SET);
    bufLen = fread(buf, 1, sizeToWrite, file);
    
    return NO_ERROR;
}


/////////////////////////////////////////////////////////////////
UT_uint32 
IE_Imp_MSWrite::ReadFormatPage (FILE * file, int & pos, 
                                          WRI_format_page_t * aPage)
   /*
     Reads one format page located at offset pos into aPage: the fcFirst
     word, then the data bytes, then the cFod field.  On return pos holds
     the offset just after what was read, and the file is positioned where
     it was on entry (probably in the text stream).
     Always returns NO_ERROR.
   */
{
    // FIXME : check error code
    
    // remember where the caller was reading
    const int callerPos = ftell (file);
    
    fseek (file, pos, SEEK_SET);
    
    ReadWord32 (file, aPage->fcFirst);
    fread (&aPage->data, 1, sizeof (aPage->data), file);
    fread (&aPage->cFod, 1, sizeof (aPage->cFod), file);
    
    // report where the page ended, then go back to the caller's position
    pos = ftell (file);
    fseek (file, callerPos, SEEK_SET);
    
    return NO_ERROR;
}


/////////////////////////////////////////////////////////////////
XML_Char * 
IE_Imp_MSWrite::MakeProperties (XML_Char * buf, const size_t bufSize,
                                XML_Char * attr, XML_Char * value)
   //  Build a property by formatting 'value' through the printf-style
   //  template 'attr', and append it to the C string held in buf.
   //
   //  buf     : destination, must contain a NUL-terminated string
   //  bufSize : total size of buf in characters, terminator included
   //  attr    : template with at most one %s conversion and nothing else
   //            (the temporary is sized on that assumption)
   //  value   : text substituted for the %s
   //
   //  Returns buf.  buf is left unchanged when an argument is NULL, when
   //  buf holds no terminator within bufSize, when memory runs out, or
   //  when the whole property does not fit: a truncated property is never
   //  written.
   //
   //  TODO: make buf dynamically reallocated if needed.
{
    if (!buf || !attr || !value || bufSize == 0)
    {
        return buf;
    }
    
    // Locate the end of the current contents without reading past bufSize.
    const void * terminator = memchr (buf, 0, bufSize);
    if (!terminator)
    {
        return buf;
    }
    const size_t used = (size_t)((const XML_Char *)terminator - buf);
    const size_t room = bufSize - used - 1;    // characters still free
    
    XML_Char * temp = (XML_Char *)malloc ((strlen (attr) + strlen (value) + 1) 
                                          * sizeof (XML_Char));
    if (!temp)
    {
        return buf;
    }
    
    // With a single %s in attr the result is shorter than the two strings
    // put together, so 'temp' is large enough.
    sprintf (temp, attr, value);
    
    // The third argument of strncat is the number of characters to append,
    // not the size of the destination: bound it by the room that is left.
    if (strlen (temp) <= room)
    {
        strncat (buf, temp, room);
    }
    free (temp);
    
    return buf;
}


//  UT_uint32 IE_Imp_MSWrite::ReadStyle(FILE * file, const write_file_desc & desc, 
//                                      UT_uint16 pageNumber,/* start at 1 */
//                                      text_run_array * & buf, size_t textLen)
//     /*
//       Return the text run array for the text starting a pageNumber.
     
//       Typically this function should be called right after a ReadText with
//       bufLen == textLen so that you retrieve both the text and formatting. 
     
//       buf will be allocated and must be freed by the caller. input value
//       is IGNORED.
     
//       All offsets are relative to the beginning of the document and not of the 
//       current data seek.
//     */
//  {
//     size_t remainingLen = textLen;
//     UT_uint16 currentPage = pageNumber + desc.pnPara - 1;
//     fseek(file, PAGE_SIZE * currentPage);
   
   
//     while (remainingLen > 0) {
//        ReadPageStyle(buf, currentPage, remainingLen);
//        currentPage++;
//     }
   
//     return NO_ERROR;
//  }

//  UT_uint32 IE_Imp_MSWrite::ReadPageStyle(text_run_array * & buf, 
//                                          UT_uint16 currentPage, 
//                                          size_t & remainingLen)
//  {
//     char page[PAGE_SIZE];
//     UT_Byte numOfFod;
   
//     fread(file, 1, PAGE_SIZE, page);
   
//     numOfFod = page[127];
//  }