/* Abiword * Copyright (C) 2001 Christian Biesinger * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include "ut_types.h" #include "ut_string.h" #include "ut_iconv.h" #include "ut_debugmsg.h" #include "pd_Document.h" #include "ie_imp_StarOffice.h" #include "ie_imp_StarOffice_encodings.h" #include "xap_Dialog.h" #include "xap_Frame.h" // for password dialog #include "xap_Dlg_Password.h" #include "xap_Dialog_Id.h" #include "xap_DialogFactory.h" #ifdef DEBUG #include #endif #define CHECK_OLE_RV(err) do { if ((err) != MS_OLE_ERR_OK) return UT_IE_BOGUSDOCUMENT; } while (0) #define RETURN_IF_FALSE(err) do { if (!(err)) return UT_IE_BOGUSDOCUMENT; } while (0) // ******************************************************************************** // Password getter, taken from Word97 importer #define GetPassword() _getPassword ( getDoc()->getApp()->getLastFocussedFrame() ) static UT_String _getPassword (XAP_Frame * pFrame) { UT_String password ( "" ); if ( pFrame ) { pFrame->raise (); XAP_DialogFactory * pDialogFactory = (XAP_DialogFactory *)(pFrame->getDialogFactory()); XAP_Dialog_Password * pDlg = static_cast(pDialogFactory->requestDialog(XAP_DIALOG_ID_PASSWORD)); UT_ASSERT(pDlg); pDlg->runModal (pFrame); XAP_Dialog_Password::tAnswer ans = pDlg->getAnswer(); bool bOK = (ans == XAP_Dialog_Password::a_OK); if (bOK) password = pDlg->getPassword (); UT_DEBUGMSG(("SDW: Password is %s\n", password.c_str())); pDialogFactory->releaseDialog(pDlg); } return password; } // ******************************************************************************** // Sniffer static const UT_Byte sdwSignature[] = {0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}; UT_Confidence_t IE_Imp_StarOffice_Sniffer::recognizeContents(const char* szBuf, UT_uint32 iNumBytes) { if (iNumBytes >= sizeof(sdwSignature)) { return (memcmp(szBuf, sdwSignature, sizeof(sdwSignature)) == 0) ? UT_CONFIDENCE_GOOD : UT_CONFIDENCE_ZILCH; } return UT_CONFIDENCE_ZILCH; } UT_Confidence_t IE_Imp_StarOffice_Sniffer::recognizeSuffix(const char* szSuffix) { return (UT_stricmp(szSuffix, ".sdw") == 0) ? UT_CONFIDENCE_PERFECT : UT_CONFIDENCE_ZILCH; } UT_Error IE_Imp_StarOffice_Sniffer::constructImporter(PD_Document *pDocument, IE_Imp **ppie) { *ppie = new IE_Imp_StarOffice(pDocument); if (!ppie) return UT_OUTOFMEM; return UT_OK; } bool IE_Imp_StarOffice_Sniffer::getDlgLabels(const char** pszDesc, const char** pszSuffixList, IEFileType* ft) { *pszDesc = "StarWriter up to 5.x (*.sdw)"; *pszSuffixList = "*.sdw"; *ft = getFileType(); return true; } // ******************************************************************************** // Header Class UT_Error DocHdr::load(MsOleStream* stream) { UT_DEBUGMSG(("SDW: entering DocHdr::load\n")); static const char sw3hdr[] = "SW3HDR"; static const char sw4hdr[] = "SW4HDR"; static const char sw5hdr[] = "SW5HDR"; char header[7]; bool err = streamRead(stream, header, 7); RETURN_IF_FALSE(err); if (memcmp(header, sw3hdr, sizeof(sw3hdr)) != 0 && memcmp(header, sw4hdr, sizeof(sw4hdr)) != 0 && memcmp(header, sw5hdr, sizeof(sw5hdr)) != 0) return UT_IE_BOGUSDOCUMENT; err = streamRead(stream, cLen); RETURN_IF_FALSE(err); err = streamRead(stream, nVersion); RETURN_IF_FALSE(err); err = streamRead(stream, nFileFlags); RETURN_IF_FALSE(err); err = streamRead(stream, nDocFlags); RETURN_IF_FALSE(err); err = streamRead(stream, nRecSzPos); RETURN_IF_FALSE(err); err = streamRead(stream, nDummy); RETURN_IF_FALSE(err); err = streamRead(stream, nDummy16); RETURN_IF_FALSE(err); err = streamRead(stream, cRedlineMode); RETURN_IF_FALSE(err); err = streamRead(stream, nCompatVer); RETURN_IF_FALSE(err); UT_DEBUGMSG(("SDW: clen %i nversion %i fileflags %i docflags %i recszpos %i readlinemode %i compatver %i\n", cLen, nVersion, nFileFlags, nDocFlags, nRecSzPos, cRedlineMode, nCompatVer)); // (see sw/source/core/sw3io/sw3doc.cxx line 700) if (nVersion >= SWG_MAJORVERSION && nCompatVer > 0) { // File is in a too new format return UT_IE_BOGUSDOCUMENT; } err = streamRead(stream, cPasswd, 16); RETURN_IF_FALSE(err); err = streamRead(stream, cSet); RETURN_IF_FALSE(err); err = streamRead(stream, cGui); RETURN_IF_FALSE(err); err = streamRead(stream, nDate); RETURN_IF_FALSE(err); err = streamRead(stream, nTime); RETURN_IF_FALSE(err); UT_DEBUGMSG(("SDW: nDate %lu nTime %lu\n", nDate, nTime)); // Find the name of the used encoding for (unsigned int i = 0; i < sizeof(SOEncodings)/sizeof(SOEncodings[0]); i++) { if (SOEncodings[i].number == cSet) { UT_DEBUGMSG(("SDW: Found charset %s for encoding #%i\n", SOEncodings[i].name, cSet)); converter = iconv_open(UCS_2_INTERNAL, SOEncodings[i].name); if (UT_iconv_isValid(converter)) break; } } if (!UT_iconv_isValid(converter)) return UT_ERROR; if (nFileFlags & SWGF_BLOCKNAME) { char buf[64]; err = streamRead(stream, buf, 64); // XXX verify that the string is really null terminated sBlockName = (UT_UCSChar*)UT_convert_cd(buf, strlen(buf) + 1, converter, NULL, NULL); } if (nRecSzPos != 0 && nVersion >= SWG_RECSIZES) { // Read the Recsizes // XXX to be done see sw/source/core/sw3io/sw3imp.cxx#L1070 UT_ASSERT(UT_NOT_IMPLEMENTED); } if (nFileFlags & SWGF_BAD_FILE) return UT_IE_BOGUSDOCUMENT; if (nFileFlags & SWGF_HAS_PASSWD) cryptor = new SDWCryptor(nDate, nTime, cPasswd); else cryptor = NULL; return UT_OK; } // ******************************************************************************** // Actual Importer IE_Imp_StarOffice::IE_Imp_StarOffice(PD_Document *pDocument) : IE_Imp(pDocument), mOle(NULL), mDocStream(NULL) { } IE_Imp_StarOffice::~IE_Imp_StarOffice() { if (mDocStream) ms_ole_stream_close(&mDocStream); if (mOle) ms_ole_destroy(&mOle); } bool IE_Imp_StarOffice::readRecSize(MsOleStream* aStream, UT_uint32& aSize) { // Yes, that's correct, only 3 bytes. guint8 buf [3]; aSize = 0; bool rv = streamRead(aStream, buf, 3); // buf content is little endian. aSize = buf [0] | (buf[1] << 8) | (buf [2] << 16); aSize -= 4; // Substract 4 for the rec type + size if (aSize == 0xFFFFFF && mDocHdr.nVersion >= SWG_LONGRECS) { // XXX need recsizes from header, see above UT_ASSERT(UT_NOT_IMPLEMENTED); } return rv; } bool IE_Imp_StarOffice::readFlagRec(MsOleStream* stream, UT_uint8& flags, UT_uint32* newPos) { if (!streamRead(stream, flags)) return false; if (newPos) *newPos = ms_ole_stream_tell(stream) + (flags & 0xF); return true; } bool IE_Imp_StarOffice::readByteString(MsOleStream* stream, char*& str, UT_uint16* aLength) { UT_uint16 length; str = NULL; if (!streamRead(stream, length)) return false; str = new char[length + 1]; if (!streamRead(stream, str, length)) return false; str[length] = 0; if (aLength) *aLength = length; return true; } bool IE_Imp_StarOffice::readByteString(MsOleStream* stream, UT_UCSChar*& str) { UT_uint16 len; char* rawString; str = NULL; if (!readByteString(stream, rawString, &len)) return false; // decrypt if (mDocHdr.cryptor) mDocHdr.cryptor->Decrypt(rawString, rawString, len); str = (UT_UCSChar*)UT_convert_cd(rawString, len + 1, mDocHdr.converter, NULL, NULL); #ifdef DEBUG if (!str) { UT_DEBUGMSG(("SDW: UT_convert_cd returned %i (%s)\n", errno, strerror(errno))); UT_DEBUGMSG(("SDW: Failed string was: \"%s\"\n", rawString)); } #endif delete[] rawString; return str ? true : false; } UT_Error IE_Imp_StarOffice::importFile(const char* szFilename) { UT_DEBUGMSG(("SDW: Starting import\n")); UT_Error rv; MsOleErr err = ms_ole_open(&mOle, szFilename); CHECK_OLE_RV(err); err = ms_ole_stream_open(&mDocStream, mOle, "/", "StarWriterDocument", 'r'); CHECK_OLE_RV(err); // Read the CompObj (no, I don't know what Comp stands for) if ((rv = readCompObj()) != UT_OK) return rv; if (!getDoc()->appendStrux(PTX_Section, NULL)) return UT_IE_NOMEMORY; if (!getDoc()->appendStrux(PTX_Block, NULL)) return UT_IE_NOMEMORY; UT_DEBUGMSG(("SDW: Attempting to load DocHdr\n")); if ((rv = mDocHdr.load(mDocStream)) != UT_OK) return rv; // Verify the password if (mDocHdr.cryptor) { if (!mDocHdr.cryptor->SetPassword(GetPassword().c_str())) { UT_DEBUGMSG(("SDW: Wrong password\n")); return UT_IE_PROTECTED; } } // do the actual reading char type; bool done = false; UT_uint32 recSize; while (readChar(mDocStream, type) && !done) { if (!readRecSize(mDocStream, recSize)) return UT_IE_BOGUSDOCUMENT; switch (type) { case SWG_CONTENTS: { UT_uint32 flagsEnd = 0; UT_uint32 nNodes; // sw/source/core/sw3io/sw3sectn.cxx#L129 if (mDocHdr.nVersion >= SWG_LAYFRAMES) { UT_uint8 flags; if (!readFlagRec(mDocStream, flags, &flagsEnd)) return UT_IE_BOGUSDOCUMENT; } if (mDocHdr.nVersion >= SWG_LONGIDX) streamRead(mDocStream, nNodes); else { if (mDocHdr.nVersion >= SWG_LAYFRAMES) { UT_uint16 sectidDummy; if (!streamRead(mDocStream, sectidDummy)) return UT_IE_BOGUSDOCUMENT; } UT_uint16 nodes16; streamRead(mDocStream, nodes16); nNodes = (UT_uint32)nodes16; } if (flagsEnd) { UT_ASSERT(flagsEnd >= ms_ole_stream_tell(mDocStream)); if (ms_ole_stream_tell(mDocStream) != flagsEnd) { UT_DEBUGMSG(("SDW: have not read all flags\n")); ms_ole_stream_lseek(mDocStream, flagsEnd, MsOleSeekSet); } } bool done2 = false; UT_uint32 size2; while (readChar(mDocStream, type) && !done2) { if (!readRecSize(mDocStream, size2)) return UT_IE_BOGUSDOCUMENT; switch (type) { case SWG_TEXTNODE: { // sw/source/core/sw3io/sw3nodes.cxx#L788 UT_DEBUGMSG(("SDW: Found Textnode!\n")); UT_uint8 flags; UT_uint32 newPos; if (!readFlagRec(mDocStream, flags, &newPos)) return UT_IE_BOGUSDOCUMENT; UT_uint16 coll; // ??? if (!streamRead(mDocStream, coll)) return UT_IE_BOGUSDOCUMENT; // XXX check flags if (ms_ole_stream_tell(mDocStream) != newPos) ms_ole_stream_lseek(mDocStream, newPos, MsOleSeekSet); // Read the actual text UT_UCSChar* str; if (!readByteString(mDocStream, str)) return UT_IE_BOGUSDOCUMENT; UT_uint32 len = UT_UCS_strlen(str); if (len) getDoc()->appendSpan(str, len); free(str); getDoc()->appendStrux(PTX_Block, NULL); break; } default: UT_DEBUGMSG(("SDW: SWG_CONTENT: Skipping %lu bytes for record type '%c' (starting at %08lX)\n", size2, type, ms_ole_stream_tell(mDocStream))); if (ms_ole_stream_lseek(mDocStream, size2, MsOleSeekCur) < 0) return UT_IE_BOGUSDOCUMENT; } } break; } case SWG_EOF: done = true; break; default: UT_DEBUGMSG(("SDW: Skipping %lu bytes for record type '%c' (starting at %08lX)\n", recSize, type, ms_ole_stream_tell(mDocStream))); // Skip the record if (ms_ole_stream_lseek(mDocStream, recSize, MsOleSeekCur) < 0) return UT_IE_BOGUSDOCUMENT; } } return UT_OK; } // ******************************************************************************** // CompObj functions UT_Error IE_Imp_StarOffice::readCompObj() { MsOleStream *compObj; MsOleErr err = ms_ole_stream_open(&compObj, mOle, "/", "\1CompObj", 'r'); CHECK_OLE_RV(err); // Get version of the file ms_ole_stream_lseek(compObj, 8, MsOleSeekSet); // see oo's sot/source/sdstor/stgole.cxx lines 144ff UT_sint32 marker; bool rv = streamRead(compObj, marker); RETURN_IF_FALSE(rv); if (marker == -1) { rv = streamRead(compObj, mCompObj.clsId); RETURN_IF_FALSE(rv); UT_sint32 length; rv = streamRead(compObj, length); RETURN_IF_FALSE(rv); mCompObj.username = new char[length + 1]; rv = streamRead(compObj, mCompObj.username, length); RETURN_IF_FALSE(rv); mCompObj.username[length] = 0; rv = streamRead(compObj, length); RETURN_IF_FALSE(rv); if (length == -1) // windows clipboard format streamRead(compObj, mCompObj.version); else if (length > 0) { mCompObj.versionString = new char[length + 1]; rv = streamRead(compObj, mCompObj.versionString, length); RETURN_IF_FALSE(rv); mCompObj.versionString[length] = 0; mCompObj.version = getVersion(mCompObj.versionString); } else if (length) return UT_IE_BOGUSDOCUMENT; } ms_ole_stream_close(&compObj); return UT_OK; } UT_uint32 IE_Imp_StarOffice::getVersion(const char* szVerString) { static const char so30ver[] = "StarWriter 3.0"; static const char so40ver[] = "StarWriter 4.0"; static const char so50ver[] = "StarWriter 5.0"; if (UT_strcmp(szVerString, so30ver) == 0) return SOFFICE_FILEFORMAT_31; else if (UT_strcmp(szVerString, so40ver) == 0) return SOFFICE_FILEFORMAT_40; else if (UT_strcmp(szVerString, so50ver) == 0) return SOFFICE_FILEFORMAT_50; else return 0; } /*******************************************************/ #include "xap_Module.h" ABI_PLUGIN_DECLARE("StarOffice") // we use a reference-counted sniffer static IE_Imp_StarOffice_Sniffer * m_impSniffer = 0; ABI_FAR_CALL int abi_plugin_register (XAP_ModuleInfo * mi) { if (!m_impSniffer) { m_impSniffer = new IE_Imp_StarOffice_Sniffer (); } else { m_impSniffer->ref(); } mi->name = "StarOffice .sdw file importer"; mi->desc = "Imports StarWriter binary (OLE) documents"; mi->version = ABI_VERSION_STRING; mi->author = "Christian Biesinger "; mi->usage = "No Usage"; IE_Imp::registerImporter (m_impSniffer); return 1; } ABI_FAR_CALL int abi_plugin_unregister (XAP_ModuleInfo * mi) { mi->name = 0; mi->desc = 0; mi->version = 0; mi->author = 0; mi->usage = 0; UT_ASSERT (m_impSniffer); IE_Imp::unregisterImporter (m_impSniffer); if(!m_impSniffer->unref()) { m_impSniffer = 0; } return 1; } ABI_FAR_CALL int abi_plugin_supports_version (UT_uint32 major, UT_uint32 minor, UT_uint32 release) { return 1; }