/* accidence * Copyright (C) 2007 Gabriel Bakiewicz * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include "accidence.h" #include "accidence-provider.h" #ifndef LTOOL_SERVER #define LTOOL_SERVER "localhost" #endif #ifndef LTOOL_PORT #define LTOOL_PORT "8081" #endif ACCIDENCE_PLUGIN_DECLARE ("LanguageTool") static const char *supported_lang[] = { "ca", /* Catalan */ "da", /* Danish */ "en", /* English */ "nl", /* Dutch */ "fr", /* French */ "ga", /* Galician */ "de", /* German */ "ic", /* Icelandic */ "it", /* Italian */ "lt", /* Lithuanian */ "pl", /* Polish */ "ro", /* Romanian */ "ru", /* Russian */ "sk", /* Slovak */ "sl", /* Slovenian */ "es", /* Spanish */ "sv", /* Swedish */ "uk" /* Ukrainian */ }; static const size_t supported_lang_count = G_N_ELEMENTS(supported_lang); static char s_errorBuffer[CURL_ERROR_SIZE]; static std::string s_buffer; class LanguageToolWrap { public: LanguageToolWrap(); ~LanguageToolWrap(); void setCheckerLanguage(const char *language) { m_sLang = language; } accidence_check_status checkSentence(const std::string& sentence); AccidenceMistake* getMistakeList(); void disposeMistakes(); private: void _parseBuffer(); CURL *m_easy_handle; std::vector m_vecMistakes; std::string m_sLang; }; // curl callback function static int writer(void *data, size_t size, size_t nmemb, std::string *buffer) { int result = 0; if (buffer != NULL) { buffer->append((char *)data, size * nmemb); result = size * nmemb; } return result; } // TODO: test if the server exists and set a status flag LanguageToolWrap::LanguageToolWrap() { m_easy_handle = curl_easy_init(); if (m_easy_handle) { curl_easy_setopt(m_easy_handle, CURLOPT_ERRORBUFFER, s_errorBuffer); curl_easy_setopt(m_easy_handle, CURLOPT_WRITEFUNCTION, writer); curl_easy_setopt(m_easy_handle, CURLOPT_WRITEDATA, &s_buffer); curl_easy_setopt(m_easy_handle, CURLOPT_TIMEOUT, 4 ); curl_easy_setopt(m_easy_handle, CURLOPT_CONNECTTIMEOUT, 5 ); } } LanguageToolWrap::~LanguageToolWrap() { curl_easy_cleanup(m_easy_handle); disposeMistakes(); } // mount languagetool url with GET parameters static char * mountURL(char *lang, char *sentence) { char *host; char *get_param; char *lang_param, *text_param; char *url; host = g_strjoin(":", LTOOL_SERVER, LTOOL_PORT, NULL); lang_param = g_strjoin("=", "language", lang, NULL); text_param = g_strjoin("=", "text", sentence, NULL); get_param = g_strjoin("&", lang_param, text_param, NULL); url = g_strjoin("/?", host, get_param, NULL); g_free(host); g_free(get_param); g_free(lang_param); g_free(text_param); return url; } accidence_check_status LanguageToolWrap::checkSentence(const std::string& sentence) { CURLcode curlResp; char *url; if (!m_easy_handle) return ACCIDENCE_CHECK_ERROR; s_buffer.clear(); url = mountURL(const_cast(m_sLang.substr(0,2).c_str()), const_cast(sentence.c_str())); curl_easy_setopt(m_easy_handle, CURLOPT_URL, url); curlResp = curl_easy_perform(m_easy_handle); if (curlResp == CURLE_OK) { _parseBuffer(); if (m_vecMistakes.size()) return ACCIDENCE_CHECK_MISTAKES; return ACCIDENCE_CHECK_OK; } g_debug("Haven't received answer from language tool server"); return ACCIDENCE_CHECK_ERROR; } // Parse the XML data in the buffer to AccidenceMistake structure void LanguageToolWrap::_parseBuffer() { xmlTextReaderPtr pTxtReader; const xmlChar *szName, *szValue; int iRet; pTxtReader = xmlReaderForMemory(s_buffer.c_str(), s_buffer.length(), "noname.xml", NULL, 0); /* DEBUG!! */ printf("XML content: \n%s\n",s_buffer.c_str()); if (!pTxtReader) { fprintf(stderr, "Failed to parse document\n"); return; } iRet = xmlTextReaderRead(pTxtReader); while (iRet) { if (xmlTextReaderDepth(pTxtReader) == 1 && xmlTextReaderNodeType(pTxtReader) == 1) { AccidenceMistake *mstk = NULL; mstk = g_new0(AccidenceMistake, 1); std::istringstream isFrom((const char*)(xmlTextReaderGetAttribute(pTxtReader, (xmlChar *)"fromx"))); isFrom >> mstk->pos_start; std::istringstream isTo((const char*)(xmlTextReaderGetAttribute(pTxtReader, (xmlChar *)"tox"))); isTo >> mstk->pos_end; mstk->message = (char *)(xmlTextReaderGetAttribute(pTxtReader, (xmlChar *)"msg")); gchar *szReplacements = (gchar *)(xmlTextReaderGetAttribute(pTxtReader, (xmlChar *)"replacements")); gchar **szSuggestions = g_strsplit(szReplacements, "#", 4); int iSuggestionLen = 0; while (szSuggestions[iSuggestionLen]) iSuggestionLen++; mstk->num_suggestions = iSuggestionLen; mstk->suggestions = szSuggestions; m_vecMistakes.push_back(mstk); } iRet = xmlTextReaderRead(pTxtReader); } xmlFreeTextReader(pTxtReader); xmlCleanupParser(); } AccidenceMistake* LanguageToolWrap::getMistakeList() { AccidenceMistake *mistakes = NULL; AccidenceMistake *mstk_prev = NULL; for (int iMstk = 0; iMstk < m_vecMistakes.size(); iMstk++) { AccidenceMistake *mstk; mstk = m_vecMistakes.at(iMstk); mstk->next = NULL; if (mstk_prev) mstk_prev->next = mstk; else mistakes = mstk; mstk_prev = mstk; } return mistakes; } void LanguageToolWrap::disposeMistakes() { for (int iPos = 0; iPos < m_vecMistakes.size(); iPos++) { AccidenceMistake *pMstk = m_vecMistakes.at(iPos); g_strfreev(pMstk->suggestions); g_free(pMstk); } m_vecMistakes.clear(); } /********************************/ /* Accidence provider functions */ /********************************/ static accidence_check_status languagetool_checker_check_sentence (AccidenceChecker *checker, const char *const sentence) { LanguageToolWrap *lt_checker; g_return_val_if_fail(checker, ACCIDENCE_CHECK_ERROR); lt_checker = (LanguageToolWrap *) checker->grammar_data; if (!lt_checker) return ACCIDENCE_CHECK_ERROR; return lt_checker->checkSentence(sentence); } static AccidenceMistake* languagetool_checker_get_mistakes (AccidenceChecker *checker) { LanguageToolWrap *lt_checker; g_return_val_if_fail(checker, NULL); lt_checker = (LanguageToolWrap *) checker->grammar_data; if (!lt_checker) return NULL; return lt_checker->getMistakeList(); } static void languagetool_checker_dispose_mistakes (AccidenceChecker *checker, AccidenceMistake *mistakes) { LanguageToolWrap *lt_checker; g_return_if_fail(checker); lt_checker = (LanguageToolWrap *) checker->grammar_data; if (!lt_checker) return; lt_checker->disposeMistakes(); // g_debug("DEBUG: language tool mistake dispose\n"); } static AccidenceChecker* languagetool_provider_request_checker(AccidenceProvider *provider, const char *lang_tag) { LanguageToolWrap* lt_checker; g_return_val_if_fail(provider, NULL); g_return_val_if_fail(lang_tag, NULL); if (provider->checker_exists(provider, lang_tag) == 0) return NULL; lt_checker = new LanguageToolWrap(); if (!lt_checker) return NULL; lt_checker->setCheckerLanguage(lang_tag); AccidenceChecker* checker = g_new0(AccidenceChecker, 1); checker->grammar_data = (void *) lt_checker; checker->check_sentence = languagetool_checker_check_sentence; checker->get_mistakes = languagetool_checker_get_mistakes; checker->dispose_mistakes = languagetool_checker_dispose_mistakes; return checker; } static void languagetool_provider_dispose_checker (AccidenceProvider *provider, AccidenceChecker *checker) { LanguageToolWrap *lt_checker; g_return_if_fail(checker); lt_checker = (LanguageToolWrap *) checker->grammar_data; delete lt_checker; g_free(checker); } static void languagetool_provider_dispose (AccidenceProvider *provider) { g_free(provider); } static const char* languagetool_provider_identify (AccidenceProvider *provider) { return "languagetool"; } static const char* languagetool_provider_describe (AccidenceProvider *provider) { return "Language Tool Provider"; } static int languagetool_provider_checker_exists (AccidenceProvider *provider, const char *const lang_tag) { int i; g_return_val_if_fail(lang_tag, 0); for (i = 0; i < supported_lang_count; i++) { if (!g_ascii_strncasecmp(supported_lang[i], lang_tag, 2)) { return 1; } } return 0; } #ifdef __cplusplus extern "C" { #endif AccidenceProvider * init_accidence_provider(void) { AccidenceProvider *provider; provider = g_new0(AccidenceProvider, 1); provider->request_checker = languagetool_provider_request_checker; provider->dispose_checker = languagetool_provider_dispose_checker; provider->dispose = languagetool_provider_dispose; provider->identify = languagetool_provider_identify; provider->describe = languagetool_provider_describe; provider->checker_exists = languagetool_provider_checker_exists; return provider; } #ifdef __cplusplus } // extern "C" #endif