#include #include #include #include #include #include #include "accidence.h" #include "accidence-provider.h" ACCIDENCE_PLUGIN_DECLARE ("LinkGrammar") typedef struct _LGrammarMistake LGrammarMistake; struct _LGrammarMistake { int iStart; int iEnd; int iWord; }; class LinkGrammarWrap { public: LinkGrammarWrap(); ~LinkGrammarWrap(); accidence_check_status checkSentence(const std::string& sentence); AccidenceMistake* getMistakeList(); private: void _purgeMistakes(); Dictionary m_Dict; Parse_Options m_Opts; std::vector m_vecMistake; }; LinkGrammarWrap::LinkGrammarWrap(void) { m_Opts = parse_options_create(); /*m_Dict = dictionary_create("en/4.0.dict", "en/4.0.knowledge", "en/4.0.constituent-knowledge","en/4.0.affix"); */ m_Dict = dictionary_create_lang("en"); parse_options_set_max_parse_time(m_Opts, 1); // 1 second max parse time } LinkGrammarWrap::~LinkGrammarWrap(void) { if (m_Dict) dictionary_delete(m_Dict); if (m_Opts) parse_options_delete(m_Opts); _purgeMistakes(); } accidence_check_status LinkGrammarWrap::checkSentence(const std::string& sentence) { if (!m_Dict) { std::cout << "DEBUG: no dictionary found" << std::endl; return ACCIDENCE_CHECK_ERROR; } // std::cout << "DEBUG: lgrammar: sentence received: " << sentence.c_str() << std::endl; Sentence sent = sentence_create(const_cast(sentence.c_str()),m_Dict); if (!sent) return ACCIDENCE_CHECK_ERROR; // clean mistake vector _purgeMistakes(); // First parse with cost 0 or 1 and no null links parse_options_set_disjunct_cost(m_Opts, 2); parse_options_set_min_null_count(m_Opts, 0); parse_options_set_max_null_count(m_Opts, 0); parse_options_set_islands_ok(m_Opts, 0); parse_options_set_panic_mode(m_Opts, true); parse_options_reset_resources(m_Opts); int num_linkages = sentence_parse(sent, m_Opts); bool res = (num_linkages >= 1); if (true == parse_options_timer_expired(m_Opts)) { res= true; // Mark valid if it's too hard. FIXME. We can attempt to recover // by tweaking parameters once we know what we're doing. } std::string errStr = ""; if (!res && (num_linkages == 0)) { // Now proces with NULL links. to find out what went wrong. parse_options_set_min_null_count(m_Opts, 1); parse_options_set_max_null_count(m_Opts, sentence_length(sent)); parse_options_set_islands_ok(m_Opts, 1); parse_options_reset_resources(m_Opts); num_linkages = sentence_parse(sent, m_Opts); } if (!res) { std::vector vecMapOfWords; // // Get first linkage // LGrammarMistake * pErr = NULL; if (num_linkages > 0) { Linkage linkage = linkage_create(0, sent, m_Opts); if (linkage != NULL) { int i = 0; int iLow= 0; int iHigh= 0; int iOff = 0; // hard coded const char * szSent = sentence.c_str(); int totlen = strlen(szSent); for (i=1; i= totlen) { break; } LGrammarMistake * mstk = NULL; mstk = new LGrammarMistake; mstk->iStart = iLow; mstk->iEnd = iLow + strlen(sentence_get_nth_word(sent, i)); mstk->iWord = i; vecMapOfWords.push_back(mstk); bool bNew = false; if (!sentence_nth_word_has_disjunction(sent, i)) { if (pErr == NULL) { pErr = new LGrammarMistake; pErr->iWord = 0; bNew = true; } if (bNew || (pErr->iWord + 1 < i)) { if (!bNew) { pErr = new LGrammarMistake; } iHigh = iLow + strlen(sentence_get_nth_word(sent, i)); pErr->iStart = iLow + iOff -1; pErr->iEnd = iHigh + iOff -1; if (pErr->iStart < 0) { pErr->iStart = 0; } if (pErr->iEnd < totlen-1) { pErr->iEnd += 1; } pErr->iWord = i; m_vecMistake.push_back(pErr); } else { // // Expand the squiggle // iHigh = iLow + strlen(sentence_get_nth_word(sent, i)) + iOff; pErr->iEnd = iHigh; if (pErr->iEnd < totlen-1) { pErr->iEnd += 1; } pErr->iWord = i; } } iLow += strlen(sentence_get_nth_word(sent, i)); } // // No NULL links but still an error , mark the whole sentence bad. // if (m_vecMistake.size() == 0) { pErr = new LGrammarMistake; pErr->iStart = 0; pErr->iEnd = sentence.length(); if(pErr->iStart < 0) { pErr->iStart = 0; } m_vecMistake.push_back(pErr); } const char *sErr = linkage_get_violation_name(linkage); int count = linkage_get_num_sublinkages(linkage); // // Find linkages with violations // for (i=0; i < count; i++) { int iok = linkage_set_current_sublinkage(linkage, i); if(iok == 0) continue; int j = 0; int iNum = linkage_get_num_links(linkage); for (j=0; j < iNum; j++) // char * szViolation = linkage_get_violation_name(linkage); //if(szViolation != NULL) { int iLword = linkage_get_link_lword(linkage,j); const char * szLword = linkage_get_word(linkage,iLword); int iRword = linkage_get_link_rword(linkage,j); const char * szRword = linkage_get_word(linkage,iRword); } } linkage_delete(linkage); for (i=0; i < vecMapOfWords.size(); i++) { LGrammarMistake * p = vecMapOfWords.at(i); delete p; } vecMapOfWords.clear(); } } else { pErr = new LGrammarMistake; pErr->iStart = 0; pErr->iEnd = sentence.length(); if (pErr->iStart < 0) { pErr->iStart = 0; } m_vecMistake.push_back(pErr); } } sentence_delete(sent); if (res) return ACCIDENCE_CHECK_ERROR; if (m_vecMistake.size()) return ACCIDENCE_CHECK_MISTAKES; return ACCIDENCE_CHECK_OK; } static AccidenceMistake* convertMistake (LGrammarMistake *lgr_mstk) { AccidenceMistake *mstk = NULL; mstk = g_new0(AccidenceMistake, 1); mstk->pos_start = lgr_mstk->iStart; mstk->pos_end = lgr_mstk->iEnd; mstk->message = strdup("no mistake description"); mstk->num_suggestions = 0; mstk->suggestions = NULL; return mstk; } AccidenceMistake* LinkGrammarWrap::getMistakeList() { AccidenceMistake *mistakes = NULL; AccidenceMistake *mstk_prev = NULL; for (int iMstk = 0; iMstk < m_vecMistake.size(); iMstk++) { AccidenceMistake *mstk = g_new0(AccidenceMistake, 1); mstk = convertMistake(m_vecMistake.at(iMstk)); mstk->next = NULL; if (mstk_prev) mstk_prev->next = mstk; else mistakes = mstk; mstk_prev = mstk; } return mistakes; } void LinkGrammarWrap::_purgeMistakes() { std::vector::iterator itVectorData; for (itVectorData = m_vecMistake.begin(); itVectorData < m_vecMistake.end(); itVectorData++) { delete *itVectorData; } m_vecMistake.clear(); } /******************************/ /* Accidence module functions */ /******************************/ static accidence_check_status linkgrammar_checker_check_sentence (AccidenceChecker *checker, const char *const sentence) { LinkGrammarWrap *lg_checker; lg_checker = (LinkGrammarWrap *) checker->grammar_data; if (!lg_checker) return ACCIDENCE_CHECK_ERROR; return lg_checker->checkSentence(sentence); } static AccidenceMistake* linkgrammar_checker_get_mistakes (AccidenceChecker *acc_checker) { LinkGrammarWrap *lg_checker; lg_checker = (LinkGrammarWrap *) acc_checker->grammar_data; if (!lg_checker) return NULL; return lg_checker->getMistakeList(); } static void linkgrammar_checker_dispose_mistakes (AccidenceChecker *acc_checker, AccidenceMistake *mistakes) { AccidenceMistake* p_mistake; while (mistakes) { p_mistake = mistakes; mistakes = mistakes->next; g_free(p_mistake->message); g_free(p_mistake); } } static void linkgrammar_provider_dispose (AccidenceProvider * provider) { g_free (provider); } static AccidenceChecker* linkgrammar_provider_request_checker(AccidenceProvider *provider, const char *lang_tag) { LinkGrammarWrap* lg_checker; if (provider->checker_exists(provider, lang_tag) == 0) return NULL; lg_checker = new LinkGrammarWrap(); if (!lg_checker) return NULL; AccidenceChecker *checker = g_new0(AccidenceChecker, 1); checker->grammar_data = (void *) lg_checker; checker->check_sentence = linkgrammar_checker_check_sentence; checker->get_mistakes = linkgrammar_checker_get_mistakes; checker->dispose_mistakes = linkgrammar_checker_dispose_mistakes; return checker; } static void linkgrammar_provider_dispose_checker (AccidenceProvider *provider, AccidenceChecker *acc_checker) { // printf("DEBUG: link grammar checker dispose\n"); LinkGrammarWrap *lg_checker; lg_checker = (LinkGrammarWrap *) acc_checker->grammar_data; delete lg_checker; g_free (acc_checker); } static const char * linkgrammar_provider_identify (AccidenceProvider *me) { return "linkgrammar"; } static const char* linkgrammar_provider_describe (AccidenceProvider * me) { return "Link-Grammar Provider"; } static int linkgrammar_provider_checker_exists (AccidenceProvider * me, const char *const tag) { if (!strcmp ("en", tag) || !strcmp ("en_GB", tag)) { return 1; } return 0; } extern "C" { AccidenceProvider * init_accidence_provider(void) { AccidenceProvider *provider; provider = g_new0(AccidenceProvider, 1); provider->request_checker = linkgrammar_provider_request_checker; provider->dispose_checker = linkgrammar_provider_dispose_checker; provider->dispose = linkgrammar_provider_dispose; provider->identify = linkgrammar_provider_identify; provider->describe = linkgrammar_provider_describe; provider->checker_exists = linkgrammar_provider_checker_exists; return provider; } } // extern "C"