/*************************************************************************/ /* */ /* Use of the link grammar parsing system is subject to the terms of the */ /* license set forth in the LICENSE file included with this software. */ /* This license allows free redistribution and use in source and binary */ /* forms, with or without modification, subject to certain conditions. */ /* */ /*************************************************************************/ package org.linkgrammar; import java.io.File; import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.Reader; import java.net.ServerSocket; import java.net.Socket; import java.text.SimpleDateFormat; import java.util.Map; // import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; /** * A simple server implementation for running Link Grammar as a * standalone server. The server accepts parsing requests and returns * the result as a JSON formatted string (see this * JSON website for more information). * There is no session maintained between client and server, it's a * simple, stateless, single round-trip, request-response protocol. * * Requests consist of a bag of parameters separated by the null '\0' * character. Each request must be terminated with the newline '\n' * character. Each parameter has the form name:value\0 where * name and value can contain any character except * '\0' and '\n'. The following parameters are recognized: * *
 * Return <code>true</code> if LinkGrammar is initialized for the current thread
 * and <code>false</code> otherwise.
 */
public static boolean isInitialized()
{
    // Read the flag through its accessor and hand the value back unchanged.
    final boolean ready = initialized.get();
    return ready;
}
/**
*
* Initialize LinkGrammar for the current thread, if this is not already done. Note that * this method is called by all other methods in this class that invoke LinkGrammar * so there's no really need to call it yourself. It is safe to call the method repeatedly. * Note that the dictionary language/location must be set *before* calling init. *
*/ public static void init() { if (!initialized.get()) { LinkGrammar.init(); initialized.set(Boolean.TRUE); } } /** ** Cleanup allocated memory for use of LinkGrammar in the current thread. *
*/ public static void close() { LinkGrammar.close(); initialized.set(Boolean.FALSE); } private static void trace(String s) { if (verbose) System.out.println("LG " + dateFormatter.format(new java.util.Date()) + " " + s); } /** ** Apply configuration parameters to the parser. *
*/ public static void configure(LGConfig config) { init(); if (config.getMaxCost() > -1.0) LinkGrammar.setMaxCost(config.getMaxCost()); if (config.getMaxParseSeconds() > -1) LinkGrammar.setMaxParseSeconds(config.getMaxParseSeconds()); // XXX DO NOT DO THIS!!! This will royally screw up results! // Setting the link-grammar max linkages to a low number, // e.g. 4, when there are a dozen or more parses, will cause a // RANDOM 4 sentences to be returned out of the dozen, instead // of the top 4. We almost surely do NOT want that! Or rather, // I'm assuming no one actually wants that; we want the top 4!! // So, setting the link-grammar max linkages iss a huge mistake! // if (config.getMaxLinkages() > -1) // LinkGrammar.setMaxLinkages(config.getMaxLinkages()); } /** * AssumingLinkGrammar.parse
has already been called,
* construct a full ParseResult
given the passed in
* configuration. For example, no more that
* config.getMaxLinkages
are returned, etc.
*
* @param config
* @return
*/
public static ParseResult getAsParseResult(LGConfig config)
{
    // A linkage has to be built at least once before anything is queried,
    // otherwise the parser crashes.
    LinkGrammar.makeLinkage(0);

    ParseResult result = new ParseResult();
    result.setParserVersion(LinkGrammar.getVersion());
    result.setDictVersion(LinkGrammar.getDictVersion());
    result.numSkippedWords = LinkGrammar.getNumSkippedWords();

    // Never report more linkages than the caller asked for, nor more than exist.
    int limit = Math.min(config.getMaxLinkages(), LinkGrammar.getNumLinkages());
    int index = 0;
    while (index < limit)
    {
        LinkGrammar.makeLinkage(index);
        Linkage current = readCurrentLinkage(config);
        result.linkages.add(current);
        index++;
    }
    return result;
}

/**
 * Copy the linkage most recently selected with LinkGrammar.makeLinkage
 * into a fresh Linkage object: costs, words, disjuncts, links and, when
 * the configuration asks for them, the constituent and diagram strings.
 */
private static Linkage readCurrentLinkage(LGConfig config)
{
    Linkage current = new Linkage();
    current.setDisjunctCost(LinkGrammar.getLinkageDisjunctCost());
    current.setLinkCost(LinkGrammar.getLinkageLinkCost());
    current.setLinkedWordCount(LinkGrammar.getNumWords());
    current.setNumViolations(LinkGrammar.getLinkageNumViolations());

    String [] disjunctTexts = new String[LinkGrammar.getNumWords()];
    String [] wordTexts = new String[LinkGrammar.getNumWords()];
    for (int w = 0; w < wordTexts.length; w++)
    {
        disjunctTexts[w] = LinkGrammar.getLinkageDisjunct(w);
        wordTexts[w] = LinkGrammar.getLinkageWord(w);
    }
    current.setWords(wordTexts);
    current.setDisjuncts(disjunctTexts);

    int linkTotal = LinkGrammar.getNumLinks();
    for (int k = 0; k < linkTotal; k++)
    {
        Link edge = readLink(k);
        current.getLinks().add(edge);
    }

    if (config.isStoreConstituentString())
    {
        current.setConstituentString(LinkGrammar.getConstituentString());
    }
    if (config.isStoreDiagramString())
    {
        current.setDiagramString(LinkGrammar.getLinkString());
    }
    return current;
}

/**
 * Build a Link object describing link number k of the current linkage.
 */
private static Link readLink(int k)
{
    Link edge = new Link();
    edge.setLabel(LinkGrammar.getLinkLabel(k));
    edge.setLeft(LinkGrammar.getLinkLWord(k));
    edge.setRight(LinkGrammar.getLinkRWord(k));
    edge.setLeftLabel(LinkGrammar.getLinkLLabel(k));
    edge.setRightLabel(LinkGrammar.getLinkRLabel(k));
    return edge;
}
/**
 * Construct a JSON formatted result for a parse which yielded 0 linkages.
 * The object carries the same top-level keys that <code>getAsJSONFormat</code>
 * emits: a zero skipped-word count, an empty linkage list, and the parser
 * and dictionary versions.
 *
 * @param config Not consulted by this method.
 * @return A complete JSON object as a string.
 */
public static String getEmptyJSONResult(LGConfig config)
{
    StringBuilder buf = new StringBuilder();
    // The opening brace matters: without it the text ends in '}' but never
    // opens the object, so it is not valid JSON.
    buf.append("{\"numSkippedWords\":0,");
    buf.append("\"linkages\":[],");
    buf.append("\"version\":\"").append(LinkGrammar.getVersion()).append("\",");
    buf.append("\"dictVersion\":\"").append(LinkGrammar.getDictVersion()).append("\"}");
    return buf.toString();
}
/**
 * Format the current parsing result as a JSON string. This method
 * assumes that <code>LinkGrammar.parse</code> has been called before.
 *
 * The result is one JSON object holding the skipped-word count, a list of
 * linkages, and the parser and dictionary versions. Each linkage lists its
 * words, disjuncts, costs, violation count and links, plus the constituent
 * and diagram strings when the configuration asks for them.
 *
 * @param config Supplies the cap on the number of linkages and the flags
 *               selecting the optional constituent and diagram strings.
 * @return The JSON text.
 */
public static String getAsJSONFormat(LGConfig config)
{
    LinkGrammar.makeLinkage(0); // need to call at least once, otherwise it crashes
    int maxLinkages = Math.min(config.getMaxLinkages(), LinkGrammar.getNumLinkages());
    StringBuilder buf = new StringBuilder();
    buf.append("{\"numSkippedWords\":").append(LinkGrammar.getNumSkippedWords());
    buf.append(",\"linkages\":[");
    for (int li = 0; li < maxLinkages; li++)
    {
        LinkGrammar.makeLinkage(li);

        // Ask for the word count after selecting this linkage, the same way
        // getAsParseResult does, instead of reusing the count of linkage 0.
        // NOTE(review): assumes the count can differ between linkages -
        // confirm against the native API. If it cannot, output is unchanged.
        int numWords = LinkGrammar.getNumWords();

        buf.append("{\"words\":[");
        for (int i = 0; i < numWords; i++)
        {
            buf.append(JSONUtils.jsonString(LinkGrammar.getLinkageWord(i)));
            if (i + 1 < numWords)
                buf.append(",");
        }
        buf.append("], \"disjuncts\":[");
        for (int i = 0; i < numWords; i++)
        {
            buf.append(JSONUtils.jsonString(LinkGrammar.getLinkageDisjunct(i)));
            if (i + 1 < numWords)
                buf.append(",");
        }
        buf.append("], \"disjunctCost\":");
        buf.append(Double.toString(LinkGrammar.getLinkageDisjunctCost()));
        buf.append(", \"linkageCost\":");
        buf.append(Double.toString(LinkGrammar.getLinkageLinkCost()));
        buf.append(", \"numViolations\":");
        buf.append(Integer.toString(LinkGrammar.getLinkageNumViolations()));
        if (config.isStoreConstituentString())
        {
            buf.append(", \"constituentString\":");
            buf.append(JSONUtils.jsonString(LinkGrammar.getConstituentString()));
        }
        if (config.isStoreDiagramString())
        {
            buf.append(", \"diagramString\":");
            buf.append(JSONUtils.jsonString(LinkGrammar.getLinkString()));
        }
        buf.append(", \"links\":[");
        int numLinks = LinkGrammar.getNumLinks();
        for (int i = 0; i < numLinks; i++)
        {
            buf.append("{\"label\":").append(JSONUtils.jsonString(LinkGrammar.getLinkLabel(i))).append(",");
            buf.append("\"left\":").append(LinkGrammar.getLinkLWord(i)).append(",");
            buf.append("\"right\":").append(LinkGrammar.getLinkRWord(i)).append(",");
            buf.append("\"leftLabel\":").append(JSONUtils.jsonString(LinkGrammar.getLinkLLabel(i))).append(",");
            buf.append("\"rightLabel\":").append(JSONUtils.jsonString(LinkGrammar.getLinkRLabel(i))).append("}");
            if (i + 1 < numLinks)
                buf.append(",");
        }
        buf.append("]");
        buf.append("}");
        // Separate linkage objects with commas, but not after the last one.
        if (li < maxLinkages - 1)
            buf.append(",");
    }
    buf.append("],\"version\":\"").append(LinkGrammar.getVersion()).append("\"");
    buf.append(",\"dictVersion\":\"").append(LinkGrammar.getDictVersion()).append("\"");
    buf.append("}");
    return buf.toString();
}
/**
 * Placeholder for a future compact binary encoding of parse results.
 * Nothing is encoded yet: zero bytes are copied out of the scratch
 * buffer, so the returned array is always empty.
 *
 * @param config Not consulted by the current stub.
 * @return A newly allocated array of length zero.
 */
public static byte [] getAsBinary(LGConfig config)
{
    final byte [] scratch = new byte[1024];
    final int used = 0;
    // TODO ..... grow scratch as needed
    final byte [] packed = new byte[used];
    System.arraycopy(scratch, 0, packed, 0, used);
    return packed;
}
private static void handleClient(Socket clientSocket)
{
init();
Reader in = null;
PrintWriter out = null;
JSONUtils msgreader = new JSONUtils();
try
{
trace("Connection accepted from : " + clientSocket.getInetAddress());
in = new InputStreamReader(clientSocket.getInputStream());
Map
/**
 * Parse a piece of text with the given configuration and return the <code>ParseResult</code>.
 *
 * @param config The configuration to be used. If this is the first time the <code>parse</code>
 * method is called within the current thread, the dictionary location (if not <code>null</code>)
 * of this parameter will be used to initialize the parser. Otherwise the dictionary location is
 * ignored.
 * @param text The text to parse, normally a single sentence.
 * @return The <code>ParseResult</code>. Note that <code>null</code> is never returned. If parsing
 * failed, there will be 0 linkages in the result.
 */
public static ParseResult parse(LGConfig config, String text)
{
    // Make sure the parser is initialized, then push the caller's settings.
    init();
    configure(config);

    // Run the parse and package whatever it produced.
    LinkGrammar.parse(text);
    final ParseResult outcome = getAsParseResult(config);
    return outcome;
}
public static void main(String [] argv)
{
int threads = 1;
int port = 0;
String dictionaryPath = null;
String language = null;
try
{
int argIdx = 0;
if (argv[argIdx].equals("-verbose")) { verbose = true; argIdx++; }
if (argv[argIdx].equals("-threads")) { threads = Integer.parseInt(argv[++argIdx]); argIdx++; }
port = Integer.parseInt(argv[argIdx++]);
if (argv.length > argIdx)
language = argv[argIdx++];
if (argv.length > argIdx)
dictionaryPath = argv[argIdx++];
}
catch (Throwable ex)
{
if (argv.length > 0)
ex.printStackTrace(System.err);
System.out.println("Usage: java org.linkgrammar.LGService [-verbose] [-threads n] port [language] [dictPath]");
System.out.println("Start a link-grammar parse server on tcp/ip port. The server returns");
System.out.println("JSON-formated parse results. Socket input should be a single sentence");
System.out.println("to parse, preceeded by the identifier \"text:\".\n");
System.out.println(" 'port' The TCP port the service should listen to.");
System.out.println(" -verbose Generate verbose output.");
System.out.println(" -threads Number of concurrent threads/clients allowed (default 1).");
System.out.println(" 'language' Language abbreviation (en, ru, de, lt or fr).");
System.out.println(" 'dictPath' Full path to the Link-Grammar dictionaries.");
System.exit(-1);
}
if (dictionaryPath != null)
{
File f = new File(dictionaryPath);
if (!f.exists())
{ System.err.println("Dictionary path " + dictionaryPath + " not found."); System.exit(-1); }
else if (!f.isDirectory())
{ System.err.println("Dictionary path " + dictionaryPath + " not a directory."); System.exit(-1); }
}
System.out.println("Starting Link Grammar Server at port " + port +
", with " + threads + " available processing threads and " +
((dictionaryPath == null) ? " with default dictionary location." :
"with dictionary location '" + dictionaryPath + "'."));
ThreadPoolExecutor threadPool = new ThreadPoolExecutor(threads,
threads,
Long.MAX_VALUE,
TimeUnit.SECONDS,
new LinkedBlockingQueue