edu.stanford.nlp.trees.AbstractTreebankLanguagePack java code examples

/**
 * Computes the basic category of a label, picking the strategy from the
 * <code>leaveGF</code> flag: when the flag is set the label is handed to
 * <code>stripGF</code>; otherwise the superclass implementation is used.
 * <p/>
 * NOTE(review): the flag name suggests grammatical functions are kept when
 * it is set, yet that is the branch that calls <code>stripGF</code> --
 * confirm this polarity is intended.
 *
 * @param category The whole String name of the label
 * @return The result of whichever of the two strategies was selected
 */
@Override
public String basicCategory(String category) {
 if (!leaveGF) {
  return super.basicCategory(category);
 }
 return stripGF(category);
}

/**
 * Returns a GrammaticalStructureFactory for this language/treebank by
 * delegating to the no-argument <code>grammaticalStructureFactory()</code>.
 * (Intended to be overridden in subclasses.)
 *
 * @param puncFilt A punctuation filter; this implementation does not use it
 * @return Whatever the no-argument <code>grammaticalStructureFactory()</code> returns
 */
@Override
public GrammaticalStructureFactory grammaticalStructureFactory(Predicate<String> puncFilt) {
 final GrammaticalStructureFactory defaultFactory = grammaticalStructureFactory();
 return defaultFactory;
}

/**
 * Returns the tokenizer factory held in the <code>tf</code> field when one
 * is present, and otherwise falls back to the superclass's factory.
 *
 * @return <code>tf</code> if it is non-null, else the superclass result
 */
@Override
public TokenizerFactory<? extends HasWord> getTokenizerFactory() {
 if (tf == null) {
  // Nothing configured on this instance: use the inherited default.
  return super.getTokenizerFactory();
 }
 return tf;
}

LexicalizedParser lexParser = (LexicalizedParser) pd;
for (String tag : lexParser.tagIndex) {
  String t = lp.basicCategory(tag);
  int gfIdx = t.indexOf(lp.getGfCharacter());
  if (gfIdx > 0) {
  posTags.add(lp.basicCategory(t));
String t = lp.basicCategory(tag);
  int gfIdx = t.indexOf(lp.getGfCharacter());
  if (gfIdx > 0) {
gsf = lp.grammaticalStructureFactory(lp.punctuationWordRejectFilter(),
    lp.typedDependencyHeadFinder());

for (int leng = category.length(); i < leng; i++) {
 char ch = category.charAt(i);
 if (isLabelAnnotationIntroducingCharacter(ch)) {
  if (i == 0) {
   sawAtZero = true;

/**
 * {@inheritDoc}
 * <p/>
 * This implementation builds a new <code>TreeTokenizerFactory</code> on each
 * call, wrapping whatever <code>treeReaderFactory()</code> returns.
 */
@Override
public TokenizerFactory<Tree> treeTokenizerFactory() {
 return new TreeTokenizerFactory(treeReaderFactory());
}

/**
 * Returns the punctuation tags that EVALB-style evaluation should ignore
 * for this treebank/language.
 * Traditionally, EVALB has ignored only a subset of the punctuation tags of
 * the English Penn Treebank (quotes, period, comma, colon, etc., but not
 * brackets). This default implementation makes no such distinction and
 * simply hands back the result of <code>punctuationTags()</code>.
 *
 * @return The array returned by <code>punctuationTags()</code>
 */
@Override
public String[] evalBIgnoredPunctuationTags() {
 final String[] ignoredTags = punctuationTags();
 return ignoredTags;
}

/**
 * Returns the first (perhaps unique) start symbol of the treebank, taken
 * from the array supplied by <code>startSymbols()</code>.
 *
 * @return The first start symbol, or null if <code>startSymbols()</code>
 *         yields null or an empty array
 */
@Override
public String startSymbol() {
 final String[] symbols = startSymbols();
 final boolean hasSymbol = symbols != null && symbols.length > 0;
 return hasSymbol ? symbols[0] : null;
}

/**
 * Tests whether a character is one of the annotation introducing
 * characters listed by <code>labelAnnotationIntroducingCharacters()</code>.
 *
 * @param ch The character to check
 * @return true if <code>ch</code> occurs in that array, false otherwise
 */
@Override
public boolean isLabelAnnotationIntroducingCharacter(char ch) {
 final char[] introducers = labelAnnotationIntroducingCharacters();
 for (int k = 0; k < introducers.length; k++) {
  if (introducers[k] == ch) {
   return true;
  }
 }
 return false;
}

/**
 * Returns the basic syntactic category of a label.
 * The label is cut at the index reported by
 * <code>postBasicCategoryIndex(category)</code>, which basically means
 * dropping everything from the first occurrence of one of the
 * <code>labelAnnotationIntroducingCharacters()</code> onwards.
 * Labels that themselves contain such characters are special-cased:
 * (i) a leading character from this set is never a cut point
 * (e.g., '-' or '=' as a token), and (ii) when the label starts with one
 * of these characters, a second instance of the same character is not a
 * cut point either (to deal with '-LRB-', '-RCB-', etc.).
 *
 * @param category The whole String name of the label
 * @return The basic category of the String, or null if
 *         <code>category</code> is null
 */
@Override
public String basicCategory(String category) {
 if (category != null) {
  final int end = postBasicCategoryIndex(category);
  return category.substring(0, end);
 }
 return null;
}

/**
 * Returns the syntactic category together with its 'function'.
 * Normally this means dropping numeric coindexation (coreference indices
 * and the like) while keeping functional information such as Penn Treebank
 * functional tags or ICE phrasal functions, perhaps in the form
 * <code>category-function</code>.
 * <p/>
 * This implementation repeatedly truncates the label at the position given
 * by <code>lastIndexOfNumericTag</code> until that method reports a
 * negative index (non-numeric tags are assumed to be functional tags).
 *
 * @param category The whole String name of the label
 * @return A String giving the category and function, or null if
 *         <code>category</code> is null
 */
@Override
public String categoryAndFunction(String category) {
 if (category == null) {
  return null;
 }
 String trimmed = category;
 // Peel numeric tags off the end one at a time.
 for (int cut = lastIndexOfNumericTag(trimmed); cut >= 0; cut = lastIndexOfNumericTag(trimmed)) {
  trimmed = trimmed.substring(0, cut);
 }
 return trimmed;
}

if (isLabelAnnotationIntroducingCharacter(category.charAt(i))) {
 boolean onlyDigitsFollow = false;
 for (int j = i + 1; j < category.length(); j++) {

/**
 * {@inheritDoc}
 * <p/>
 * This implementation builds a new <code>TreeTokenizerFactory</code> on each
 * call, wrapping whatever <code>treeReaderFactory()</code> returns.
 */
@Override
public TokenizerFactory<Tree> treeTokenizerFactory() {
 return new TreeTokenizerFactory(treeReaderFactory());
}

/**
 * Returns the punctuation tags that EVALB-style evaluation should ignore
 * for this treebank/language.
 * Traditionally, EVALB has ignored only a subset of the punctuation tags of
 * the English Penn Treebank (quotes, period, comma, colon, etc., but not
 * brackets). This default implementation makes no such distinction and
 * simply hands back the result of <code>punctuationTags()</code>.
 *
 * @return The array returned by <code>punctuationTags()</code>
 */
public String[] evalBIgnoredPunctuationTags() {
 final String[] ignoredTags = punctuationTags();
 return ignoredTags;
}

/**
 * Returns the first (perhaps unique) start symbol of the treebank, taken
 * from the array supplied by <code>startSymbols()</code>.
 *
 * @return The first start symbol, or null if <code>startSymbols()</code>
 *         yields null or an empty array
 */
public String startSymbol() {
 final String[] symbols = startSymbols();
 final boolean hasSymbol = symbols != null && symbols.length > 0;
 return hasSymbol ? symbols[0] : null;
}

/**
 * Tests whether a character is one of the annotation introducing
 * characters listed by <code>labelAnnotationIntroducingCharacters()</code>.
 *
 * @param ch The character to check
 * @return true if <code>ch</code> occurs in that array, false otherwise
 */
@Override
public boolean isLabelAnnotationIntroducingCharacter(char ch) {
 final char[] introducers = labelAnnotationIntroducingCharacters();
 for (int k = 0; k < introducers.length; k++) {
  if (introducers[k] == ch) {
   return true;
  }
 }
 return false;
}

/**
 * Returns the basic syntactic category of a label.
 * The label is cut at the index reported by
 * <code>postBasicCategoryIndex(category)</code>, which basically means
 * dropping everything from the first occurrence of one of the
 * <code>labelAnnotationIntroducingCharacters()</code> onwards.
 * Labels that themselves contain such characters are special-cased:
 * (i) a leading character from this set is never a cut point
 * (e.g., '-' or '=' as a token), and (ii) when the label starts with one
 * of these characters, a second instance of the same character is not a
 * cut point either (to deal with '-LRB-', '-RCB-', etc.).
 *
 * @param category The whole String name of the label
 * @return The basic category of the String, or null if
 *         <code>category</code> is null
 */
@Override
public String basicCategory(String category) {
 if (category != null) {
  final int end = postBasicCategoryIndex(category);
  return category.substring(0, end);
 }
 return null;
}

/**
 * Returns the syntactic category together with its 'function'.
 * Normally this means dropping numeric coindexation (coreference indices
 * and the like) while keeping functional information such as Penn Treebank
 * functional tags or ICE phrasal functions, perhaps in the form
 * <code>category-function</code>.
 * <p/>
 * This implementation repeatedly truncates the label at the position given
 * by <code>lastIndexOfNumericTag</code> until that method reports a
 * negative index (non-numeric tags are assumed to be functional tags).
 *
 * @param category The whole String name of the label
 * @return A String giving the category and function, or null if
 *         <code>category</code> is null
 */
@Override
public String categoryAndFunction(String category) {
 if (category == null) {
  return null;
 }
 String trimmed = category;
 // Peel numeric tags off the end one at a time.
 for (int cut = lastIndexOfNumericTag(trimmed); cut >= 0; cut = lastIndexOfNumericTag(trimmed)) {
  trimmed = trimmed.substring(0, cut);
 }
 return trimmed;
}

/**
 * Returns a GrammaticalStructureFactory for this language/treebank by
 * delegating to the no-argument <code>grammaticalStructureFactory()</code>.
 * (Intended to be overridden in subclasses.)
 *
 * @param puncFilt A punctuation filter; this implementation does not use it
 * @param typedDependencyHeadFinder A head finder; this implementation does not use it
 * @return Whatever the no-argument <code>grammaticalStructureFactory()</code> returns
 */
@Override
public GrammaticalStructureFactory grammaticalStructureFactory(Predicate<String> puncFilt, HeadFinder typedDependencyHeadFinder) {
 final GrammaticalStructureFactory defaultFactory = grammaticalStructureFactory();
 return defaultFactory;
}

/**
 * Computes the basic category via the superclass and then, unless the
 * <code>leaveGF</code> flag is set, additionally passes the result through
 * <code>stripGF</code>.
 *
 * @param category The whole String name of the label
 * @return The superclass's basic category, run through
 *         <code>stripGF</code> when <code>leaveGF</code> is false
 */
@Override
public String basicCategory(String category) {
 final String superCat = super.basicCategory(category);
 return leaveGF ? superCat : stripGF(superCat);
}

Javadoc

This provides an implementation of parts of the TreebankLanguagePack API to reduce the load on fresh implementations. Only the abstract methods below need to be implemented to give a reasonable solution for a new language.

Most used methods

basicCategory
Returns the basic syntactic category of a String. This implementation basically truncates stuff after an occurrence of one of the labelAnnotationIntroducingCharacters().
grammaticalStructureFactory
Return a GrammaticalStructureFactory suitable for this language/treebank. (To be overridden in subclasses.)
getTokenizerFactory
Return a tokenizer which might be suitable for tokenizing text that will be used with this Treebank/
isLabelAnnotationIntroducingCharacter
Say whether this character is an annotation introducing character.
labelAnnotationIntroducingCharacters
Return an array of characters at which a String should be truncated to give the basic syntactic cate
lastIndexOfNumericTag
Returns the index within this string of the last occurrence of a isLabelAnnotationIntroducingCharact
postBasicCategoryIndex
Returns the index of the first character that is after the basic label. That is, if category is "NP-
punctuationTags
Returns a String array of punctuation tags for this treebank/language.
startSymbols
Returns a String array of treebank start symbols.
treeReaderFactory
getGfCharacter
punctuationWordRejectFilter
Returns a filter that accepts a String that is not a punctuation word, and rejects punctuation. If o

Popular in Java

Parsing JSON documents to java classes using gson
setScale (BigDecimal)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
getContentResolver (Context)
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
KeyStore (java.security)
KeyStore is responsible for maintaining cryptographic keys and their owners. The type of the syste
List (java.util)
An ordered collection (also known as a sequence). The user of this interface has precise control ove
Container (java.awt)
A generic Abstract Window Toolkit (AWT) container object is a component that can contain other AWT co
Graphics2D (java.awt)
This Graphics2D class extends the Graphics class to provide more sophisticated control over graphics
Top PhpStorm plugins

How to use AbstractTreebankLanguagePack in edu.stanford.nlp.trees

Best Java code snippets using edu.stanford.nlp.trees.AbstractTreebankLanguagePack (Showing top 20 results out of 315)

How to use
AbstractTreebankLanguagePack
in
edu.stanford.nlp.trees