opennlp.tools.sentdetect.DefaultSDContextGenerator Java code examples

/**
 * Builds the context generator used for sentence detection.
 *
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @param customEOSCharacters handed unchanged to the same constructor.
 * @return a newly constructed {@link DefaultSDContextGenerator}.
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}

 if (position < lastIndex && StringUtil.isWhitespace(sb.charAt(position + 1)))
  collectFeats.add("sn");
 collectFeats.add("eos=" + escapeChar(sb.charAt(position)));
int prefixStart = previousSpaceIndex(sb, position);
int prevStart = previousSpaceIndex(sb, prefixStart);
previous = String.valueOf(sb.subSequence(prevStart, prefixStart)).trim();
int suffixEnd = nextSpaceIndex(sb, position, lastIndex);
int nextEnd = nextSpaceIndex(sb, suffixEnd + 1, lastIndex + 1);
if (position == lastIndex) {
 suffix = "";
collectFeatures(prefix,suffix,previous,next, sb.charAt(position));

if (!prefix.equals("")) {
 collectFeats.add(Integer.toString(prefix.length()));
 if (isFirstUpper(prefix)) {
  collectFeats.add("xcap");
buf.setLength(0);
if (!previous.equals("")) {
 if (isFirstUpper(previous)) {
  collectFeats.add("vcap");
buf.setLength(0);
if (!suffix.equals("")) {
 if (isFirstUpper(suffix)) {
  collectFeats.add("scap");
buf.setLength(0);
if (!next.equals("")) {
 if (isFirstUpper(next)) {
  collectFeats.add("ncap");

/**
 * Determines some of the features for the sentence detector and adds them to list features.
 * <p>
 * This overload only forwards to the five-argument variant, passing {@code null} as the
 * trailing {@link Character} argument.
 *
 * @param prefix String preceding the eos character in the eos token.
 * @param suffix String following the eos character in the eos token.
 * @param previous Space delimited token preceding token containing eos character.
 * @param next Space delimited token following token containing eos character.
 *
 * @deprecated use {@link #collectFeatures(String, String, String, String, Character)} instead.
 */
@Deprecated
protected void collectFeatures(String prefix, String suffix, String previous, String next) {
 collectFeatures(prefix, suffix, previous, next, null);
}

/**
 * Determines some of the features for the sentence detector and adds them to list features.
 * <p>
 * This overload only forwards to the five-argument variant, passing {@code null} as the
 * trailing {@link Character} argument.
 *
 * @param prefix String preceding the eos character in the eos token.
 * @param suffix String following the eos character in the eos token.
 * @param previous Space delimited token preceding token containing eos character.
 * @param next Space delimited token following token containing eos character.
 *
 * @deprecated use {@link #collectFeatures(String, String, String, String, Character)} instead.
 */
@Deprecated
protected void collectFeatures(String prefix, String suffix, String previous, String next) {
 collectFeatures(prefix, suffix, previous, next, null);
}

/**
 * Picks a sentence context generator based on the language code.
 * <p>
 * {@code "th"}/{@code "tha"} yield a {@code SentenceContextGenerator}; {@code "pt"}/{@code "por"}
 * yield a {@link DefaultSDContextGenerator} built with {@code ptEosCharacters}; every other
 * value (including {@code null}) yields one built with {@code defaultEosCharacters}.
 *
 * @param languageCode language code used to choose the generator; may be {@code null}.
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @return the generator selected for {@code languageCode}.
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
  // Literal-first equals() keeps a null languageCode on the default path instead of throwing.
  boolean isThai = "th".equals(languageCode) || "tha".equals(languageCode);
  if (isThai) {
    return new SentenceContextGenerator();
  }

  boolean isPortuguese = "pt".equals(languageCode) || "por".equals(languageCode);
  if (isPortuguese) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}

 if (position < lastIndex && StringUtil.isWhitespace(sb.charAt(position + 1)))
  collectFeats.add("sn");
 collectFeats.add("eos=" + escapeChar(sb.charAt(position)));
int prefixStart = previousSpaceIndex(sb, position);
int prevStart = previousSpaceIndex(sb, prefixStart);
previous = String.valueOf(sb.subSequence(prevStart, prefixStart)).trim();
int suffixEnd = nextSpaceIndex(sb, position, lastIndex);
int nextEnd = nextSpaceIndex(sb, suffixEnd + 1, lastIndex + 1);
if (position == lastIndex) {
 suffix = "";
collectFeatures(prefix,suffix,previous,next, sb.charAt(position));

/**
 * Determines some of the features for the sentence detector and adds them to list features.
 * <p>
 * This overload only forwards to the five-argument variant, passing {@code null} as the
 * trailing {@link Character} argument.
 *
 * @param prefix String preceding the eos character in the eos token.
 * @param suffix String following the eos character in the eos token.
 * @param previous Space delimited token preceding token containing eos character.
 * @param next Space delimited token following token containing eos character.
 *
 * @deprecated use {@link #collectFeatures(String, String, String, String, Character)} instead.
 */
@Deprecated
protected void collectFeatures(String prefix, String suffix, String previous, String next) {
 collectFeatures(prefix, suffix, previous, next, null);
}

if (!prefix.equals("")) {
 collectFeats.add(Integer.toString(prefix.length()));
 if (isFirstUpper(prefix)) {
  collectFeats.add("xcap");
buf.setLength(0);
if (!previous.equals("")) {
 if (isFirstUpper(previous)) {
  collectFeats.add("vcap");
buf.setLength(0);
if (!suffix.equals("")) {
 if (isFirstUpper(suffix)) {
  collectFeats.add("scap");
buf.setLength(0);
if (!next.equals("")) {
 if (isFirstUpper(next)) {
  collectFeats.add("ncap");

 /**
  * Checks the context features produced at the two abbreviation periods of a sample
  * sentence when "Mr." and "Inc." are supplied as known abbreviations.
  */
 @Test
 public void testGetContextWithAbbreviations() throws Exception {
  final String sentence = "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.";
  HashSet<String> abbreviations = new HashSet<>(Arrays.asList("Mr./Inc.".split("/")));
  SDContextGenerator sdContextGenerator =
    new DefaultSDContextGenerator(abbreviations, Factory.defaultEosCharacters);

  // Index 2 is the period that ends "Mr.".
  String[] contextAtMr = sdContextGenerator.getContext(sentence, 2);
  String[] expectedAtMr = "sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/");
  Assert.assertArrayEquals(expectedAtMr, contextAtMr);

  // Index 29 is the period that ends "Inc.".
  String[] contextAtInc = sdContextGenerator.getContext(sentence, 29);
  String[] expectedAtInc = "sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/");
  Assert.assertArrayEquals(expectedAtInc, contextAtInc);
 }
}

 if (position < lastIndex && StringUtil.isWhitespace(sb.charAt(position + 1)))
  collectFeats.add("sn");
 collectFeats.add("eos=" + escapeChar(sb.charAt(position)));
int prefixStart = previousSpaceIndex(sb, position);
int prevStart = previousSpaceIndex(sb, prefixStart);
previous = String.valueOf(sb.subSequence(prevStart, prefixStart)).trim();
int suffixEnd = nextSpaceIndex(sb, position, lastIndex);
int nextEnd = nextSpaceIndex(sb, suffixEnd + 1, lastIndex + 1);
if (position == lastIndex) {
 suffix = "";
collectFeatures(prefix,suffix,previous,next, sb.charAt(position));

if (!prefix.equals("")) {
 collectFeats.add(Integer.toString(prefix.length()));
 if (isFirstUpper(prefix)) {
  collectFeats.add("xcap");
buf.setLength(0);
if (!previous.equals("")) {
 if (isFirstUpper(previous)) {
  collectFeats.add("vcap");
buf.setLength(0);
if (!suffix.equals("")) {
 if (isFirstUpper(suffix)) {
  collectFeats.add("scap");
buf.setLength(0);
if (!next.equals("")) {
 if (isFirstUpper(next)) {
  collectFeats.add("ncap");

/**
 * Checks the context features produced at the two abbreviation periods of a sample
 * sentence when no abbreviations are supplied.
 */
@Test
public void testGetContext() throws Exception {
 final String sentence = "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.";
 SDContextGenerator sdContextGenerator =
   new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);

 // Index 2 is the period that ends "Mr.".
 String[] contextAtMr = sdContextGenerator.getContext(sentence, 2);
 String[] expectedAtMr = "sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/");
 Assert.assertArrayEquals(expectedAtMr, contextAtMr);

 // Index 29 is the period that ends "Inc.".
 String[] contextAtInc = sdContextGenerator.getContext(sentence, 29);
 String[] expectedAtInc = "sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/");
 Assert.assertArrayEquals(expectedAtInc, contextAtInc);
}

/**
 * Builds the context generator used for sentence detection.
 *
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @param customEOSCharacters handed unchanged to the same constructor.
 * @return a newly constructed {@link DefaultSDContextGenerator}.
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}

/**
 * Builds the context generator used for sentence detection.
 *
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @param customEOSCharacters handed unchanged to the same constructor.
 * @return a newly constructed {@link DefaultSDContextGenerator}.
 */
public SDContextGenerator createSentenceContextGenerator(
    Set<String> abbreviations, char[] customEOSCharacters) {
  SDContextGenerator generator =
      new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  return generator;
}

/**
 * Picks a sentence context generator based on the language code.
 * <p>
 * {@code "th"}/{@code "tha"} yield a {@code SentenceContextGenerator}; {@code "pt"}/{@code "por"}
 * yield a {@link DefaultSDContextGenerator} built with {@code ptEosCharacters}; every other
 * value (including {@code null}) yields one built with {@code defaultEosCharacters}.
 *
 * @param languageCode language code used to choose the generator; may be {@code null}.
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @return the generator selected for {@code languageCode}.
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
  // Literal-first equals() keeps a null languageCode on the default path instead of throwing.
  boolean isThai = "th".equals(languageCode) || "tha".equals(languageCode);
  if (isThai) {
    return new SentenceContextGenerator();
  }

  boolean isPortuguese = "pt".equals(languageCode) || "por".equals(languageCode);
  if (isPortuguese) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}

/**
 * Picks a sentence context generator based on the language code.
 * <p>
 * {@code "th"}/{@code "tha"} yield a {@code SentenceContextGenerator}; {@code "pt"}/{@code "por"}
 * yield a {@link DefaultSDContextGenerator} built with {@code ptEosCharacters}; every other
 * value (including {@code null}) yields one built with {@code defaultEosCharacters}.
 *
 * @param languageCode language code used to choose the generator; may be {@code null}.
 * @param abbreviations handed unchanged to the {@link DefaultSDContextGenerator} constructor.
 * @return the generator selected for {@code languageCode}.
 */
public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) {
  // Literal-first equals() keeps a null languageCode on the default path instead of throwing.
  boolean isThai = "th".equals(languageCode) || "tha".equals(languageCode);
  if (isThai) {
    return new SentenceContextGenerator();
  }

  boolean isPortuguese = "pt".equals(languageCode) || "por".equals(languageCode);
  if (isPortuguese) {
    return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
  }

  return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
}

/**
 * Loads the sentence detector model from {@code sdModelPath} and wires up the
 * sentence detector and the set of segments to skip.
 * <p>
 * The model stream is opened in a try-with-resources statement, so it is closed whether
 * or not model loading succeeds.
 *
 * @param aContext the UIMA context, forwarded to {@code super.initialize}.
 * @throws ResourceInitializationException if the superclass initialization fails, or if
 *         an {@link IOException} occurs while opening or reading the model (the
 *         {@code IOException} is kept as the cause).
 */
@Override
public void initialize(UimaContext aContext)
    throws ResourceInitializationException {
  super.initialize(aContext);
  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    logger.info("Sentence detector model file: " + sdModelPath);
    sdmodel = new SentenceModel(is);
    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
    skipSegmentsSet = new HashSet<>();
    // skipSegmentsArray may be unset; in that case the skip set stays empty.
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // The wrapped exception carries the cause, so the stack trace is not printed separately.
    throw new ResourceInitializationException(e);
  }
}

/**
 * Loads the sentence detector model from {@code sdModelPath} and wires up the
 * sentence detector and the set of segments to skip.
 * <p>
 * The model stream is opened in a try-with-resources statement, so it is closed whether
 * or not model loading succeeds.
 *
 * @param aContext the UIMA context, forwarded to {@code super.initialize}.
 * @throws ResourceInitializationException if the superclass initialization fails, or if
 *         an {@link IOException} occurs while opening or reading the model (the
 *         {@code IOException} is kept as the cause).
 */
@Override
public void initialize(UimaContext aContext)
    throws ResourceInitializationException {
  super.initialize(aContext);
  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    logger.info("Sentence detector model file: " + sdModelPath);
    sdmodel = new SentenceModel(is);
    EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
    DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
    sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
    skipSegmentsSet = new HashSet<>();
    // skipSegmentsArray may be unset; in that case the skip set stays empty.
    if (skipSegmentsArray != null) {
      Collections.addAll(skipSegmentsSet, skipSegmentsArray);
    }
  } catch (IOException e) {
    // The wrapped exception carries the cause, so the stack trace is not printed separately.
    throw new ResourceInitializationException(e);
  }
}

/**
 * Reads configuration parameters: loads the sentence model named by
 * {@code SD_MODEL_FILE_PARAM}, builds the sentence detector, reads the set of segments
 * to skip, and compiles the paragraph/split/period/acronym patterns.
 * <p>
 * The model stream is opened in a try-with-resources statement so that it is closed
 * even when constructing the {@code SentenceModel} fails.
 *
 * @throws ResourceAccessException declared for the configuration lookups
 *         (NOTE(review): presumably raised by {@code ParamUtil} - confirm).
 * @throws IOException if the model stream cannot be opened, read or closed.
 * @throws InvalidFormatException declared for model loading
 *         (NOTE(review): presumably raised by the {@code SentenceModel} constructor - confirm).
 */
private void configInit() throws ResourceAccessException, InvalidFormatException, IOException {
  String sdModelPath = (String) context
      .getConfigParameterValue(SD_MODEL_FILE_PARAM);
  try (InputStream is = FileLocator.getAsStream(sdModelPath)) {
    logger.info("Sentence detector model file: " + sdModelPath);
    sdmodel = new SentenceModel(is);
  }
  EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
  char[] eosc = eoss.getEndOfSentenceCharacters();
  DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc);
  sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
  skipSegmentsSet = ParamUtil.getStringParameterValuesSet(
      PARAM_SEGMENTS_TO_SKIP, context);
  // vng change begin
  paragraphPattern = compilePatternCheck("paragraphPattern",
      PARAGRAPH_PATTERN);
  splitPattern = compilePatternCheck("splitPattern", SPLIT_PATTERN);
  periodPattern = compilePatternCheck("periodPattern", PERIOD_PATTERN);
  acronymPattern = compilePatternCheck("acronymPattern", ACRONYM_PATTERN);
  // vng change end
}
/**

Javadoc

Generate event contexts for maxent decisions for sentence detection.

Most used methods

<init>
Creates a new SDContextGenerator instance with no induced abbreviations.
collectFeatures
Determines some of the features for the sentence detector and adds them to list features.
escapeChar
isFirstUpper
nextSpaceIndex
Finds the index of the nearest space after a specified index.
previousSpaceIndex
Finds the index of the nearest space before a specified index which is not itself preceded by a space.

Popular in Java

Updating database using SQL prepared statement
setScale (BigDecimal)
onRequestPermissionsResult (Fragment)
onCreateOptionsMenu (Activity)
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, normally configured using Executors factory methods.
VirtualMachine (com.sun.tools.attach)
A Java virtual machine. A VirtualMachine represents a Java virtual machine to which this Java virtual machine has attached.
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
Top Vim plugins

How to use DefaultSDContextGenerator in opennlp.tools.sentdetect

Best Java code snippets using opennlp.tools.sentdetect.DefaultSDContextGenerator (Showing top 20 results out of 315)

How to use
DefaultSDContextGenerator
in
opennlp.tools.sentdetect