/**
 * Reads lines from {@code in} until one yields a non-empty token list, and wraps it
 * in a {@link Document} with the given initial class id.
 *
 * @param initClassID class id assigned to the returned document
 * @return the next non-empty document, or {@code null} when the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens;
    do {
        tokens = in.readLineTokens(tokenizationDelimiters);
        // Optionally drop the leading token of each line (presumably a per-line label
        // such as a class name — TODO confirm against the input format).
        if (discardFirstToken && tokens != null && tokens.size() > 0) tokens.removeElementAt(0);
        // Matches the original behavior: the filter is invoked even when tokens is null.
        if (stops != null) tokens = stops.filterStopWords(tokens);
        // A non-empty line becomes the next document; empty lines are skipped.
        if (tokens != null && tokens.size() > 0) return new Document(tokens, initClassID);
    } while (tokens != null);
    // readLineTokens returned null: end of input.
    return null;
}
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
/**
 * Reads lines from {@code in} until one yields a non-empty token list, and wraps it
 * in a {@link Document} with the given initial class id.
 *
 * @param initClassID class id assigned to the returned document
 * @return the next non-empty document, or {@code null} when the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens;
    do {
        tokens = in.readLineTokens(tokenizationDelimiters);
        // Optionally drop the leading token of each line (presumably a per-line label
        // such as a class name — TODO confirm against the input format).
        if (discardFirstToken && tokens != null && tokens.size() > 0) tokens.removeElementAt(0);
        // Matches the original behavior: the filter is invoked even when tokens is null.
        if (stops != null) tokens = stops.filterStopWords(tokens);
        // A non-empty line becomes the next document; empty lines are skipped.
        if (tokens != null && tokens.size() > 0) return new Document(tokens, initClassID);
    } while (tokens != null);
    // readLineTokens returned null: end of input.
    return null;
}
}
public static void initTopicClassifier(String pathToTopicData, String[] fileNames, String[] _labelnames) { map = new FeatureMap(); labelnames = new String[_labelnames.length + 1]; labelnames[0] = "UNKNOWN"; for (int i = 0; i < _labelnames.length; i++) labelnames[1 + i] = _labelnames[i]; DocumentCollection docs = new DocumentCollection(); StopWords stops = new StopWords(pathToStopWords); for (int i = 0; i < fileNames.length; i++) docs.addDocuments(pathToTopicData + "/" + fileNames[i], i, stops, false, "\n\t -.,?<>;':\"[]{}\\|`~!@#$%^&*()_+=-0987654321`~"); map.addDocs(docs, 20, false); NfoldCrossvalidation cv = new NfoldCrossvalidation(docs, 5); cv.printNfoldCorrssvalidationNbAcc(fileNames.length, -1, 20); // System.exit(0); nb = new MemoryEfficientNB(docs, map, fileNames.length); }
/**
 * Reads lines from {@code in} until one yields a non-empty token list, and wraps it
 * in a {@link Document} with the given initial class id.
 *
 * @param initClassID class id assigned to the returned document
 * @return the next non-empty document, or {@code null} when the input is exhausted
 */
public Document nextDoc(int initClassID) {
    Vector<String> tokens;
    do {
        tokens = in.readLineTokens(tokenizationDelimiters);
        // Optionally drop the leading token of each line (presumably a per-line label
        // such as a class name — TODO confirm against the input format).
        if (discardFirstToken && tokens != null && tokens.size() > 0) tokens.removeElementAt(0);
        // Matches the original behavior: the filter is invoked even when tokens is null.
        if (stops != null) tokens = stops.filterStopWords(tokens);
        // A non-empty line becomes the next document; empty lines are skipped.
        if (tokens != null && tokens.size() > 0) return new Document(tokens, initClassID);
    } while (tokens != null);
    // readLineTokens returned null: end of input.
    return null;
}
}
/**
 * Loads one document per non-empty line of {@code filename} into this collection,
 * assigning every document the same class id.
 *
 * @param filename               file to read, one document per line
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was (words.size() >= 0), which is always true and therefore added
        // an empty Document for every blank/fully-filtered line. Skip empty lines,
        // consistent with nextDoc() and addFolder().
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
    // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should be
    // called here; confirm against the InFile API.
}
/**
 * Loads one document per non-empty line of {@code filename} into this collection,
 * assigning every document the same class id.
 *
 * @param filename               file to read, one document per line
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was (words.size() >= 0), which is always true and therefore added
        // an empty Document for every blank/fully-filtered line. Skip empty lines,
        // consistent with nextDoc() and addFolder().
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
    // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should be
    // called here; confirm against the InFile API.
}
/**
 * Loads one document per non-empty line of {@code filename} into this collection,
 * assigning every document the same class id.
 *
 * @param filename               file to read, one document per line
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 */
public void addDocuments(String filename, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    Vector<String> words = in.readLineTokens(tokenizationDelimiters);
    if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
    if (stops != null) words = stops.filterStopWords(words);
    while (words != null) {
        // BUG FIX: was (words.size() >= 0), which is always true and therefore added
        // an empty Document for every blank/fully-filtered line. Skip empty lines,
        // consistent with nextDoc() and addFolder().
        if (words.size() > 0) docs.addElement(new Document(words, classID));
        words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
    }
    // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should be
    // called here; confirm against the InFile API.
}
/**
 * Loads every file in {@code path} as one document (all lines concatenated) with the
 * given class id.
 *
 * @param path                   directory whose files are read, one document per file
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 * @throws IllegalArgumentException if {@code path} cannot be listed as a directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // ROBUSTNESS: File.list() returns null when 'path' is not a readable directory;
    // fail with a descriptive message instead of an opaque NPE in the for-each below.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        // All lines of the file are accumulated into a single document.
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            for (int j = 0; j < words.size(); j++) allWords.addElement(words.elementAt(j));
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should
        // be called here; confirm against the InFile API.
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Loads every file in {@code path} as one document (all lines concatenated) with the
 * given class id.
 *
 * @param path                   directory whose files are read, one document per file
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 * @throws IllegalArgumentException if {@code path} cannot be listed as a directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // ROBUSTNESS: File.list() returns null when 'path' is not a readable directory;
    // fail with a descriptive message instead of an opaque NPE in the for-each below.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        // All lines of the file are accumulated into a single document.
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            for (int j = 0; j < words.size(); j++) allWords.addElement(words.elementAt(j));
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should
        // be called here; confirm against the InFile API.
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Loads every file in {@code path} as one document (all lines concatenated) with the
 * given class id.
 *
 * @param path                   directory whose files are read, one document per file
 * @param classID                class id assigned to every document read
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param discardFirstToken      if true, drop the first token of each line
 *                               (presumably a per-line label — TODO confirm)
 * @param tokenizationDelimiters characters that separate tokens
 * @throws IllegalArgumentException if {@code path} cannot be listed as a directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    // ROBUSTNESS: File.list() returns null when 'path' is not a readable directory;
    // fail with a descriptive message instead of an opaque NPE in the for-each below.
    if (files == null) throw new IllegalArgumentException("Cannot list directory: " + path);
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        // All lines of the file are accumulated into a single document.
        Vector<String> allWords = new Vector<>();
        Vector<String> words = in.readLineTokens(tokenizationDelimiters);
        if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
        if (stops != null) words = stops.filterStopWords(words);
        while (words != null) {
            for (int j = 0; j < words.size(); j++) allWords.addElement(words.elementAt(j));
            words = in.readLineTokens(tokenizationDelimiters);
            if ((discardFirstToken) && (words != null) && (words.size() > 0)) words.removeElementAt(0);
            if (stops != null) words = stops.filterStopWords(words);
        }
        // NOTE(review): 'in' is never closed — if InFile exposes a close(), it should
        // be called here; confirm against the InFile API.
        docs.addElement(new Document(allWords, classID));
    }
}
/**
 * Builds a document from an entire file: every line is tokenized, optionally
 * stop-word filtered, and all resulting tokens are concatenated into {@code words}.
 *
 * @param filename               file whose full contents form this document
 * @param _classID               class id stored on this document
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param tokenizationDelimiters characters that separate tokens
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters); line != null; line = in.readLineTokens(tokenizationDelimiters)) {
        if (stops != null) line = stops.filterStopWords(line);
        // Accumulate the line's surviving tokens into the document body.
        words.addAll(line);
    }
    // Release the slack capacity of the backing array.
    words.trimToSize();
}
/**
 * Builds a document from an entire file: every line is tokenized, optionally
 * stop-word filtered, and all resulting tokens are concatenated into {@code words}.
 *
 * @param filename               file whose full contents form this document
 * @param _classID               class id stored on this document
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param tokenizationDelimiters characters that separate tokens
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters); line != null; line = in.readLineTokens(tokenizationDelimiters)) {
        if (stops != null) line = stops.filterStopWords(line);
        // Accumulate the line's surviving tokens into the document body.
        words.addAll(line);
    }
    // Release the slack capacity of the backing array.
    words.trimToSize();
}
/**
 * Builds a document from an entire file: every line is tokenized, optionally
 * stop-word filtered, and all resulting tokens are concatenated into {@code words}.
 *
 * @param filename               file whose full contents form this document
 * @param _classID               class id stored on this document
 * @param stops                  stop-word filter, or {@code null} to skip filtering
 * @param tokenizationDelimiters characters that separate tokens
 */
public Document(String filename, int _classID, StopWords stops, String tokenizationDelimiters) {
    InFile in = new InFile(filename);
    this.classID = _classID;
    words = new Vector<>();
    for (Vector<String> line = in.readLineTokens(tokenizationDelimiters); line != null; line = in.readLineTokens(tokenizationDelimiters)) {
        if (stops != null) line = stops.filterStopWords(line);
        // Accumulate the line's surviving tokens into the document body.
        words.addAll(line);
    }
    // Release the slack capacity of the backing array.
    words.trimToSize();
}