// Run htmlInst through a default-configured StringToWordVector filter.
// Any exception is reported on stderr (message only) and the enclosing
// method returns immediately.
try {
    StringToWordVector wordVectorFilter = new StringToWordVector();
    wordVectorFilter.setInputFormat(htmlInst);
    // NOTE(review): the filtered result is scoped to this try block and is
    // not read again here — confirm that discarding it is intended.
    Instances vectorized = Filter.useFilter(htmlInst, wordVectorFilter);
} catch (Exception ex) {
    System.err.println("Exception caught during formatting: " + ex.getMessage());
    return;
}
// Build a default StringToWordVector and hand it the input data.
// Note that setInputFormat runs outside the try below, so an exception
// thrown by it is not handled by this snippet's catch.
StringToWordVector stwv = new StringToWordVector();
stwv.setInputFormat(data);

// Apply the filter and store the output in fsource. On failure the stack
// trace is printed, fsource is left unassigned by this statement, and
// execution continues.
try {
    fsource = Filter.useFilter(data, stwv);
} catch (Exception ex) {
    ex.printStackTrace();
}
// Word-vector filter constructed with the argument 1000
// (presumably the number of words to keep — confirm against the Weka API).
StringToWordVector STWfilter = new StringToWordVector(1000);

// Token handling: enable the stop list and emit word counts.
STWfilter.setUseStoplist(true);
STWfilter.setOutputWordCounts(true);

// Weighting: enable both the TF and IDF transforms and normalize document
// length using the FILTER_NORMALIZE_ALL tag.
STWfilter.setTFTransform(true);
STWfilter.setIDFTransform(true);
STWfilter.setNormalizeDocLength(
        new SelectedTag(StringToWordVector.FILTER_NORMALIZE_ALL, StringToWordVector.TAGS_FILTER));

// Stemming via a Snowball stemmer selected by name.
// NOTE(review): verify that "English" (capitalized) is a stemmer name the
// installed Snowball stemmer recognizes.
SnowballStemmer stemmer = new SnowballStemmer();
stemmer.setStemmer("English");
STWfilter.setStemmer(stemmer);

// Kept last, after every option above has been configured.
STWfilter.setInputFormat(train);
// Pass dataset to the filter as its input format, then replace dataset with
// the result of running the filter over it. The order matters: the second
// call uses the filter that the first call has just been given the format for.
filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter);
// Give the filter dataset as its input format, then overwrite dataset with
// the filtered output returned by Filter.useFilter. After this line the
// original, unfiltered contents are no longer reachable through dataset.
filter.setInputFormat(dataset); dataset = Filter.useFilter(dataset, filter);