/**
 * Assembles the demo feature set: a per-document token-ratio feature plus
 * character n-grams (n = 1..3, restricted to the 500 most frequent ones).
 *
 * @return the configured feature set, named "DummyFeatureSet"
 */
private static TcFeatureSet getFeatureSet()
{
    TcFeature tokenRatio = TcFeatureFactory.create(TokenRatioPerDocument.class);
    TcFeature charNgrams = TcFeatureFactory.create(CharacterNGram.class,
            CharacterNGram.PARAM_NGRAM_USE_TOP_K, 500,
            CharacterNGram.PARAM_NGRAM_MIN_N, 1,
            CharacterNGram.PARAM_NGRAM_MAX_N, 3);
    return new TcFeatureSet("DummyFeatureSet", tokenRatio, charNgrams);
}
/**
 * Builds the feature set for this experiment: word n-grams with the
 * extractor's default configuration.
 *
 * @return a feature set containing a single word n-gram feature
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature wordNgrams = TcFeatureFactory.create(WordNGram.class);
    return new TcFeatureSet(wordNgrams);
}
/**
 * Builds the feature set for this pair experiment: the difference in the
 * number of tokens between the two texts of a pair.
 *
 * @return a feature set containing the token-count-difference pair feature
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature tokenDiff = TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class);
    return new TcFeatureSet(tokenDiff);
}
/**
 * Builds the feature set for this experiment: character n-grams with the
 * extractor's default configuration.
 *
 * @return a feature set containing a single character n-gram feature
 */
public TcFeatureSet getFeatureSet()
{
    TcFeature charNgrams = TcFeatureFactory.create(CharacterNGram.class);
    return new TcFeatureSet(charNgrams);
}
// Surface form of the previous, current, and next annotation relative to the target.
set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, -1));
set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, 0));
set.add(create(TargetSurfaceFormContextFeature.class, TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX, +1));
// Token context at offsets -1 / 0 / +1, each capped at the 1000 most frequent entries.
set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, -1, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000));
set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, 0, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000));
set.add(create(TokenContext.class, TokenContext.PARAM_TARGET_INDEX, +1, TokenContext.PARAM_NGRAM_USE_TOP_K, 1000));
// Binary capitalization feature.
set.add(create(IsFirstLetterCapitalized.class));
// Lucene-backed character n-grams, one feature per fixed n (1..4); unigrams use a
// smaller top-k (50) than the higher orders (750).
set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 1, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 1, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 50));
set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 2, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 2, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750));
set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 3, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 3, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750));
set.add(create(LuceneCharacterNGram.class, LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 4, LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 4, LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750));
tcModelLocation.getAbsolutePath()); // closes a call whose opening is above this chunk
// Re-instantiate the feature with the id-specific parameters removed.
TcFeature feature = TcFeatureFactory.create(id, feClass, idRemovedParameters.toArray());
// The factory-built feature exposes its configuration as a UIMA external resource
// description; presumably used below to wire the extractor into the pipeline — TODO confirm.
ExternalResourceDescription exRes = feature.getActualValue();
tcModelLocation.getAbsolutePath()); // closes a call whose opening is above this chunk
// Re-instantiate the feature with the id-specific parameters removed.
TcFeature feature = TcFeatureFactory.create(id, feClass, idRemovedParameters.toArray());
// The factory-built feature exposes its configuration as a UIMA external resource
// description; presumably used below to wire the extractor into the pipeline — TODO confirm.
ExternalResourceDescription exRes = feature.getActualValue();
/**
 * Runs a single-label, document-mode train/test experiment with word n-gram
 * features and a LIBSVM backend. Train and test data are read folder-wise
 * from {@code corpusFilePathTrain} / {@code corpusFilePathTest}.
 *
 * @throws Exception
 *             if reader creation or the experiment run fails
 */
public static void runExperiment() throws Exception
{
    // The two readers differed only in their source location; build both
    // through one helper instead of duplicating the configuration.
    CollectionReaderDescription readerTrain = createReader(corpusFilePathTrain);
    CollectionReaderDescription readerTest = createReader(corpusFilePathTest);

    ExperimentBuilder builder = new ExperimentBuilder();
    builder.experiment(ExperimentType.TRAIN_TEST, "trainTest")
            .dataReaderTrain(readerTrain)
            .dataReaderTest(readerTest)
            .preprocessing(getPreprocessing())
            .features(TcFeatureFactory.create(WordNGram.class))
            .learningMode(LearningMode.SINGLE_LABEL)
            .featureMode(FeatureMode.DOCUMENT)
            .machineLearningBackend(new MLBackend(new LibsvmAdapter()))
            .run();
}

/**
 * Creates a folder-wise reader over {@code *}/{@code *.txt} files below the
 * given source location, using the experiment's language code.
 *
 * @param sourceLocation
 *            root of the corpus (passed through to the reader unchanged;
 *            declared as Object because UIMA accepts several location types)
 * @return the configured reader description
 * @throws Exception
 *             if the reader description cannot be created
 */
private static CollectionReaderDescription createReader(Object sourceLocation) throws Exception
{
    return CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, sourceLocation,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");
}