/**
 * Creates a new {@link TcFeature} for the given feature extractor class and assigns it a
 * generated identifier.
 *
 * @param featureName
 *            The class of a feature extractor
 * @param parameters
 *            The configuration parameters for this feature extractor
 * @return Configured feature which carries a randomly generated unique identification
 */
public static TcFeature create(Class<? extends Resource> featureName, Object... parameters)
{
    /*
     * Each feature needs a unique name. It is built from the simple name of the feature class
     * followed by a random UUID (dashes removed, so the id stays purely alphanumeric).
     *
     * System.nanoTime() is deliberately not used as the suffix: its resolution is not
     * guaranteed to be one nanosecond, so two features of the same class that are created
     * directly after each other could end up with the same id.
     */
    String id = featureName.getSimpleName()
            + java.util.UUID.randomUUID().toString().replace("-", "");

    // The id is handed to the extractor as a regular configuration parameter (name, value).
    List<Object> params = getParameterAsString(parameters);
    params.add(FeatureExtractorResource_ImplBase.PARAM_UNIQUE_EXTRACTOR_NAME);
    params.add(id);

    return new TcFeature(featureName, id, params.toArray());
}
/**
 * Creates a new {@link TcFeature} for the given feature extractor class and assigns it a
 * generated identifier.
 *
 * @param featureName
 *            The class of a feature extractor
 * @param parameters
 *            The configuration parameters for this feature extractor
 * @return Configured feature which carries a randomly generated unique identification
 */
public static TcFeature create(Class<? extends Resource> featureName, Object... parameters)
{
    /*
     * Each feature needs a unique name. It is built from the simple name of the feature class
     * followed by a random UUID (dashes removed, so the id stays purely alphanumeric).
     *
     * System.nanoTime() is deliberately not used as the suffix: its resolution is not
     * guaranteed to be one nanosecond, so two features of the same class that are created
     * directly after each other could end up with the same id.
     */
    String id = featureName.getSimpleName()
            + java.util.UUID.randomUUID().toString().replace("-", "");

    // The id is handed to the extractor as a regular configuration parameter (name, value).
    List<Object> params = getParameterAsString(parameters);
    params.add(FeatureExtractorResource_ImplBase.PARAM_UNIQUE_EXTRACTOR_NAME);
    params.add(id);

    return new TcFeature(featureName, id, params.toArray());
}
/**
 * Creates a new {@link TcFeature} whose identifier is chosen by the caller.
 *
 * @param id
 *            The id of the feature which must be unique among all used features
 * @param featureName
 *            The class of the feature extractor that shall be instantiated
 * @param parameters
 *            The list of the parameters for this feature extractor
 * @return A configured feature which is identified by the user provided identification string
 */
public static TcFeature create(String id, Class<? extends Resource> featureName,
        Object... parameters)
{
    List<Object> configuration = getParameterAsString(parameters);

    // The caller-supplied id is passed on to the extractor as a (name, value) parameter pair.
    configuration.add(FeatureExtractorResource_ImplBase.PARAM_UNIQUE_EXTRACTOR_NAME);
    configuration.add(id);

    return new TcFeature(featureName, id, configuration.toArray());
}
/**
 * Creates a new {@link TcFeature} whose identifier is chosen by the caller.
 *
 * @param id
 *            The id of the feature which must be unique among all used features
 * @param featureName
 *            The class of the feature extractor that shall be instantiated
 * @param parameters
 *            The list of the parameters for this feature extractor
 * @return A configured feature which is identified by the user provided identification string
 */
public static TcFeature create(String id, Class<? extends Resource> featureName,
        Object... parameters)
{
    List<Object> configuration = getParameterAsString(parameters);

    // The caller-supplied id is passed on to the extractor as a (name, value) parameter pair.
    configuration.add(FeatureExtractorResource_ImplBase.PARAM_UNIQUE_EXTRACTOR_NAME);
    configuration.add(id);

    return new TcFeature(featureName, id, configuration.toArray());
}
/**
 * Builds the feature set named "DummyFeatureSet": a token-ratio-per-document feature plus
 * character n-grams of length 1 to 3, limited to the top 500 n-grams.
 *
 * @return the newly created feature set
 */
private static TcFeatureSet getFeatureSet()
{
    TcFeatureSet featureSet = new TcFeatureSet(
            "DummyFeatureSet",
            TcFeatureFactory.create(TokenRatioPerDocument.class),
            TcFeatureFactory.create(
                    CharacterNGram.class,
                    CharacterNGram.PARAM_NGRAM_USE_TOP_K, 500,
                    CharacterNGram.PARAM_NGRAM_MIN_N, 1,
                    CharacterNGram.PARAM_NGRAM_MAX_N, 3));
    return featureSet;
}
/**
 * Provides a feature set that consists of a single {@code WordNGram} feature created without
 * explicit parameters.
 *
 * @return the newly created feature set
 */
public TcFeatureSet getFeatureSet()
{
    TcFeatureSet featureSet = new TcFeatureSet(TcFeatureFactory.create(WordNGram.class));
    return featureSet;
}
/**
 * Provides a feature set that consists of a single
 * {@code DiffNrOfTokensPairFeatureExtractor} feature created without explicit parameters.
 *
 * @return the newly created feature set
 */
public TcFeatureSet getFeatureSet()
{
    TcFeatureSet featureSet = new TcFeatureSet(
            TcFeatureFactory.create(DiffNrOfTokensPairFeatureExtractor.class));
    return featureSet;
}
/**
 * Provides a feature set that consists of a single {@code CharacterNGram} feature created
 * without explicit parameters.
 *
 * @return the newly created feature set
 */
public TcFeatureSet getFeatureSet()
{
    TcFeatureSet featureSet = new TcFeatureSet(TcFeatureFactory.create(CharacterNGram.class));
    return featureSet;
}
// Target surface form features for the relative annotation indices -1, 0 and +1.
for (int relativeIndex = -1; relativeIndex <= 1; relativeIndex++) {
    set.add(create(TargetSurfaceFormContextFeature.class,
            TargetSurfaceFormContextFeature.PARAM_RELATIVE_TARGET_ANNOTATION_INDEX,
            relativeIndex));
}

// Token context features for the target indices -1, 0 and +1, each limited to the top 1000.
for (int targetIndex = -1; targetIndex <= 1; targetIndex++) {
    set.add(create(TokenContext.class,
            TokenContext.PARAM_TARGET_INDEX, targetIndex,
            TokenContext.PARAM_NGRAM_USE_TOP_K, 1000));
}

set.add(create(IsFirstLetterCapitalized.class));

// Character unigrams use a smaller top-k (50) than the longer n-grams below.
set.add(create(LuceneCharacterNGram.class,
        LuceneCharacterNGram.PARAM_NGRAM_MIN_N, 1,
        LuceneCharacterNGram.PARAM_NGRAM_MAX_N, 1,
        LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 50));

// One feature per fixed n-gram length 2, 3 and 4 (min == max), each limited to the top 750.
for (int ngramSize = 2; ngramSize <= 4; ngramSize++) {
    set.add(create(LuceneCharacterNGram.class,
            LuceneCharacterNGram.PARAM_NGRAM_MIN_N, ngramSize,
            LuceneCharacterNGram.PARAM_NGRAM_MAX_N, ngramSize,
            LuceneCharacterNGram.PARAM_NGRAM_USE_TOP_K, 750));
}
tcModelLocation.getAbsolutePath());
// Create the feature under the given id for the extractor class feClass, passing the
// remaining parameters as an array.
// NOTE(review): the name idRemovedParameters suggests the unique-name parameter was stripped
// beforehand, presumably because create(id, ...) adds it again - confirm against the caller.
TcFeature feature = TcFeatureFactory.create(id, feClass, idRemovedParameters.toArray());
// Obtain the external resource description held by the feature.
ExternalResourceDescription exRes = feature.getActualValue();
tcModelLocation.getAbsolutePath());
// Create the feature under the given id for the extractor class feClass, passing the
// remaining parameters as an array.
// NOTE(review): the name idRemovedParameters suggests the unique-name parameter was stripped
// beforehand, presumably because create(id, ...) adds it again - confirm against the caller.
TcFeature feature = TcFeatureFactory.create(id, feClass, idRemovedParameters.toArray());
// Obtain the external resource description held by the feature.
ExternalResourceDescription exRes = feature.getActualValue();
/**
 * Configures and runs a train/test experiment named "trainTest".
 *
 * Both the training and the test data are read with a {@code FolderwiseDataReader} using the
 * file pattern {@code "*}{@code /*.txt"}; they differ only in their source location. The
 * experiment uses a single {@code WordNGram} feature, single-label learning on document level
 * and the {@code LibsvmAdapter} as machine learning backend.
 *
 * @throws Exception
 *             if creating the reader descriptions or running the experiment fails
 */
public static void runExperiment() throws Exception
{
    // Reader for the training corpus.
    CollectionReaderDescription trainReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTrain,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    // Reader for the test corpus; identical apart from the source location.
    CollectionReaderDescription testReader = CollectionReaderFactory.createReaderDescription(
            FolderwiseDataReader.class,
            FolderwiseDataReader.PARAM_SOURCE_LOCATION, corpusFilePathTest,
            FolderwiseDataReader.PARAM_LANGUAGE, LANGUAGE_CODE,
            FolderwiseDataReader.PARAM_PATTERNS, "*/*.txt");

    new ExperimentBuilder()
            .experiment(ExperimentType.TRAIN_TEST, "trainTest")
            .dataReaderTrain(trainReader)
            .dataReaderTest(testReader)
            .preprocessing(getPreprocessing())
            .features(TcFeatureFactory.create(WordNGram.class))
            .learningMode(LearningMode.SINGLE_LABEL)
            .featureMode(FeatureMode.DOCUMENT)
            .machineLearningBackend(new MLBackend(new LibsvmAdapter()))
            .run();
}