/**
 * Vectorizes a single instance without retaining, in memory, the values of
 * string attributes that are not being vectorized (only the string values of
 * the instance currently being converted are held).
 *
 * @param input the instance to convert
 * @return the converted instance
 * @throws Exception if there is no input format set and/or the dictionary has
 *           not been constructed yet
 */
public Instance vectorizeInstance(Instance input) throws Exception {
  // A fresh single-element int array is handed to the three-argument
  // overload on every call; 'false' disables string value retention.
  final int[] offsetHolder = new int[1];
  final boolean retainStringAttValuesInMemory = false;
  return vectorizeInstance(input, offsetHolder, retainStringAttValuesInMemory);
}
/**
 * Converts one instance by delegating to the configured vectorizer.
 *
 * @param instance the instance to convert
 * @return the instance produced by {@code m_vectorizer}
 * @throws Exception if the vectorizer fails to convert the instance
 */
@Override
protected Instance process(Instance instance) throws Exception {
  final Instance converted = m_vectorizer.vectorizeInstance(instance);
  return converted;
}
/**
 * Vectorizes a single instance without retaining, in memory, the values of
 * string attributes that are not being vectorized (only the string values of
 * the instance currently being converted are held).
 *
 * @param input the instance to convert
 * @return the converted instance
 * @throws Exception if there is no input format set and/or the dictionary has
 *           not been constructed yet
 */
public Instance vectorizeInstance(Instance input) throws Exception {
  // A fresh single-element int array is handed to the three-argument
  // overload on every call; 'false' disables string value retention.
  final int[] offsetHolder = new int[1];
  final boolean retainStringAttValuesInMemory = false;
  return vectorizeInstance(input, offsetHolder, retainStringAttValuesInMemory);
}
/**
 * Vectorizes a single instance, letting the caller decide whether the values
 * of string attributes that are not being vectorized stay in memory.
 *
 * @param input the instance to convert
 * @param retainStringAttValuesInMemory true if the values of string
 *          attributes not being vectorized should be retained in memory
 * @return the converted instance
 * @throws Exception if there is no input format set and/or the dictionary has
 *           not been constructed yet
 */
public Instance vectorizeInstance(Instance input,
  boolean retainStringAttValuesInMemory) throws Exception {
  // A fresh single-element int array is handed to the three-argument
  // overload on every call.
  final int[] offsetHolder = new int[1];
  return vectorizeInstance(input, offsetHolder, retainStringAttValuesInMemory);
}
/**
 * Converts one instance by delegating to the configured vectorizer.
 *
 * @param instance the instance to convert
 * @return the instance produced by {@code m_vectorizer}
 * @throws Exception if the vectorizer fails to convert the instance
 */
@Override
protected Instance process(Instance instance) throws Exception {
  final Instance converted = m_vectorizer.vectorizeInstance(instance);
  return converted;
}
/**
 * Vectorizes a single instance, letting the caller decide whether the values
 * of string attributes that are not being vectorized stay in memory.
 *
 * @param input the instance to convert
 * @param retainStringAttValuesInMemory true if the values of string
 *          attributes not being vectorized should be retained in memory
 * @return the converted instance
 * @throws Exception if there is no input format set and/or the dictionary has
 *           not been constructed yet
 */
public Instance vectorizeInstance(Instance input,
  boolean retainStringAttValuesInMemory) throws Exception {
  // A fresh single-element int array is handed to the three-argument
  // overload on every call.
  final int[] offsetHolder = new int[1];
  return vectorizeInstance(input, offsetHolder, retainStringAttValuesInMemory);
}
/** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ @Override public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isFirstBatchDone()) { Instance inst = m_dictionaryBuilder.vectorizeInstance(instance); push(inst, false); // No need to copy return true; } else { bufferInput(instance); return false; } }
/** * Input an instance for filtering. Filter requires all training instances be * read before producing output. * * @param instance the input instance. * @return true if the filtered instance may now be collected with output(). * @throws IllegalStateException if no input structure has been defined. */ @Override public boolean input(Instance instance) throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (m_NewBatch) { resetQueue(); m_NewBatch = false; } if (isFirstBatchDone()) { Instance inst = m_dictionaryBuilder.vectorizeInstance(instance); push(inst, false); // No need to copy return true; } else { bufferInput(instance); return false; } }
// Vectorize every instance of the batch and append each result to 'vectorized': instance 0 first, then instances 1..numInstances()-1 in the loop. The same offsetHolder array is passed to every call, and the trailing 'true' is the flag for retaining string attribute values in memory.
vectorized.add(vectorizeInstance(batch.instance(0), offsetHolder, true)); for (int i = 1; i < batch.numInstances(); i++) { vectorized .add(vectorizeInstance(batch.instance(i), offsetHolder, true));
// Vectorize every instance of the batch and append each result to 'vectorized': instance 0 first, then instances 1..numInstances()-1 in the loop. The same offsetHolder array is passed to every call, and the trailing 'true' is the flag for retaining string attribute values in memory.
vectorized.add(vectorizeInstance(batch.instance(0), offsetHolder, true)); for (int i = 1; i < batch.numInstances(); i++) { vectorized .add(vectorizeInstance(batch.instance(i), offsetHolder, true));
public void testVectorizeInstanceWordPresenceNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); Instance vectorized = builder.vectorizeInstance(data1.instance(0)); assertEquals(2, vectorized.numAttributes()); // values of the two attributes should be 1 (presence indicators) assertEquals(1, (int) vectorized.value(0)); assertEquals(1, (int) vectorized.value(1)); }
public void testVectorizeInstanceWordPresenceNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); Instance vectorized = builder.vectorizeInstance(data1.instance(0)); assertEquals(2, vectorized.numAttributes()); // values of the two attributes should be 1 (presence indicators) assertEquals(1, (int) vectorized.value(0)); assertEquals(1, (int) vectorized.value(1)); }
public void testVectorizeInstanceWordCountsNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setOutputWordCounts(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); Instance vectorized = builder.vectorizeInstance(data1.instance(0)); assertEquals(2, vectorized.numAttributes()); // "the" occurs twice in the first index and "over" once assertEquals(2, (int) vectorized.value(0)); assertEquals(1, (int) vectorized.value(1)); }
public void testVectorizeInstanceWordCountsNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setOutputWordCounts(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); Instance vectorized = builder.vectorizeInstance(data1.instance(0)); assertEquals(2, vectorized.numAttributes()); // "the" occurs twice in the first index and "over" once assertEquals(2, (int) vectorized.value(0)); assertEquals(1, (int) vectorized.value(1)); }