org.apache.solr.schema.FieldType.getAnalyzer java code examples

/**
 * Records the settings describing a corpus that is to be created.
 *
 * @param language value stored in {@code language}
 * @param indexField value stored in {@code indexedField}
 * @param storeField value stored in {@code storedField}
 * @param fieldType field type whose {@code getAnalyzer()} result is stored in {@code analyzer}
 * @param fst value stored in {@code fst}
 */
protected CorpusCreationInfo(String language, String indexField, String storeField, FieldType fieldType, File fst){
  // Resolve the analyzer first; the remaining assignments are plain copies.
  this.analyzer = fieldType.getAnalyzer();
  this.fst = fst;
  this.storedField = storeField;
  this.indexedField = indexField;
  this.language = language;
}

/**
 * Records the settings describing a corpus that is to be created.
 *
 * @param language value stored in {@code language}
 * @param indexField value stored in {@code indexedField}
 * @param storeField value stored in {@code storedField}
 * @param fieldType field type whose {@code getAnalyzer()} result is stored in {@code analyzer}
 * @param fst value stored in {@code fst}
 */
protected CorpusCreationInfo(String language, String indexField, String storeField, FieldType fieldType, File fst){
  // Resolve the analyzer first; the remaining assignments are plain copies.
  this.analyzer = fieldType.getAnalyzer();
  this.fst = fst;
  this.storedField = storeField;
  this.indexedField = indexField;
  this.language = language;
}

/**
 * Looks up the analyzer for a field name.
 *
 * @param fieldName name used as the key into {@code analyzers}
 * @return the entry from {@code analyzers} when one exists, otherwise the
 *         analyzer of {@code getDynamicFieldType(fieldName)}
 */
protected Analyzer getAnalyzer(String fieldName)
{
 final Analyzer known = analyzers.get(fieldName);
 if (known == null) {
  // No explicit entry: fall back to the dynamic field type for this name.
  return getDynamicFieldType(fieldName).getAnalyzer();
 }
 return known;
}

/**
 * Records the settings describing a corpus and its FST file.
 *
 * @param language value stored in {@code language}
 * @param indexField value stored in {@code indexedField}
 * @param storeField value stored in {@code storedField}
 * @param fieldType field type supplying both {@code analyzer} (via {@code getAnalyzer()})
 *        and {@code taggingAnalyzer} (via {@code getQueryAnalyzer()})
 * @param fst value stored in {@code fst}; also consulted to initialise {@code fstDate}
 * @param allowCreation value stored in {@code allowCreation}
 */
protected CorpusInfo(String language, String indexField, String storeField, FieldType fieldType, File fst, boolean allowCreation){
  this.language = language;
  this.indexedField = indexField;
  this.storedField = storeField;
  this.fst = fst;
  this.allowCreation = allowCreation;
  this.analyzer = fieldType.getAnalyzer();
  this.taggingAnalyzer = fieldType.getQueryAnalyzer();
  // fstDate is the file's last-modified time, or null when fst is not a regular file.
  if (fst.isFile()) {
    this.fstDate = new Date(fst.lastModified());
  } else {
    this.fstDate = null;
  }
}
/**

protected HashMap<String,Analyzer> analyzerCache() {
 HashMap<String,Analyzer> cache = new HashMap<String,Analyzer>();
  for (SchemaField f : getFields().values()) {
  Analyzer analyzer = f.getType().getAnalyzer();
  cache.put(f.getName(), analyzer);
 }
 return cache;
}

/**
 * Scans the index analyzer of every field type in {@code schema}. Each
 * {@code ReversedWildcardFilterFactory} found in a {@code TokenizerChain} is
 * recorded in {@code leadingWildcards} under the field-type map key; if at
 * least one is found, {@code setAllowLeadingWildcard(true)} is called.
 */
protected void checkAllowLeadingWildcards() {
 boolean foundReversedFilter = false;
 for (Entry<String, FieldType> typeEntry : schema.getFieldTypes().entrySet()) {
  Analyzer indexAnalyzer = typeEntry.getValue().getAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
   continue;
  }
  // examine the indexing analysis chain if it supports leading wildcards
  TokenizerChain chain = (TokenizerChain)indexAnalyzer;
  for (TokenFilterFactory candidate : chain.getTokenFilterFactories()) {
   if (!(candidate instanceof ReversedWildcardFilterFactory)) {
    continue;
   }
   foundReversedFilter = true;
   leadingWildcards.put(typeEntry.getKey(), (ReversedWildcardFilterFactory)candidate);
  }
 }
 // XXX should be enabled on a per-field basis
 if (!foundReversedFilter) {
  return;
 }
 setAllowLeadingWildcard(true);
}

/**
 * Registers every entry of {@code xpathFieldConfig} with {@code indexConfig} as an
 * {@code XPathField}, taking the analyzer and stored flag from the schema field
 * named by the entry's key.
 *
 * @throws SolrException if the schema field's type is {@code null}
 */
private void addXPathFields() {
  for (Entry<String,String> configured : xpathFieldConfig) {
    final String fieldName = configured.getKey();
    final SchemaField schemaField = schema.getField(fieldName);
    final FieldType type = schemaField.getType();
    if (type == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Field " + fieldName + " declared in lux config, but not defined in schema");
    }
    indexConfig.addField(new XPathField(
        fieldName,
        configured.getValue(),
        type.getAnalyzer(),
        schemaField.stored() ? Store.YES : Store.NO,
        schemaField));
  }
}

// Excerpt (truncated; the closing brace is outside this snippet): when the analyzer
// is a SolrAnalyzer, set its position increment gap to the integer parsed from positionInc.
Analyzer analyzer = getAnalyzer();
if (analyzer instanceof SolrAnalyzer) {
 ((SolrAnalyzer)analyzer).setPositionIncrementGap(Integer.parseInt(positionInc));

// Excerpt: open an IndexWriter on ramDir using the field type's index analyzer and
// no field-length limit, then set the merge factor to 300.
// NOTE(review): the boolean 'true' is presumably the "create" flag — confirm against
// the IndexWriter constructor of the Lucene version in use.
IndexWriter writer = new IndexWriter(ramDir, fieldType.getAnalyzer(),
    true, IndexWriter.MaxFieldLength.UNLIMITED);
writer.setMergeFactor(300);

// Excerpt (truncated; loop bodies continue outside this snippet): for each field name
// in the document, look up its schema field type and index analyzer, then walk the
// field's values.
for (String name : doc.getFieldNames()) {
 FieldType ft = schema.getFieldType(name);
 Analyzer analyzer = ft.getAnalyzer();
 Collection<Object> vals = doc.getFieldValues(name);
 for (Object val : vals) {

/**
 * Configures the command from {@code config}: reads {@code inputField},
 * {@code outputField} and {@code solrFieldType}, resolves the named field type through
 * the schema of the configured {@code solrLocator}, and keeps that type's index analyzer.
 *
 * @throws MorphlineCompilationException if the schema has no field type with the
 *         configured name, or if the initial token stream cannot be created
 */
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
 super(builder, config, parent, child, context);
 this.inputFieldName = getConfigs().getString(config, "inputField");
 this.outputFieldName = getConfigs().getString(config, "outputField");
 final String typeName = getConfigs().getString(config, "solrFieldType");
 final SolrLocator solrLocator = new SolrLocator(getConfigs().getConfig(config, "solrLocator"), context);
 LOG.debug("solrLocator: {}", solrLocator);

 final FieldType resolvedType = solrLocator.getIndexSchema().getFieldTypeByName(typeName);
 if (resolvedType == null) {
  throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + typeName, config);
 }

 this.analyzer = resolvedType.getAnalyzer();
 Preconditions.checkNotNull(analyzer);
 // register CharTermAttribute for later (implicit) reuse
 // NOTE(review): 'reader' is not declared in this constructor; presumably a field — confirm.
 try {
  this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
 } catch (IOException e) {
  throw new MorphlineCompilationException("Cannot create token stream", config, e);
 }
 Preconditions.checkNotNull(token);
 validateArguments();
}

 // Excerpt (truncated; the closing brace is outside this snippet): flag the field as the
 // unique key, then report the analyzer's position increment gap only when it is non-zero.
 field.add("uniqueKey", true);
if (ft.getAnalyzer().getPositionIncrementGap(f.getName()) != 0) {
  field.add("positionIncrementGap", ft.getAnalyzer().getPositionIncrementGap(f.getName()));

// Excerpt: describe one field type (tokenized flag, implementing class, index and query
// analyzer info) and register the description under the type's name.
field.add("tokenized", ft.isTokenized() );
field.add("className", ft.getClass().getName());
field.add("indexAnalyzer", getAnalyzerInfo(ft.getAnalyzer()));
field.add("queryAnalyzer", getAnalyzerInfo(ft.getQueryAnalyzer()));
types.add( ft.getTypeName(), field );

// Excerpt: analyze the request's field value with the field type's index analyzer and
// store the resulting tokens under the "index" key.
AnalysisContext context = new AnalysisContext(fieldName, fieldType, fieldType.getAnalyzer(), termsToMatch);
NamedList analyzedTokens = analyzeValue(analysisRequest.getFieldValue(), context);
analyzeResults.add("index", analyzedTokens);

// Excerpt (truncated; the enclosing blocks are closed outside this snippet): take the
// index and query analyzers from the schema field's type, then inspect the destination
// field's index analyzer.
// NOTE(review): xmlAnalyzer and destination are declared outside this snippet.
Analyzer xmlQueryAnalyzer = null;
if (schemaField != null) {
  xmlAnalyzer = schemaField.getType().getAnalyzer();
  xmlQueryAnalyzer = schemaField.getType().getQueryAnalyzer();
  if (xmlAnalyzer != null) {
  Analyzer analyzer = destination.getType().getAnalyzer();
  if (analyzer == null) {

/**
 * Checks that the index analyzer of {@code JSON_FIELD} is a {@code TokenizerChain}
 * with an {@code ExtendedJsonTokenizerFactory} and the three expected filter
 * factories, in order.
 */
@Test
public void testSirenFieldAnalyzer() throws Exception {
 final FieldType jsonType = h.getCore().getLatestSchema().getField(JSON_FIELD).getType();
 assertTrue(jsonType.getAnalyzer() instanceof TokenizerChain);
 final TokenizerChain chain = (TokenizerChain) jsonType.getAnalyzer();
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory);
 // 3 filters for index analyzer
 final Class<?>[] expectedFilters = {
  DatatypeAnalyzerFilterFactory.class,
  PositionAttributeFilterFactory.class,
  SirenPayloadFilterFactory.class
 };
 assertNotNull(chain.getTokenFilterFactories());
 assertEquals(expectedFilters.length, chain.getTokenFilterFactories().length);
 for (int i = 0; i < expectedFilters.length; i++) {
  assertTrue(expectedFilters[i].isInstance(chain.getTokenFilterFactories()[i]));
 }
}

/**
 * Checks the datatype analyzers registered on the first filter factory of the
 * {@code JSON_FIELD} index analyzer: their count, and the analyzers configured for
 * the json field, xsd:string and xsd:int datatypes.
 */
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
 final String jsonFieldDatatype = "http://json.org/field";
 final String xsdString = "http://www.w3.org/2001/XMLSchema#string";
 final String xsdInt = "http://www.w3.org/2001/XMLSchema#int";

 final FieldType jsonType = h.getCore().getLatestSchema().getField(JSON_FIELD).getType();
 TokenizerChain chain = (TokenizerChain) jsonType.getAnalyzer();
 assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
 final DatatypeAnalyzerFilterFactory datatypeFilter = (DatatypeAnalyzerFilterFactory) chain.getTokenFilterFactories()[0];
 assertNotNull(datatypeFilter.getDatatypeAnalyzers());
 assertEquals(9, datatypeFilter.getDatatypeAnalyzers().size());

 // json field names: whitespace tokenizer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(jsonFieldDatatype));
 chain = (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(jsonFieldDatatype);
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

 // xsd:string: UAX29 URL/email tokenizer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdString));
 chain = (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(xsdString);
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

 // xsd:int: numeric analyzer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdInt));
 assertTrue(datatypeFilter.getDatatypeAnalyzers().get(xsdInt) instanceof IntNumericAnalyzer);
 final IntNumericAnalyzer intAnalyzer = (IntNumericAnalyzer) datatypeFilter.getDatatypeAnalyzers().get(xsdInt);
 assertEquals(8, intAnalyzer.getPrecisionStep());
 assertEquals(32, intAnalyzer.getNumericParser().getValueSize());
 assertEquals(NumericType.INT, intAnalyzer.getNumericParser().getNumericType());
}

/**
 * Checks the datatype analyzers registered on the first filter factory of the
 * {@code JSON_FIELD} index analyzer: their count, and the analyzers configured for
 * the json field, xsd:string and xsd:int datatypes.
 */
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
 final String jsonFieldDatatype = "http://json.org/field";
 final String xsdString = "http://www.w3.org/2001/XMLSchema#string";
 final String xsdInt = "http://www.w3.org/2001/XMLSchema#int";

 final FieldType jsonType = h.getCore().getSchema().getField(JSON_FIELD).getType();
 TokenizerChain chain = (TokenizerChain) jsonType.getAnalyzer();
 assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
 final DatatypeAnalyzerFilterFactory datatypeFilter = (DatatypeAnalyzerFilterFactory) chain.getTokenFilterFactories()[0];
 assertNotNull(datatypeFilter.getDatatypeAnalyzers());
 assertEquals(9, datatypeFilter.getDatatypeAnalyzers().size());

 // json field names: whitespace tokenizer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(jsonFieldDatatype));
 chain = (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(jsonFieldDatatype);
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

 // xsd:string: UAX29 URL/email tokenizer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdString));
 chain = (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(xsdString);
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

 // xsd:int: numeric analyzer
 assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdInt));
 assertTrue(datatypeFilter.getDatatypeAnalyzers().get(xsdInt) instanceof IntNumericAnalyzer);
 final IntNumericAnalyzer intAnalyzer = (IntNumericAnalyzer) datatypeFilter.getDatatypeAnalyzers().get(xsdInt);
 assertEquals(8, intAnalyzer.getPrecisionStep());
 assertEquals(32, intAnalyzer.getNumericParser().getValueSize());
 assertEquals(NumericType.INT, intAnalyzer.getNumericParser().getNumericType());
}

/**
 * Checks that the index analyzer of the {@code "concise"} field is a
 * {@code TokenizerChain} with a {@code ConciseJsonTokenizerFactory} and the four
 * expected filter factories, in order.
 */
@Test
public void testConciseSirenFieldAnalyzer() throws Exception {
 final FieldType conciseType = h.getCore().getLatestSchema().getField("concise").getType();
 assertTrue(conciseType.getAnalyzer() instanceof TokenizerChain);
 final TokenizerChain chain = (TokenizerChain) conciseType.getAnalyzer();
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory);
 // 4 filters for index analyzer
 final Class<?>[] expectedFilters = {
  DatatypeAnalyzerFilterFactory.class,
  PathEncodingFilterFactory.class,
  PositionAttributeFilterFactory.class,
  SirenPayloadFilterFactory.class
 };
 assertNotNull(chain.getTokenFilterFactories());
 assertEquals(expectedFilters.length, chain.getTokenFilterFactories().length);
 for (int i = 0; i < expectedFilters.length; i++) {
  assertTrue(expectedFilters[i].isInstance(chain.getTokenFilterFactories()[i]));
 }
}

/**
 * Checks that the index analyzer of {@code JSON_FIELD} is a {@code TokenizerChain}
 * with a {@code JsonTokenizerFactory} and the three expected filter factories, in
 * order, and that the field type has no query analyzer.
 */
@Test
public void testSirenFieldAnalyzer() throws Exception {
 final FieldType jsonType = h.getCore().getSchema().getField(JSON_FIELD).getType();
 assertTrue(jsonType.getAnalyzer() instanceof TokenizerChain);
 final TokenizerChain chain = (TokenizerChain) jsonType.getAnalyzer();
 assertNotNull(chain.getTokenizerFactory());
 assertTrue(chain.getTokenizerFactory() instanceof JsonTokenizerFactory);
 // 3 filters for index analyzer
 final Class<?>[] expectedFilters = {
  DatatypeAnalyzerFilterFactory.class,
  PositionAttributeFilterFactory.class,
  SirenPayloadFilterFactory.class
 };
 assertNotNull(chain.getTokenFilterFactories());
 assertEquals(expectedFilters.length, chain.getTokenFilterFactories().length);
 for (int i = 0; i < expectedFilters.length; i++) {
  assertTrue(expectedFilters[i].isInstance(chain.getTokenFilterFactories()[i]));
 }
 // no query analyzer
 assertNull(jsonType.getQueryAnalyzer());
}

Javadoc

Returns the Analyzer to be used when indexing fields of this type.

This method may be called many times, at any time.

Popular methods of FieldType

getQueryAnalyzer
Returns the Analyzer to be used when searching fields of this type. This method may be called many times, at any time.
getTypeName
The Name of this FieldType as specified in the schema file
getIndexAnalyzer
toObject
isTokenized
Returns true if fields of this type should be tokenized
toInternal
Convert an external value (from XML update command or from query string) into the internal format fo
getPostingsFormat
getRangeQuery
Returns a Query instance for doing range searches on this field type. org.apache.solr.search.SolrQue
getValueSource
called to get the default value source (normally, from the Lucene FieldCache.)
isMultiValued
Returns true if fields can have multiple values
readableToIndexed
Given the readable value, return the term value that will match it.
storedToIndexed
Given the stored field, return the indexed form

Popular in Java

Updating database using SQL prepared statement
getContentResolver (Context)
setRequestProperty (URLConnection)
notifyDataSetChanged (ArrayAdapter)
BufferedReader (java.io)
Wraps an existing Reader and buffers the input. Expensive interaction with the underlying reader is
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
Date (java.util)
A specific moment in time, with millisecond precision. Values typically come from System#currentTime
Cipher (javax.crypto)
This class provides access to implementations of cryptographic ciphers for encryption and decryption
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
Table (org.hibernate.mapping)
A relational table
Best plugins for Eclipse

How to use the getAnalyzer method in org.apache.solr.schema.FieldType

Best Java code snippets using org.apache.solr.schema.FieldType.getAnalyzer (Showing top 20 results out of 315)

How to use the getAnalyzer method in org.apache.solr.schema.FieldType