/**
 * Visits a property and recurses into its children, extending {@code path} at each level.
 * <p>
 * NOTE(review): this line looks like a truncated extraction of the method: the braces are unbalanced,
 * {@code value} is declared twice in nested scopes, and the bodies of the branches are missing.
 * TODO restore the complete body from the original file before relying on it.
 * <p>
 * What is visible here: string properties and {@code Object[]} values are only examined when
 * {@code isInterestingPath(path)} accepts the path; the children of a {@code ComplexProperty} are visited
 * with {@code path + '/' + <prefixed field name>}; one more recursive call uses {@code path + "/*"}
 * (presumably for list items; confirm against the full source).
 *
 * @param property the property to inspect
 * @param path the path associated with {@code property}
 */
protected void findStrings(Property property, String path) { if (property instanceof StringProperty) { if (isInterestingPath(path)) { Serializable value = property.getValue(); if (value instanceof String) { if (isInterestingPath(path)) { Serializable value = property.getValue(); if (value instanceof Object[]) { for (Property p : ((ComplexProperty) property).getChildren()) { String pp = p.getField().getName().getPrefixedName(); findStrings(p, path + '/' + pp); findStrings(p, path + "/*");
protected void extractAndUpdateSimpleText() { if (fulltextConfiguration.fulltextSearchDisabled) { // if fulltext search is disabled, we don't extract simple text at all return; } for (String indexName : fulltextConfiguration.indexNames) { if (!fulltextConfiguration.indexesAllSimple.contains(indexName) && fulltextConfiguration.propPathsByIndexSimple.get(indexName) == null) { // nothing to do: index not configured for simple text continue; } Set<String> includedPaths = fulltextConfiguration.indexesAllSimple.contains(indexName) ? null : fulltextConfiguration.propPathsByIndexSimple.get(indexName); Set<String> excludedPaths = fulltextConfiguration.propPathsExcludedByIndexSimple.get(indexName); // get string properties List<String> strings = new StringsExtractor().findStrings(document, includedPaths, excludedPaths); // transform to text (remove HTML and entities) // we do this here rather than in the indexing backend (Elasticsearch) because it's more efficient here // add space at beginning and end for simulated phrase search using LIKE "% foo bar %" String text = strings.stream().map(this::stringToText).collect(Collectors.joining(" ", " ", " ")); // limit size text = limitStringSize(text, fulltextConfiguration.fulltextFieldSizeLimit); String property = getFulltextPropertyName(SYSPROP_FULLTEXT_SIMPLE, indexName); for (DocumentRef docRef : docsToUpdate) { session.setDocumentSystemProp(docRef, property, text); } } }
/** * Finds strings from the document for a given set of included and excluded paths. * <p> * Paths must be specified with a schema prefix in all cases (normalized). * * @param document the document * @param includedPaths the paths to include, or {@code null} for all paths * @param excludedPaths the paths to exclude, or {@code null} for none * @return a list of strings (each string is never {@code null}) */ public List<String> findStrings(DocumentModel document, Set<String> includedPaths, Set<String> excludedPaths) { this.document = document; this.includedPaths = includedPaths; this.excludedPaths = excludedPaths; strings = new ArrayList<>(); for (String schema : document.getSchemas()) { for (Property property : document.getPropertyObjects(schema)) { String path = property.getField().getName().getPrefixedName(); if (!path.contains(":")) { // add schema name as prefix if the schema doesn't have a prefix path = property.getSchema().getName() + ":" + path; } findStrings(property, path); } } return strings; }