/**
 * Renders the final, display-ready label for a single feature.
 * <p>
 * Feature indices below the number of word images denote single words and are
 * formatted directly. Any other index denotes a phrase, looked up at
 * <code>featureIndex - wordCount</code> in the phrase table; its words are
 * formatted one after another, separated by a single space when the context's
 * language reports that it uses space delimiters.
 *
 * @param context preprocessing context providing word images, word types,
 *        phrase word indices and the language
 * @param featureIndex index of the word or phrase feature to format
 * @return the formatted label text
 */
public String format(PreprocessingContext context, int featureIndex)
{
    final char [][] images = context.allWords.image;
    final int [][] phraseTable = context.allPhrases.wordIndices;
    final int singleWordLimit = images.length;
    final StringBuilder out = new StringBuilder();

    // Single-word feature: the word is the first (and only) one and is never
    // flagged as common.
    if (featureIndex < singleWordLimit)
    {
        appendFormatted(out, images[featureIndex], true, false);
        return out.toString();
    }

    // Phrase feature: emit each constituent word in order.
    final boolean spaceDelimited = context.language.getLanguageCode().usesSpaceDelimiters();
    final int [] phraseWords = phraseTable[featureIndex - singleWordLimit];
    final short [] wordTypes = context.allWords.type;

    boolean first = true;
    for (final int wordIndex : phraseWords)
    {
        if (!first && spaceDelimited)
        {
            out.append(' ');
        }
        appendFormatted(out, images[wordIndex], first,
            TokenTypeUtils.isCommon(wordTypes[wordIndex]));
        first = false;
    }

    return out.toString();
}
/** * Build the cluster's label from suffix tree edge indices. */ private String buildLabel(int [] phraseIndices) { // Count the number of terms first. int termsCount = 0; for (int j = 0; j < phraseIndices.length; j += 2) { termsCount += phraseIndices[j + 1] - phraseIndices[j] + 1; } // Extract terms info for the phrase and construct the label. final boolean [] stopwords = new boolean[termsCount]; final char [][] images = new char [termsCount][]; final short [] tokenTypes = context.allWords.type; int k = 0; for (int i = 0; i < phraseIndices.length; i += 2) { for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; j++, k++) { final int termIndex = sb.input.get(j); images[k] = context.allWords.image[termIndex]; stopwords[k] = TokenTypeUtils.isCommon(tokenTypes[termIndex]); } } return LabelFormatter.format(images, stopwords, context.language.getLanguageCode().usesSpaceDelimiters()); }