/**
 * Compares two objects as CharSequences, looking only at the chars that are contained in
 * {@code category}; every other char is skipped on both sides.
 * <p>
 * Identical references are always equal. If either argument is not a CharSequence (including
 * null), the result is false. Two sequences are equal exactly when the chars that remain after
 * skipping match one-for-one, so a sequence that runs out of compared chars before the other
 * does is not equal to it.
 *
 * @param left the first object to compare; may be null
 * @param right the second object to compare; may be null
 * @return true if both are the same reference, or both are CharSequences whose chars in
 *         {@code category} form the same sequence
 */
@Override
public boolean areEqual(Object left, Object right) {
    if (left == right) {
        return true;
    }
    if (!(left instanceof CharSequence && right instanceof CharSequence)) {
        return false;
    }
    final CharSequence l = (CharSequence) left, r = (CharSequence) right;
    final int llen = l.length(), rlen = r.length();
    int i = 0, j = 0;
    while (true) {
        // Advance past skipped chars, but never read beyond the end of either sequence.
        while (i < llen && !category.contains(l.charAt(i))) {
            i++;
        }
        while (j < rlen && !category.contains(r.charAt(j))) {
            j++;
        }
        if (i == llen || j == rlen) {
            // Equal only when both sides ran out of compared chars at the same time.
            return i == llen && j == rlen;
        }
        if (l.charAt(i++) != r.charAt(j++)) {
            return false;
        }
    }
}
}
/**
 * Compares two objects as CharSequences, looking only at the chars that are NOT contained in
 * {@code category}; every char that is in the category is skipped on both sides.
 * <p>
 * Identical references are always equal. If either argument is not a CharSequence (including
 * null), the result is false. Two sequences are equal exactly when the chars that remain after
 * skipping match one-for-one, so a sequence that runs out of compared chars before the other
 * does is not equal to it.
 *
 * @param left the first object to compare; may be null
 * @param right the second object to compare; may be null
 * @return true if both are the same reference, or both are CharSequences whose chars outside
 *         {@code category} form the same sequence
 */
@Override
public boolean areEqual(Object left, Object right) {
    if (left == right) {
        return true;
    }
    if (!(left instanceof CharSequence && right instanceof CharSequence)) {
        return false;
    }
    final CharSequence l = (CharSequence) left, r = (CharSequence) right;
    final int llen = l.length(), rlen = r.length();
    int i = 0, j = 0;
    while (true) {
        // Advance past skipped chars, but never read beyond the end of either sequence.
        while (i < llen && category.contains(l.charAt(i))) {
            i++;
        }
        while (j < rlen && category.contains(r.charAt(j))) {
            j++;
        }
        if (i == llen || j == rlen) {
            // Equal only when both sides ran out of compared chars at the same time.
            return i == llen && j == rlen;
        }
        if (l.charAt(i++) != r.charAt(j++)) {
            return false;
        }
    }
}
}
/**
 * Produces a 32-bit hash of {@code data}, mixing in only the chars that are NOT contained in
 * {@code category}.
 * <p>
 * A null argument hashes to 0, and anything that is not a CharSequence falls back to its own
 * {@code hashCode()}.
 *
 * @param data the object to hash; may be null
 * @return the hash code described above
 */
@Override
public int hash(final Object data) {
    if (data == null) {
        return 0;
    }
    if (!(data instanceof CharSequence)) {
        return data.hashCode();
    }
    final CharSequence text = (CharSequence) data;
    long mixed = 0x1A976FDF6BF60B8EL;
    long stream = 0x60642E2A34326F15L;
    for (int idx = 0, n = text.length(); idx < n; idx++) {
        final char ch = text.charAt(idx);
        if (category.contains(ch)) {
            // chars in the category do not contribute to the hash
            continue;
        }
        stream += (ch ^ 0xC6BC279692B5CC85L) * 0x6C8E9CF570932BABL;
        // rotating left by 54 is the same as (x << 54 | x >>> 10) on a 64-bit value
        mixed = Long.rotateLeft(mixed ^ stream, 54);
    }
    // final avalanche steps
    mixed += (stream ^ (stream >>> 26)) * 0x632BE59BD9B4E019L;
    mixed = (mixed ^ (mixed >>> 33)) * 0xFF51AFD7ED558CCDL;
    mixed = (mixed ^ (mixed >>> 33)) * 0xC4CEB9FE1A85EC53L;
    return (int) mixed;
}
/**
 * Produces a 32-bit hash of {@code data}, mixing in only the chars that are contained in
 * {@code category}.
 * <p>
 * A null argument hashes to 0, and anything that is not a CharSequence falls back to its own
 * {@code hashCode()}.
 *
 * @param data the object to hash; may be null
 * @return the hash code described above
 */
@Override
public int hash(final Object data) {
    if (data == null) {
        return 0;
    }
    if (!(data instanceof CharSequence)) {
        return data.hashCode();
    }
    final CharSequence text = (CharSequence) data;
    long mixed = 0x1A976FDF6BF60B8EL;
    long stream = 0x60642E2A34326F15L;
    for (int idx = 0, n = text.length(); idx < n; idx++) {
        final char ch = text.charAt(idx);
        if (!category.contains(ch)) {
            // chars outside the category do not contribute to the hash
            continue;
        }
        stream += (ch ^ 0xC6BC279692B5CC85L) * 0x6C8E9CF570932BABL;
        // rotating left by 54 is the same as (x << 54 | x >>> 10) on a 64-bit value
        mixed = Long.rotateLeft(mixed ^ stream, 54);
    }
    // final avalanche steps
    mixed += (stream ^ (stream >>> 26)) * 0x632BE59BD9B4E019L;
    mixed = (mixed ^ (mixed >>> 33)) * 0xFF51AFD7ED558CCDL;
    mixed = (mixed ^ (mixed >>> 33)) * 0xC4CEB9FE1A85EC53L;
    return (int) mixed;
}
/**
 * Optional post-processing step to run after {@link #analyze(CharSequence)}: replaces the words
 * in this MarkovText that were used as proper nouns with ciphered versions produced by the given
 * {@link NaturalLanguageCipher}. This MarkovText is modified in-place.
 * <p>
 * A word is replaced when its first char is an upper-case letter ({@code Category.Lu}) and its
 * index appears in the second or third slot of any group of three entries in a row of
 * {@code processed}; this is how a word capitalized in the middle of a sentence is recognized.
 * Word indices below 5 are never examined.
 * <p>
 * Typically the NaturalLanguageCipher is built from a {@link FakeLanguageGen} matching the style
 * wanted for all names in the text. Every candidate word scans the rows of {@code processed}, so
 * this is best done during pre-processing rather than at runtime when the corpus was large.
 *
 * @param translator a NaturalLanguageCipher that will be used to translate proper nouns in this
 *                   MarkovText's word array
 */
public void changeNames(NaturalLanguageCipher translator) {
    for (int wordIndex = 5; wordIndex < words.length; wordIndex++) {
        final String original = words[wordIndex];
        if (!Category.Lu.contains(original.charAt(0))) {
            continue;
        }
        boolean referenced = false;
        SEARCH:
        for (int row = 5; row < words.length; row++) {
            // entries are visited in groups of three; only slots 1 and 2 of each group are checked
            for (int base = 0; base < processed[row].length; base += 3) {
                if (wordIndex == processed[row][base + 1] || wordIndex == processed[row][base + 2]) {
                    referenced = true;
                    break SEARCH;
                }
            }
        }
        if (referenced) {
            words[wordIndex] = translator.cipher(original);
        }
    }
}

/**
/**
 * Optional post-processing step to run after {@link #analyze(CharSequence)}: replaces the words
 * in this MarkovTextLimited that were used as proper nouns with ciphered versions produced by the
 * given {@link NaturalLanguageCipher}. This MarkovTextLimited is modified in-place.
 * <p>
 * A word is replaced when its first char is an upper-case letter ({@code Category.Lu}) and its
 * index appears in the second or third slot of any group of three entries in a row of
 * {@code processed}; this is how a word capitalized in the middle of a sentence is recognized.
 * Word indices below 5 are never examined.
 * <p>
 * Typically the NaturalLanguageCipher is built from a {@link FakeLanguageGen} matching the style
 * wanted for all names in the text. Every candidate word scans the rows of {@code processed}, so
 * this is best done during pre-processing rather than at runtime when the corpus was large.
 *
 * @param translator a NaturalLanguageCipher that will be used to translate proper nouns in this
 *                   MarkovTextLimited's word array
 */
public void changeNames(NaturalLanguageCipher translator) {
    for (int wordIndex = 5; wordIndex < words.length; wordIndex++) {
        final String original = words[wordIndex];
        if (!Category.Lu.contains(original.charAt(0))) {
            continue;
        }
        boolean referenced = false;
        SEARCH:
        for (int row = 5; row < words.length; row++) {
            // entries are visited in groups of three; only slots 1 and 2 of each group are checked
            for (int base = 0; base < processed[row].length; base += 3) {
                if (wordIndex == processed[row][base + 1] || wordIndex == processed[row][base + 2]) {
                    referenced = true;
                    break SEARCH;
                }
            }
        }
        if (referenced) {
            words[wordIndex] = translator.cipher(original);
        }
    }
}

/**
/**
 * Looks up a replacement for {@code word}, using its lower-cased form as the key, and carries the
 * capitalization of {@code word} over to the result.
 * <p>
 * If {@code mappings} has the key, the next element from the mapped value is used; if
 * {@code languages} has the key instead, a new word is generated by the mapped value. When the
 * second char of {@code word} is upper-case the whole result is upper-cased; otherwise, when the
 * first char is upper-case, the result has its first char capitalized. An empty word, an empty
 * replacement, or a word that matches neither map is returned without further changes.
 *
 * @param word the word to replace; must not be null
 * @return the replacement with matching capitalization, or {@code word} itself if no key matches
 */
public String lookup(String word) {
    if (word.isEmpty()) {
        return word;
    }
    final String key = word.toLowerCase();
    if (mappings.containsKey(key)) {
        final String replacement = mappings.get(key).next();
        if (replacement.isEmpty()) {
            return replacement;
        }
        if (word.length() > 1 && Category.Lu.contains(word.charAt(1))) {
            return replacement.toUpperCase();
        }
        if (!Category.Lu.contains(word.charAt(0))) {
            return replacement;
        }
        return Character.toUpperCase(replacement.charAt(0)) + replacement.substring(1);
    }
    if (!languages.containsKey(key)) {
        return word;
    }
    // An all-caps request generates an uncapitalized word first and upper-cases it afterwards.
    final boolean allCaps = word.length() > 1 && Category.Lu.contains(word.charAt(1));
    final boolean capitalize = !allCaps && Category.Lu.contains(word.charAt(0));
    final String invented = languages.get(key).word(rng, capitalize, rng.between(2, 4));
    return allCaps ? invented.toUpperCase() : invented;
}
if (Category.Lu.contains(letter)) { letter = Character.toLowerCase(letter);
/**
 * Appends the translation of the matched text to {@code dest}, copying the capitalization of
 * the match onto the translation.
 * <p>
 * The whole match (group 0) is lower-cased and looked up in {@code vocabulary}. If group 0 is
 * null nothing is appended; if the vocabulary has no entry, the matched text is appended
 * unchanged. Otherwise the first char of the translation is upper-cased when the first char of
 * the match is upper-case, and all chars after the first are upper-cased when the second char
 * of the match is upper-case. An empty translation is appended as-is (that is, nothing is
 * added) instead of failing on the missing first char.
 *
 * @param match the match whose text should be translated
 * @param dest the buffer that receives the translated (or original) text
 */
public void appendSubstitution(MatchResult match, TextBuffer dest) {
    final String matched = match.group(0);
    if (matched == null) {
        return;
    }
    final String translated = vocabulary.get(matched.toLowerCase());
    if (translated == null) {
        dest.append(matched);
        return;
    }
    final char[] chars = translated.toCharArray();
    // Guard both lengths: an empty translation has no chars[0], and an empty match has no char 0.
    if (chars.length > 0 && match.length() > 0 && Category.Lu.contains(match.charAt(0))) {
        chars[0] = Character.toUpperCase(chars[0]);
    }
    if (match.length() > 1 && Category.Lu.contains(match.charAt(1))) {
        for (int i = 1; i < chars.length; i++) {
            chars[i] = Character.toUpperCase(chars[i]);
        }
    }
    dest.append(chars, 0, chars.length);
}
}
/**
 * Parses a group reference that begins at {@code data[i]} and stores the resulting group number
 * in {@code term.memreg}.
 * <p>
 * If the first char is a digit, the run of digits is converted with {@code makeNumber}.
 * Otherwise the run of {@code Category.IdentifierPart} chars is taken as a group name and
 * resolved through {@code gmap}. Any following chars in {@code Category.Z} are skipped, and the
 * next char must be {@code closer}.
 *
 * @param data the pattern chars being parsed
 * @param i index of the first char of the group id
 * @param end index at which the readable part of {@code data} stops (exclusive)
 * @param term the term whose {@code memreg} receives the group number
 * @param gmap maps group names to group numbers
 * @param closer the char that must terminate the reference
 * @return the index just past {@code closer}
 * @throws PatternSyntaxException if {@code end} is reached too early, the named group is
 *         unknown, or {@code closer} is missing
 */
private static int parseGroupId(char[] data, int i, int end, Term term, HashMap<String, Integer> gmap, char closer) throws PatternSyntaxException {
    final int idStart = i;
    final int groupId;
    if (Character.isDigit(data[i])) {
        // numeric reference: the first char is already known to be a digit
        do {
            if (++i == end) {
                throw new PatternSyntaxException("group_id expected");
            }
        } while (Character.isDigit(data[i]));
        groupId = makeNumber(idStart, i, data);
    } else {
        // named reference
        while (Category.IdentifierPart.contains(data[i])) {
            if (++i == end) {
                throw new PatternSyntaxException("group_id expected");
            }
        }
        final String groupName = new String(data, idStart, i - idStart);
        final Integer known = gmap.get(groupName);
        if (known == null) {
            throw new PatternSyntaxException("backreference to unknown group: " + groupName);
        }
        groupId = known;
    }
    // skip separators between the id and the closing char
    while (Category.Z.contains(data[i])) {
        if (++i == end) {
            throw new PatternSyntaxException("'" + closer + "' expected");
        }
    }
    if (data[i++] != closer) {
        throw new PatternSyntaxException("'" + closer + "' expected");
    }
    term.memreg = groupId;
    return i;
}
/**
 * Parses a group reference that begins at {@code data[i]} and stores the resulting group number
 * in {@code term.memreg}.
 * <p>
 * If the first char is a digit, the run of digits is converted with {@code makeNumber}.
 * Otherwise the run of {@code Category.IdentifierPart} chars is taken as a group name and
 * resolved through {@code gmap}. Any following chars in {@code Category.Space} are skipped, and
 * the next char must be {@code closer}.
 *
 * @param data the pattern chars being parsed
 * @param i index of the first char of the group id
 * @param end index at which the readable part of {@code data} stops (exclusive)
 * @param term the term whose {@code memreg} receives the group number
 * @param gmap maps group names to group numbers
 * @param closer the char that must terminate the reference
 * @return the index just past {@code closer}
 * @throws PatternSyntaxException if {@code end} is reached too early, the named group is
 *         unknown, or {@code closer} is missing
 */
private static int parseGroupId(char[] data, int i, int end, Term term, HashMap<String, Integer> gmap, char closer) throws PatternSyntaxException {
    final int idStart = i;
    final int groupId;
    if (Character.isDigit(data[i])) {
        // numeric reference: the first char is already known to be a digit
        do {
            if (++i == end) {
                throw new PatternSyntaxException("group_id expected");
            }
        } while (Character.isDigit(data[i]));
        groupId = makeNumber(idStart, i, data);
    } else {
        // named reference
        while (Category.IdentifierPart.contains(data[i])) {
            if (++i == end) {
                throw new PatternSyntaxException("group_id expected");
            }
        }
        final String groupName = new String(data, idStart, i - idStart);
        final Integer known = gmap.get(groupName);
        if (known == null) {
            throw new PatternSyntaxException("backreference to unknown group: " + groupName);
        }
        groupId = known;
    }
    // skip whitespace between the id and the closing char
    while (Category.Space.contains(data[i])) {
        if (++i == end) {
            throw new PatternSyntaxException("'" + closer + "' expected");
        }
    }
    if (data[i++] != closer) {
        throw new PatternSyntaxException("'" + closer + "' expected");
    }
    term.memreg = groupId;
    return i;
}
private StringBuilder generateInner(StringBuilder name) { for (int runs = 0; runs < LAST_LETTER_CANDIDATES_MAX; runs++) { name.setLength(0); // Pick size int size = rng.getRandomElement(sizes); // Pick first letter char latest = rng.getRandomElement(firstLetterSamples); name.append(latest); for (int i = 1; i < size - 2; i++) { name.append(latest = getRandomNextLetter(latest)); } // Attempt to find a last letter for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) { char lastLetter = rng.getRandomElement(lastLetterSamples); char intermediateLetterCandidate = getIntermediateLetter(latest, lastLetter); // Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end) if (Category.L.contains(intermediateLetterCandidate)) { name.append(intermediateLetterCandidate).append(lastLetter); break; } } // Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher if (validateGrouping(name) && checkLevenshtein(name)) { return name; } } name.setLength(0); return name.append(rng.getRandomElement(names)); } /**
int nstart, nend; nstart = p; if(Category.N.contains(c1)) throw new PatternSyntaxException("number at the start of a named group"); while (Category.IdentifierPart.contains(c1)) { c1 = data[++p]; skip++; boolean isDecl; c = data[p]; while (Category.Space.contains(c)) { c = data[++p]; skip++; while (Category.IdentifierPart.contains(c)) { c = data[++p]; skip++; while (Category.Space.contains(c)) { c = data[++p]; skip++;
int nstart, nend; nstart = p; if(Category.N.contains(c1)) throw new PatternSyntaxException("number at the start of a named group"); while (Category.IdentifierPart.contains(c1)) { c1 = data[++p]; skip++; boolean isDecl; c = data[p]; while (Category.Z.contains(c)) { c = data[++p]; skip++; while (Category.IdentifierPart.contains(c)) { c = data[++p]; skip++; while (Category.Z.contains(c)) { c = data[++p]; skip++;
if(nx.isEmpty()) return; if(word.length() > 1 && Category.Lu.contains(word.charAt(1))) if(Category.Lu.contains(word.charAt(0))) if(word.length() > 1 && Category.Lu.contains(word.charAt(1))) else if(Category.Lu.contains(word.charAt(0))) if(word.length() > 1 && Category.Lu.contains(word.charAt(1))) else if(Category.Lu.contains(word.charAt(0))) if(word.length() > 1 && Category.Lu.contains(word.charAt(1))) else if(Category.Lu.contains(word.charAt(0)))
char cp = data[p]; boolean mi = false, mb = false, mr = false; while (Category.Z.contains(cp) || Category.Po.contains(cp)) { p++; if (p == end) throw new PatternSyntaxException("'group_id' expected"); char cp = data[p]; boolean mi = false, mb = false, mr = false; while (Category.Z.contains(cp) || Category.Po.contains(cp)) { p++; if (p == end) throw new PatternSyntaxException("'group_id' expected");
char cp = data[p]; boolean mi = false, mb = false, mr = false; while (Category.Space.contains(cp) || Category.Po.contains(cp)) { p++; if (p == end) throw new PatternSyntaxException("'group_id' expected"); char cp = data[p]; boolean mi = false, mb = false, mr = false; while (Category.Space.contains(cp) || Category.Po.contains(cp)) { p++; if (p == end) throw new PatternSyntaxException("'group_id' expected");