EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), ChineseTreebankLanguagePack.ENCODING); if ( ! child.isPhrasal()) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Deleting META tree that should be XML metadata in chtb_5200.df: " + child, ChineseTreebankLanguagePack.ENCODING); return null; EncodingPrintWriter.err.println("Error: tree with no children: " + tree, ChineseTreebankLanguagePack.ENCODING); if (subsubtree.value().equals("ROOT")) { if (subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) { EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6 (chtb_3095.bn): " + newTree, ChineseTreebankLanguagePack.ENCODING); List<Tree> children = subsubtree.getChildrenAsList(); children = children.subList(1,children.size()); subtree.setChildren(children); EncodingPrintWriter.err.println(" Corrected as: " + newTree, ChineseTreebankLanguagePack.ENCODING); // spaced to align with above if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().test(subtree.firstChild().value())) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); if (subtree.firstChild().value().matches("他")) {
/**
 * Prints a string to the error writer using the requested encoding.
 *
 * <p>First calls {@code setupErrWriter(encoding)}, then forwards {@code o}
 * to {@code cachedErrWriter.println}. Both are declared outside this
 * method; presumably the setup call (re)creates the cached writer for the
 * given encoding when needed. NOTE(review): confirm against
 * {@code setupErrWriter}, including how a {@code null} or unsupported
 * encoding is handled.
 *
 * @param o the text to print
 * @param encoding the character encoding name passed to the writer setup
 */
public static void println(String o, String encoding) {
  // The setup call must come first: it receives the encoding before the
  // cached writer is used on the next line.
  setupErrWriter(encoding);
  cachedErrWriter.println(o);
}
EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), ChineseTreebankLanguagePack.ENCODING); if ( ! child.isPhrasal()) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Deleting META tree that should be XML metadata in chtb_5200.df: " + child, ChineseTreebankLanguagePack.ENCODING); return null; EncodingPrintWriter.err.println("Error: tree with no children: " + tree, ChineseTreebankLanguagePack.ENCODING); if (subsubtree.value().equals("ROOT")) { if (subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) { EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6 (chtb_3095.bn): " + newTree, ChineseTreebankLanguagePack.ENCODING); List<Tree> children = subsubtree.getChildrenAsList(); children = children.subList(1,children.size()); subtree.setChildren(children); EncodingPrintWriter.err.println(" Corrected as: " + newTree, ChineseTreebankLanguagePack.ENCODING); // spaced to align with above if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().test(subtree.firstChild().value())) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); if (subtree.firstChild().value().matches("他")) {
EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), ChineseTreebankLanguagePack.ENCODING); Tree child = kids[0]; if ( ! child.isPhrasal()) { EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, ChineseTreebankLanguagePack.ENCODING); Tree added = tf.newTreeNode("FRAG", Arrays.asList(kids)); newTree.setChild(0, added); EncodingPrintWriter.err.println("Error: tree with no children: " + tree, ChineseTreebankLanguagePack.ENCODING); if (subsubtree.value().equals("ROOT")) { if (subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) { EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6: " + newTree, ChineseTreebankLanguagePack.ENCODING); List<Tree> children = subsubtree.getChildrenAsList(); children = children.subList(1,children.size()); subtree.setChildren(children); EncodingPrintWriter.err.println(" Corrected as: " + newTree, ChineseTreebankLanguagePack.ENCODING); // spaced to align with above if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(subtree.firstChild().value())) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); if (subtree.firstChild().value().matches("\u4ed6")) { if (DEBUG) {
EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), ChineseTreebankLanguagePack.ENCODING); if ( ! child.isPhrasal()) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Deleting META tree that should be XML metadata in chtb_5200.df: " + child, ChineseTreebankLanguagePack.ENCODING); return null; EncodingPrintWriter.err.println("Error: tree with no children: " + tree, ChineseTreebankLanguagePack.ENCODING); if (subsubtree.value().equals("ROOT")) { if (subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) { EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6 (chtb_3095.bn): " + newTree, ChineseTreebankLanguagePack.ENCODING); List<Tree> children = subsubtree.getChildrenAsList(); children = children.subList(1,children.size()); subtree.setChildren(children); EncodingPrintWriter.err.println(" Corrected as: " + newTree, ChineseTreebankLanguagePack.ENCODING); // spaced to align with above if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().test(subtree.firstChild().value())) { if (DEBUG) { EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); if (subtree.firstChild().value().matches("他")) {
if (Character.isHighSurrogate(cp)) { if (i + 1 < len) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: non-BMP codepoint U+" + Integer.toHexString(Character.codePointAt(in, i)) + " in " + in); } else { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: unmatched high surrogate character U+" + Integer.toHexString(Character.codePointAt(in, i)) + " in " + in); cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A || cub == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B) { EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(cp) + " in " + in);
EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), ChineseTreebankLanguagePack.ENCODING); Tree child = kids[0]; if ( ! child.isPhrasal()) { EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, ChineseTreebankLanguagePack.ENCODING); Tree added = tf.newTreeNode("FRAG", Arrays.asList(kids)); newTree.setChild(0, added); EncodingPrintWriter.err.println("Error: tree with no children: " + tree, ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6: " + newTree, ChineseTreebankLanguagePack.ENCODING); List<Tree> children = subtree.getChildrenAsList(); children = children.subList(1,children.size() - 1); if (subtree.value().matches("NP")) { if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(subtree.firstChild().value())) { EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, ChineseTreebankLanguagePack.ENCODING); subtree.setValue("PU"); } else if (subtree.parent(newTree).value().matches("NP")) { EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); subtree.setValue("NN"); } else { EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), ChineseTreebankLanguagePack.ENCODING); EncodingPrintWriter.err.println("Correcting error: \"\u4ed6\" under PU tag; tag changed to PN: " + subtree, ChineseTreebankLanguagePack.ENCODING); subtree.setValue("PN"); } else if (subtree.firstChild().value().matches("tw|\u534A\u7A74\u5F0F")) { EncodingPrintWriter.err.println("Correcting error: \"" + subtree.firstChild().value() + "\" under PU tag; tag changed to NN: " + subtree, ChineseTreebankLanguagePack.ENCODING);
if (pf != null) pf.print('|' + correctTags[i]); if (verboseResults) { EncodingPrintWriter.err.println((maxentTagger.dict.isUnknown(sent.get(i)) ? "Unk" : "") + "Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);
if (pf != null) pf.print('|' + correctTags[i]); if (verboseResults) { EncodingPrintWriter.err.println((maxentTagger.dict.isUnknown(sent.get(i)) ? "Unk" : "") + "Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);
if (pf != null) pf.print('|' + correctTags[i]); if (verboseResults) { EncodingPrintWriter.err.println((maxentTagger.dict.isUnknown(sent.get(i)) ? "Unk" : "") + "Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);
if (pf != null) pf.print('|' + correctTags[i]); if (verboseResults) { EncodingPrintWriter.err.println("Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);