@Override public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) { double c_W = seenCounter.getCount(iTW); boolean seen = (c_W > 0.0); if (seen) { return super.score(iTW, loc, word, featureSpec); } else { float score; // if (useMaxentUnknownWordModel) { // score = cml.score(iTW, 0); // } else { score = this.getUnknownWordModel().score(iTW, loc, 0.0, 0.0, 0.0, word); // ChineseUnknownWordModel doesn't use the final three params // } return score; } } }
pb_W_T = getUnknownWordModel().score(iTW, loc, c_T, total, smooth[0], word); } else { double pb_W0_T = getUnknownWordModel().score(iTW, 0, c_T, total, smooth[0], word); double pb_W1_T = getUnknownWordModel().score(iTW, 1, c_T, total, smooth[0], word); pb_W_T = Math.log((Math.exp(pb_W0_T) + 2 * Math.exp(pb_W1_T))/3);
private double probWordTag(String word, int loc, int wordId, int tagId) { double cW = wordTag.totalCount(wordId); double cWT = wordTag.getCount(wordId, tagId); // p_L double p_W = cW / wordTag.totalCount(); // p_T double cTseen = tagCounter.getCount(tagId); double p_T = cTseen / tagCounter.totalCount(); // p_T_L double p_W_T = 0.0; if (cW > 0.0) { // Seen lemma double p_T_W = 0.0; if (cW > 100.0 && cWT > 0.0) { p_T_W = cWT / cW; } else { double cTunseen = wordTagUnseen.getCount(tagId); // TODO p_T_U is 0? double p_T_U = cTunseen / wordTagUnseen.totalCount(); p_T_W = (cWT + smooth[1]*p_T_U) / (cW + smooth[1]); } p_W_T = p_T_W * p_W / p_T; } else { // Unseen word. Score based on the word signature (of the surface form) IntTaggedWord iTW = new IntTaggedWord(wordId, tagId); double c_T = tagCounter.getCount(tagId); p_W_T = Math.exp(getUnknownWordModel().score(iTW, loc, c_T, tagCounter.totalCount(), smooth[0], word)); } return p_W_T; }
pb_W_T = getUnknownWordModel().score(iTW, loc, c_T, total, smooth[0], word); } else { double pb_W0_T = getUnknownWordModel().score(iTW, 0, c_T, total, smooth[0], word); double pb_W1_T = getUnknownWordModel().score(iTW, 1, c_T, total, smooth[0], word); pb_W_T = Math.log((Math.exp(pb_W0_T) + 2 * Math.exp(pb_W1_T))/3);
@Override public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) { double c_W = seenCounter.getCount(iTW); boolean seen = (c_W > 0.0); if (seen) { return super.score(iTW, loc, word, featureSpec); } else { float score; // if (useMaxentUnknownWordModel) { // score = cml.score(iTW, 0); // } else { score = this.getUnknownWordModel().score(iTW, loc, 0.0, 0.0, 0.0, word); // ChineseUnknownWordModel doesn't use the final three params // } return score; } } }
pb_W_T = getUnknownWordModel().score(iTW, loc, c_T, total, smooth[0], word); } else { double pb_W0_T = getUnknownWordModel().score(iTW, 0, c_T, total, smooth[0], word); double pb_W1_T = getUnknownWordModel().score(iTW, 1, c_T, total, smooth[0], word); pb_W_T = Math.log((Math.exp(pb_W0_T) + 2 * Math.exp(pb_W1_T))/3);
@Override public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) { double c_W = seenCounter.getCount(iTW); boolean seen = (c_W > 0.0); if (seen) { return super.score(iTW, loc, word, featureSpec); } else { float score; // if (useMaxentUnknownWordModel) { // score = cml.score(iTW, 0); // } else { score = this.getUnknownWordModel().score(iTW, loc, 0.0, 0.0, 0.0, word); // ChineseUnknownWordModel doesn't use the final three params // } return score; } } }
pb_W_T = getUnknownWordModel().score(iTW, loc, c_T, total, smooth[0], word); } else { double pb_W0_T = getUnknownWordModel().score(iTW, 0, c_T, total, smooth[0], word); double pb_W1_T = getUnknownWordModel().score(iTW, 1, c_T, total, smooth[0], word); pb_W_T = Math.log((Math.exp(pb_W0_T) + 2 * Math.exp(pb_W1_T))/3);
@Override public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) { double c_W = seenCounter.getCount(iTW); boolean seen = (c_W > 0.0); if (seen) { return super.score(iTW, loc, word, featureSpec); } else { float score; // if (useMaxentUnknownWordModel) { // score = cml.score(iTW, 0); // } else { score = this.getUnknownWordModel().score(iTW, loc, 0.0, 0.0, 0.0, word); // ChineseUnknownWordModel doesn't use the final three params // } return score; } } }
@Override public float score(IntTaggedWord iTW, int loc) { double c_W = seenCounter.getCount(iTW); boolean seen = (c_W > 0.0); if (seen) { if (useRandomWalk) { return (float) scoreRandomWalk(iTW); } else { return super.score(iTW, loc); } } else { float score; // if (useMaxentUnknownWordModel) { // score = cml.score(iTW, 0); // } else { score = this.getUnknownWordModel().score(iTW, loc, 0.0, 0.0, 0.0); // ChineseUnknownWordModel doesn't use the final three params // } return score; } }
private double probWordTag(String word, int loc, int wordId, int tagId) { double cW = wordTag.totalCount(wordId); double cWT = wordTag.getCount(wordId, tagId); // p_L double p_W = cW / wordTag.totalCount(); // p_T double cTseen = tagCounter.getCount(tagId); double p_T = cTseen / tagCounter.totalCount(); // p_T_L double p_W_T = 0.0; if (cW > 0.0) { // Seen lemma double p_T_W = 0.0; if (cW > 100.0 && cWT > 0.0) { p_T_W = cWT / cW; } else { double cTunseen = wordTagUnseen.getCount(tagId); // TODO p_T_U is 0? double p_T_U = cTunseen / wordTagUnseen.totalCount(); p_T_W = (cWT + smooth[1]*p_T_U) / (cW + smooth[1]); } p_W_T = p_T_W * p_W / p_T; } else { // Unseen word. Score based on the word signature (of the surface form) IntTaggedWord iTW = new IntTaggedWord(wordId, tagId); double c_T = tagCounter.getCount(tagId); p_W_T = Math.exp(getUnknownWordModel().score(iTW, loc, c_T, tagCounter.totalCount(), smooth[0], word)); } return p_W_T; }
private double probWordTag(String word, int loc, int wordId, int tagId) { double cW = wordTag.totalCount(wordId); double cWT = wordTag.getCount(wordId, tagId); // p_L double p_W = cW / wordTag.totalCount(); // p_T double cTseen = tagCounter.getCount(tagId); double p_T = cTseen / tagCounter.totalCount(); // p_T_L double p_W_T = 0.0; if (cW > 0.0) { // Seen lemma double p_T_W = 0.0; if (cW > 100.0 && cWT > 0.0) { p_T_W = cWT / cW; } else { double cTunseen = wordTagUnseen.getCount(tagId); // TODO p_T_U is 0? double p_T_U = cTunseen / wordTagUnseen.totalCount(); p_T_W = (cWT + smooth[1]*p_T_U) / (cW + smooth[1]); } p_W_T = p_T_W * p_W / p_T; } else { // Unseen word. Score based on the word signature (of the surface form) IntTaggedWord iTW = new IntTaggedWord(wordId, tagId); double c_T = tagCounter.getCount(tagId); p_W_T = Math.exp(getUnknownWordModel().score(iTW, loc, c_T, tagCounter.totalCount(), smooth[0], word)); } return p_W_T; }
// Excerpt: pair wordId with tagId, read the tag's count from tagCounter, then store in
// p_W_T the exponential of the unknown word model's score for that pair (the score is
// presumably a log-probability, given the Math.exp -- confirm against the model).
IntTaggedWord iTW = new IntTaggedWord(wordId, tagId); double c_T = tagCounter.getCount(tagId); p_W_T = Math.exp(getUnknownWordModel().score(iTW, loc, c_T, tagCounter.totalCount(), smooth[0], word));