/**
 * Builds an {@link Event} from a single observation line.
 * Everything before the last space is split on whitespace into context
 * strings; everything after the last space becomes the outcome. The values
 * are whatever {@code RealValueFileEventStream.parseContexts} returns for
 * those context strings.
 *
 * @param obs the observation line to parse
 * @return the resulting event, or {@code null} if {@code obs} contains no space
 */
private Event createEvent(String obs) {
  int outcomeSeparator = obs.lastIndexOf(' ');
  if (outcomeSeparator == -1) {
    // No space means there is no context/outcome boundary to split on.
    return null;
  }

  String contextPart = obs.substring(0, outcomeSeparator);
  String[] contexts = contextPart.split("\\s+");
  float[] values = RealValueFileEventStream.parseContexts(contexts);
  String outcome = obs.substring(outcomeSeparator + 1);
  return new Event(outcome, contexts, values);
}
/**
 * Checks outcome, context, values and string form of an {@link Event}
 * created from an outcome and a context only.
 * NOTE(review): the method name says "WithValues", but the event built here
 * has no values array ({@code getValues()} is asserted to be null).
 */
@Test
public void testWithValues() {
  Event event = new Event("o1", new String[]{"aa", "bb", "cc"});

  String[] expectedContext = {"aa", "bb", "cc"};
  Assert.assertEquals("o1", event.getOutcome());
  Assert.assertArrayEquals(expectedContext, event.getContext());
  Assert.assertNull(event.getValues());
  Assert.assertEquals("o1 [aa bb cc]", event.toString());
}
/**
 * Renders an event as one line of text: the outcome, then every context
 * string preceded by a single space, then the value of the
 * {@code line.separator} system property.
 *
 * @param event The event for which a string representation is needed.
 * @return A string representing the specified event.
 */
public static String toLine(Event event) {
  StringBuilder line = new StringBuilder();
  line.append(event.getOutcome());

  for (String feature : event.getContext()) {
    line.append(" ");
    line.append(feature);
  }

  line.append(System.getProperty("line.separator"));
  return line.toString();
}
protected List<ComparableEvent> index(ObjectStream<Event> events, Map<String, Integer> predicateIndex) throws IOException { Map<String, Integer> omap = new HashMap<>(); List<ComparableEvent> eventsToCompare = new ArrayList<>(); Event ev; while ((ev = events.read()) != null) { omap.putIfAbsent(ev.getOutcome(), omap.size()); int[] cons = Arrays.stream(ev.getContext()) .map(pred -> predicateIndex.get(pred)) .filter(Objects::nonNull) .mapToInt(i -> i).toArray(); // drop events with no active features if (cons.length > 0) { int ocID = omap.get(ev.getOutcome()); eventsToCompare.add(new ComparableEvent(ocID, cons, ev.getValues())); } else { display("Dropped event " + ev.getOutcome() + ":" + Arrays.asList(ev.getContext()) + "\n"); } } outcomeLabels = toIndexedStringArray(omap); predLabels = toIndexedStringArray(predicateIndex); return eventsToCompare; }
/**
 * Reads the next event from {@code samples} and, if there is one, writes its
 * {@code toString()} form followed by {@code "\n"} to {@code writer}.
 *
 * @return the event that was read, or {@code null} if {@code samples} returned null
 * @throws IOException if reading or writing fails
 */
public Event read() throws IOException {
  Event event = samples.read();
  if (event == null) {
    return null;
  }

  writer.write(event.toString());
  writer.write("\n");
  return event;
}
/**
 * Resets the sequence stream, re-reads every sequence, and counts how many
 * of the events produced by {@code updateContext} have an outcome whose id
 * equals the corresponding entry of {@code outcomeList}. The count and its
 * ratio to {@code numEvents} are reported via {@code display}.
 *
 * @param params the parameters used to build the model passed to {@code updateContext}
 * @throws IOException if resetting or reading the sequence stream fails
 */
private void trainingStats(MutableContext[] params) throws IOException {
  int numCorrect = 0;
  // Running position in outcomeList across all sequences.
  int oei = 0;

  sequenceStream.reset();
  for (Sequence sequence = sequenceStream.read(); sequence != null;
       sequence = sequenceStream.read()) {
    Event[] taggerEvents = sequenceStream.updateContext(sequence,
        new PerceptronModel(params, predLabels, outcomeLabels));

    for (Event taggerEvent : taggerEvents) {
      int max = omap.get(taggerEvent.getOutcome());
      if (max == outcomeList[oei]) {
        numCorrect++;
      }
      oei++;
    }
  }

  double ratio = (double) numCorrect / numEvents;
  display(". (" + numCorrect + "/" + numEvents + ") " + ratio + "\n");
}
}
while ((ev = eventStream.read()) != null) { events.add(ev); update(ev.getContext(), counter);
eventCount++; eventStore.writeUTF(ev.getOutcome()); eventStore.writeInt(ev.getContext().length); String[] ec = ev.getContext(); update(ec, counter); for (String ctxString : ec) eventStore.writeUTF(ctxString); if (ev.getValues() == null) { eventStore.writeInt(0); eventStore.writeInt(ev.getValues().length); for (float value : ev.getValues()) eventStore.writeFloat(value);
/**
 * Asserts that the model has exactly two outcomes, that its best outcome for
 * the event's context equals the event's outcome, and that each outcome
 * probability is {@code higher_probability} when that outcome is the
 * event's outcome and {@code 1.0 - higher_probability} otherwise
 * (tolerance 0.0001).
 *
 * @param model the model under test
 * @param event the event whose context is evaluated
 * @param higher_probability the probability expected for the event's own outcome
 */
private void testModel(MaxentModel model, Event event, double higher_probability) {
  double[] outcomes = model.eval(event.getContext());
  String outcome = model.getBestOutcome(outcomes);

  Assert.assertEquals(2, outcomes.length);
  Assert.assertEquals(event.getOutcome(), outcome);

  for (int oi = 0; oi < 2; oi++) {
    boolean isEventOutcome = event.getOutcome().equals(model.getOutcome(oi));
    double expected = isEventOutcome ? higher_probability : 1.0 - higher_probability;
    Assert.assertEquals(expected, outcomes[oi], 0.0001);
  }
}
/**
 * Reads the next event from the superclass and, if there is one, feeds the
 * UTF-8 bytes of its {@code toString()} form into {@code digest}.
 *
 * @return the event that was read, or {@code null} if the superclass returned null
 * @throws IOException if the underlying read fails
 */
@Override
public Event read() throws IOException {
  Event event = super.read();
  if (event == null) {
    return null;
  }

  byte[] eventBytes = event.toString().getBytes(StandardCharsets.UTF_8);
  digest.update(eventBytes);
  return event;
}
/**
 * Verifies that the events generated for one tagged sentence carry the
 * sentence's tags as outcomes, in order, and that the stream is then
 * exhausted.
 */
@Test
public void testOutcomesForSingleSentence() throws Exception {
  String sentence = "That_DT sounds_VBZ good_JJ ._.";
  String[] expectedTags = {"DT", "VBZ", "JJ", "."};

  POSSample sample = POSSample.parse(sentence);

  try (ObjectStream<Event> eventStream = new POSSampleEventStream(
      ObjectStreamUtils.createObjectStream(sample))) {
    for (String expectedTag : expectedTags) {
      Assert.assertEquals(expectedTag, eventStream.read().getOutcome());
    }
    Assert.assertNull(eventStream.read());
  }
}
}
/**
 * Asserts that two models expose the same labels (as returned by
 * {@code extractLabels}) and evaluate the event's context to the same
 * values within a tolerance of 1e-12.
 *
 * @param model1 the first model
 * @param model2 the second model
 * @param event the event whose context is evaluated by both models
 */
protected static void testModelOutcome(NaiveBayesModel model1, NaiveBayesModel model2,
    Event event) {
  final double tolerance = 1e-12;

  Assert.assertArrayEquals(extractLabels(model1), extractLabels(model2));
  Assert.assertArrayEquals(
      model1.eval(event.getContext()),
      model2.eval(event.getContext()),
      tolerance);
}
public static List<Event> generateEvents(String[] sentence, String[] tags, Object[] additionalContext, POSContextGenerator cg) { List<Event> events = new ArrayList<Event>(sentence.length); for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = cg.getContext(i, sentence, tags, additionalContext); events.add(new Event(tags[i], context)); } return events; }
/**
 * Checks outcome, context, values and string form of an {@link Event}
 * created from an outcome, a context and a values array.
 * NOTE(review): the method name says "WithoutValues", but the event built
 * here does carry a values array.
 */
@Test
public void testWithoutValues() {
  Event event = new Event("o1",
      new String[]{"aa", "bb", "cc"},
      new float[]{0.2F, 0.4F, 0.4F});

  String[] expectedContext = {"aa", "bb", "cc"};
  float[] expectedValues = {0.2F, 0.4F, 0.4F};

  Assert.assertEquals("o1", event.getOutcome());
  Assert.assertArrayEquals(expectedContext, event.getContext());
  Assert.assertArrayEquals(expectedValues, event.getValues(), 0.001F);
  Assert.assertEquals("o1 [aa=0.2 bb=0.4 cc=0.4]", event.toString());
}
}
boolean update = false; for (int ei = 0; ei < events.length; ei++, oei++) { if (!taggerEvents[ei].getOutcome().equals(events[ei].getOutcome())) { update = true; String[] contextStrings = events[ei].getContext(); float[] values = events[ei].getValues(); int oi = omap.get(events[ei].getOutcome()); for (int ci = 0; ci < contextStrings.length; ci++) { float value = 1; String[] contextStrings = taggerEvent.getContext(); float[] values = taggerEvent.getValues(); int oi = omap.get(taggerEvent.getOutcome()); for (int ci = 0; ci < contextStrings.length; ci++) { float value = 1;
/**
 * Evaluates the model on the events returned by {@code readPpaFile("devset")},
 * prints the resulting accuracy, and asserts that it equals the expected
 * accuracy within 0.00001. The predicted label for an event is the outcome
 * with the highest evaluated score (the first one on ties).
 *
 * @param model the model to evaluate
 * @param expecedAccuracy the accuracy the model is expected to reach
 * @throws IOException declared by the signature; presumably raised by {@code readPpaFile}
 */
public static void testModel(MaxentModel model, double expecedAccuracy) throws IOException {
  List<Event> devEvents = readPpaFile("devset");

  int correct = 0;
  for (Event ev : devEvents) {
    String targetLabel = ev.getOutcome();
    double[] ocs = model.eval(ev.getContext());

    int best = 0;
    for (int i = 1; i < ocs.length; i++) {
      best = ocs[i] > ocs[best] ? i : best;
    }

    String predictedLabel = model.getOutcome(best);
    if (targetLabel.equals(predictedLabel)) {
      correct++;
    }
  }

  int total = devEvents.size();
  double accuracy = (double) correct / total;
  System.out.println("Accuracy on PPA devset: (" + correct + "/" + total + ") " + accuracy);

  Assert.assertEquals(expecedAccuracy, accuracy, .00001);
}
}
/**
 * Reads the {@code EVENTS} fixture through a {@link FileEventStream} and
 * checks the string form of every event in order, then that the stream is
 * exhausted.
 */
@Test
public void testSimpleReading() throws IOException {
  String[] expectedEvents = {
      "other [wc=ic w&c=he,ic n1wc=lc n1w&c=belongs,lc n2wc=lc]",
      "other [wc=lc w&c=belongs,lc p1wc=ic p1w&c=he,ic n1wc=lc]",
      "other [wc=lc w&c=to,lc p1wc=lc p1w&c=belongs,lc p2wc=ic]",
      "org-start [wc=ic w&c=apache,ic p1wc=lc p1w&c=to,lc]",
      "org-cont [wc=ic w&c=software,ic p1wc=ic p1w&c=apache,ic]",
      "org-cont [wc=ic w&c=foundation,ic p1wc=ic p1w&c=software,ic]",
      "other [wc=other w&c=.,other p1wc=ic]"
  };

  try (FileEventStream feStream = new FileEventStream(new StringReader(EVENTS))) {
    for (String expectedEvent : expectedEvents) {
      Assert.assertEquals(expectedEvent, feStream.read().toString());
    }
    Assert.assertNull(feStream.read());
  }
}
/**
 * Verifies the sequence of split / no-split outcomes the event stream
 * produces for one marked-up sentence, and that reading past the end keeps
 * returning {@code null}.
 */
@Test
public void testEventOutcomes() throws IOException {
  String[] expectedOutcomes = {
      TokenizerME.SPLIT,
      TokenizerME.NO_SPLIT,
      TokenizerME.NO_SPLIT,
      TokenizerME.SPLIT,
      TokenizerME.SPLIT
  };

  ObjectStream<String> sentenceStream =
      ObjectStreamUtils.createObjectStream("\"<SPLIT>out<SPLIT>.<SPLIT>\"");
  ObjectStream<TokenSample> tokenSampleStream = new TokenSampleStream(sentenceStream);

  try (ObjectStream<Event> eventStream = new TokSpanEventStream(tokenSampleStream, false)) {
    for (String expectedOutcome : expectedOutcomes) {
      Assert.assertEquals(expectedOutcome, eventStream.read().getOutcome());
    }

    // The stream must stay exhausted on repeated reads.
    Assert.assertNull(eventStream.read());
    Assert.assertNull(eventStream.read());
  }
}
}
while ((ev = eventStream.read()) != null) { events.add(ev); update(ev.getContext(), counter);
/**
 * Reads the next event from {@code reader}. Each line is tokenized on
 * whitespace: the first token is the outcome and the remaining tokens are
 * the context.
 * Lines that contain no tokens (empty or whitespace-only) are skipped
 * instead of failing with an unchecked {@code NoSuchElementException}.
 *
 * @return the next event, or {@code null} once the reader has no more lines
 * @throws IOException if reading a line fails
 */
@Override
public Event read() throws IOException {
  String line;
  while ((line = reader.readLine()) != null) {
    StringTokenizer st = new StringTokenizer(line);
    if (!st.hasMoreTokens()) {
      // Blank line: there is no outcome token, so move on to the next line.
      continue;
    }

    String outcome = st.nextToken();
    // countTokens() reports the tokens still remaining, i.e. the context size.
    int count = st.countTokens();
    String[] context = new String[count];
    for (int ci = 0; ci < count; ci++) {
      context[ci] = st.nextToken();
    }
    return new Event(outcome, context);
  }
  return null;
}