/**
 * Flags a segment for removal by clearing the content of its counterpart in
 * the given target locale. Nothing is deleted here: the emptied text unit is
 * pruned later by {@link #pruneTextUnit(ITextUnit, LocaleId)}.
 *
 * @param tu the text unit that holds the target segment
 * @param seg the segment whose id identifies the target segment to empty
 * @param targetLocale the locale of the target to empty
 */
protected void markSegmentForRemoval(ITextUnit tu, Segment seg, LocaleId targetLocale) {
    // Look the target segment up by the id of the given segment; the lookup
    // is asked not to create a segment when none exists.
    Segment targetSegment = tu.getTargetSegment(targetLocale, seg.getId(), false);
    targetSegment.getContent().clear();
}
private boolean hasExistingCandidate (Segment srcSeg, ISegments trgSegs) { if ( trgSegs == null ) return false; // No target at all Segment trgSeg = trgSegs.get(srcSeg.getId()); if ( trgSeg == null ) return false; // No target segment // Do we have the annotation? AltTranslationsAnnotation ann = trgSeg.getAnnotation(AltTranslationsAnnotation.class); if ( ann == null ) return false; // No AltTranslationsAnnotation // Do we have at least one entry return (ann.getFirst() != null); }
TextFragment trgFragment = tu.getTargetSegment(targetLocale, seg.getId(), false).getContent(); String srcText = seg.getContent().getCodedText(); String trgText = trgFragment.getCodedText();
private void checkUnusualCharacters(ITextUnit tu, Segment seg, LocaleId targetLocale) { TextFragment trgFrag = tu.getTargetSegment(targetLocale, seg.getId(), false).getContent(); StringBuilder srcText = new StringBuilder(seg.getContent().getCodedText()); StringBuilder trgText = new StringBuilder(trgFrag.getCodedText()); boolean isFound = false; Pattern pattern; // any 3 extended characters together pattern = Pattern.compile("[\\u00C0-\\u00FF]{3}"); // check source if ((pattern.matcher(srcText).find() == true) && (isFound == false)) { isFound = true; markSegmentForRemoval(tu, seg, targetLocale); } // check target if ((pattern.matcher(trgText).find() == true) && (isFound == false)) { isFound = true; markSegmentForRemoval(tu, seg, targetLocale); } }
/** * Attempts to detect character corruption from either the source or target. * If any corruption are detected, the segment is marked for removal. * * @param tu the text unit to be modified * @param seg the source segment to be modified * @param targetLocale the locale used to fetch the target text */ private void removeCorruptions(ITextUnit tu, Segment seg, LocaleId targetLocale) { TextFragment trgFrag = tu.getTargetSegment(targetLocale, seg.getId(), false).getContent(); StringBuilder srcText = new StringBuilder(seg.getContent().getCodedText()); StringBuilder trgText = new StringBuilder(trgFrag.getCodedText()); Matcher matcher; String corruptionRegex = "\\u00C3[\\u00A4-\\u00B6]|\\u00C3\\u201E|\\u00C3\\u2026|\\u00C3\\u2013"; // find corruption in source matcher = Pattern.compile(corruptionRegex).matcher(srcText); if (matcher.find() == true) { this.markSegmentForRemoval(tu, seg, targetLocale); } // find corruption in target matcher = Pattern.compile(corruptionRegex).matcher(trgText); if (matcher.find() == true) { this.markSegmentForRemoval(tu, seg, targetLocale); } }
/**
 * Writes the content of a segment one character at a time. A marker
 * character is not written itself: the character that follows it is
 * converted to an index into the fragment's code list and that code is
 * written instead. Any exception is logged with the segment id and text,
 * then rethrown unchanged.
 *
 * @param segment the segment to write
 */
private void writeSegment(Segment segment) {
    try {
        TextFragment fragment = segment.getContent();
        String coded = fragment.getCodedText();
        List<Code> inlineCodes = fragment.getCodes();

        final int end = coded.length();
        int pos = 0;
        while (pos < end) {
            char current = coded.charAt(pos++);
            if (!TextFragment.isMarker(current)) {
                writeChar(current);
                continue;
            }
            // The character right after a marker encodes the code index
            int codeIndex = TextFragment.toIndex(coded.charAt(pos++));
            writeCode(inlineCodes.get(codeIndex));
        }
    } catch (Exception e) {
        LOGGER.error("Threw {} writing segment id {} '{}'", e.getClass().getSimpleName(), segment.getId(), segment.toString());
        throw e;
    }
}
StringBuilder trgText = new StringBuilder(tu.getTargetSegment(targetLocale, seg.getId(), false).text); boolean alreadyFound = false;
/**
 * Normalizes whitespace (tab, space, CR, LF) to a single space in both the
 * given source segment and its counterpart in the target locale, by applying
 * {@code TextFragment.unwrap} to each content.
 *
 * @param tu the text unit containing the segments to update
 * @param seg the source segment to update
 * @param targetLocale the locale whose target segment should be updated
 */
protected void normalizeWhitespace(ITextUnit tu, Segment seg, LocaleId targetLocale) {
    // Source side first
    TextFragment sourceContent = seg.getContent();
    TextFragment.unwrap(sourceContent);

    // Then the target segment that shares the source segment's id
    Segment targetSegment = tu.getTargetSegment(targetLocale, seg.getId(), false);
    TextFragment.unwrap(targetSegment.getContent());
}
return seg.getId();
@Override public void alignCollapseAll(LocaleId trgLoc) { ContainerIterator ci = new ContainerIterator(trgLoc); // keeping track of collapsed containers to check which to set to // ALIGNED LinkedList<TextContainer> collapsed = new LinkedList<TextContainer>(); if (ci.hasSource()) { ci.getSource().joinAll(); ci.getSource().setHasBeenSegmentedFlag(false); collapsed.add(ci.getSource()); } if (ci.hasTarget()) { ci.getTarget().joinAll(); ci.getTarget().setHasBeenSegmentedFlag(false); collapsed.add(ci.getTarget()); } // mark target/source pairs aligned if both have been collapsed TextContainer src, trg; for (LocaleId loc : parent.getTargetLocales()) { src = getSource(loc); if (collapsed.contains(src)) { trg = parent.getTarget(loc); if (collapsed.contains(trg)) { trg.getSegments().setAlignmentStatus(AlignmentStatus.ALIGNED); // TODO: check that this is the desired behavior trg.getFirstSegment().id = src.getFirstSegment().getId(); } } } }
private void convert (TextPart srcPart, TextPart trgPart, Unit destUnit) { // Create the destination part Part destPart; if ( srcPart.isSegment() ) { destPart = destUnit.appendSegment(); } else { destPart = destUnit.appendIgnorable(); } // Transfer the source convert(srcPart.getContent(), destPart.getSource(), false); // Transfer the target (if needed) if ( trgPart != null ) { convert(trgPart.getContent(), destPart.getTarget(GetTarget.CREATE_EMPTY), true); } // Handle the Segment-specific data if ( destPart.isSegment() ) { Segment seg = (Segment)srcPart; destPart.setId(seg.getId()); //TODO: annotations } }
ISegments srcSegs = tu.getSourceSegments(); for (Segment srcSeg : srcSegs) { Segment trgSeg = tu.getTargetSegment(targetLocale, srcSeg.getId(), false);
for (Segment seg : segs) { if (acceptATA(seg.getAnnotation(AltTranslationsAnnotation.class))) { Segment srcSeg = srcSegments.get(seg.getId()); long segCount = count(srcSeg, srcLocale); segmentsCount += segCount;
writer.writeStartElement("alt-trans"); if ( segment != null ) { writer.writeAttributeString("mid", segment.getId());