net.sf.okapi.common.resource.TextUnitUtil java code examples

/**
 * Returns the text of the given text fragment with its code markers left out.
 * The fragment itself is not modified; only the returned copy lacks the markers.
 * <p>
 * Convenience overload: forwards to the two-argument {@code getText} with
 * {@code null} as the second argument (the meaning of that argument is defined
 * by the overload, which is not shown here).
 *
 * @param textFragment
 *            the TextFragment object, possibly containing codes
 * @return a copy of the fragment's string without code markers
 */
public static String getText (TextFragment textFragment) {
  final String plainText = getText(textFragment, null);
  return plainText;
}

/**
 * Builds a new text unit resource whose source part is the given text container.
 * <p>
 * Convenience overload: forwards to the six-argument {@code buildTU}, supplying
 * {@code null}, an empty string, {@code LocaleId.EMPTY} and an empty string for
 * every argument other than the source container.
 *
 * @param source
 *            the text container that becomes the source part of the new text unit.
 * @return the newly created text unit resource having {@code source} as its source part.
 */
public static ITextUnit buildTU (TextContainer source) {
  final ITextUnit created = buildTU(null, "", source, null, LocaleId.EMPTY, "");
  return created;
}

/**
 * Simplifies all possible tags in the source part of a given text unit resource and
 * then in each of its targets (one call per locale returned by
 * {@code getTargetLocales()}).
 * <p>
 * Nothing is done when the text unit is {@code null}, is empty according to
 * {@code TextUnitUtil.isEmpty(textUnit)}, or is not translatable.
 * <p>
 * Note: source and targets are simplified independently, so their codes can become
 * desynchronized; re-aligning them is left to TextUnitMerger.
 *
 * @param textUnit the given text unit
 * @param rules rules for the data-driven simplification
 * @param removeLeadingTrailingCodes true to remove leading and/or trailing codes
 * and place their text in the corresponding inter-segment TextPart.
 * @param mergeCodes true to merge adjacent codes, false to leave as-is
 */
public static void simplifyCodesPostSegmentation(ITextUnit textUnit, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes) {
  // Nothing to simplify for missing, empty or non-translatable text units.
  if (textUnit == null || TextUnitUtil.isEmpty(textUnit) || !textUnit.isTranslatable()) {
    return;
  }

  // source
  simplifyCodesPostSegmentation(textUnit.getSource(), rules, removeLeadingTrailingCodes, mergeCodes);

  // codes can become desynchronized in target - depend on TextUnitMerger
  // to align codes and reset matching id's
  for (LocaleId tl : textUnit.getTargetLocales()) {
    simplifyCodesPostSegmentation(textUnit.getTarget(tl), rules, removeLeadingTrailingCodes, mergeCodes);
  }
}

    cells.add(TextUnitUtil.buildTU(""));
columnNames.clear();
for (ITextUnit tu : cells) {
  String st = TextUnitUtil.getSourceText(tu).trim();
  columnNames.add(st);
  int colNumber = i + 1;
  if (TextUnitUtil.isEmpty(cell, true)) {  // only spaces, no translatable text
    sendAsSkeleton(cell);
    continue;
  ITextUnit temp = new TextUnit("temp", TextUnitUtil.getSourceText(cell)); 
  TextUnitUtil.trimTU(temp, true, true);
  String trimmedCell = TextUnitUtil.getSourceText(temp);
    if (tu == null) continue;										
    if (TextUnitUtil.isEmpty(cell, true)) {
        recordID = TextUnitUtil.getSourceText(cells.get(index));

TextUnitUtil.copySrcCodeDataToMatchingTrgCodes(srcSeg.text, qr.target, true, false, null, tu);
    trgSegs.append(ts);
  at = TextUnitUtil.addAltTranslation(ts,
    qr.toAltTranslation(srcSeg.text, getSourceLanguage(), getTargetLanguage()));
  at = TextUnitUtil.addAltTranslation(trgCont,
    qr.toAltTranslation(srcSeg.text, getSourceLanguage(), getTargetLanguage()));

TextFragment tf = TextUnitUtil.storeSegmentation(tc);
TextFragment[] res = simplifyAll(tf, removeLeadingTrailingCodes, mergeCodes);
  boolean hasLeading = !TextUnitUtil.isEmpty(res[0]);
  boolean hasTrailing = !TextUnitUtil.isEmpty(res[1]);
    res[0] = TextUnitUtil.extractSegMarkers(leadingMarkers, res[0], true);
    tf.insert(0, leadingMarkers, true);
    res[1] = TextUnitUtil.extractSegMarkers(trailingMarkers, res[1], true);
    tf.insert(-1, trailingMarkers, true);
  if (TextUnitUtil.isEmpty(res[0]) && TextUnitUtil.isEmpty(res[1])) res = null; 
TextUnitUtil.restoreSegmentation(tc, tf);
      removedIndexes.add(i);
    if (TextUnitUtil.isEmpty(res[0]))
      res[0] = sb.isEmpty() ? null : sb; 
      removedIndexes.add(i);
    if (TextUnitUtil.isEmpty(res[1]))
      res[1] = sb.isEmpty() ? null : sb;
TextUnitUtil.convertTextParts_whitespaceCodesToText(tc);

res = simplifyCodes(tc, rules, removeLeadingTrailingCodes, mergeCodes);			
res = simplifyCodes(tf, rules, removeLeadingTrailingCodes, mergeCodes);
GenericSkeleton tuSkel = TextUnitUtil.forceSkeleton(textUnit);
GenericSkeleton skel = new GenericSkeleton();
skel.add(TextUnitUtil.isEmpty(res[0]) ? null : TextFragmentUtil.toText(res[0]));
skel.addContentPlaceholder(textUnit);
skel.add(TextUnitUtil.isEmpty(res[1]) ? null : TextFragmentUtil.toText(res[1]));

  return false;
String st = getSourceText(textUnit);
if (st == null)
  return false;
    st.length() >= 2) {
  GenericSkeleton tuSkel = TextUnitUtil.forceSkeleton(textUnit);
  GenericSkeleton skel = new GenericSkeleton();
  setSourceText(textUnit, st.substring(startQualifierLen, Util.getLength(st)
      - endQualifierLen));

/**
 * Indicates if a given text unit resource has a non-empty source part. This is the
 * exact negation of {@code isEmpty(textUnit, true)}: whitespaces are not taken into
 * account, e.g. a text unit whose source contains only whitespaces is considered to
 * have no source.
 * 
 * @param textUnit
 *            the text unit to check.
 * @return true if the given text unit resource is not null and its source text is
 *         neither null nor empty (ignoring whitespaces); false otherwise.
 */
public static boolean hasSource (ITextUnit textUnit) {
  return !isEmpty(textUnit, true);
}

for (TextPart p : tc.getParts()) {
  if (p.isSegment()) {
    TextFragment[] res = simplifyCodes(p.text, rules, removeLeadingTrailingCodes, mergeCodes, true);
    if (removeLeadingTrailingCodes && res != null) {
        newParts.add(new TextPart(expandCodes(res[0])));
        newParts.add(new TextPart(expandCodes(res[1])));

GenericSkeleton tuSkel = TextUnitUtil.forceSkeleton(textUnit);
GenericSkeleton skel = new GenericSkeleton();
  trimLeading(source.getFirstContent(), skel);
  trimTrailing(source.getFirstContent(), skel);

if (textUnit == null) return false;
TextUnitUtil.trimTU(textUnit, true, true);
if (params.removeQualifiers) {
  if (TextUnitUtil.removeQualifiers(textUnit, params.textQualifier)) {
    textUnit.setProperty(new Property(PROP_QUALIFIED, "yes"));

if (skel == null) return TextProcessingResult.REJECTED;
GenericSkeleton targetSkel = TextUnitUtil.forceSkeleton(target);
if ( targetSkel == null ) return TextProcessingResult.REJECTED;
if ( !processTU(target) ) return TextProcessingResult.REJECTED;

TextUnitUtil.removeAndReplaceCodes(codedText, ISOLATED_CODE_REPLACEMENT_TEXT) :
TextUnitUtil.removeCodes(codedText);

  res.add(TextUnitUtil.getSourceText(textUnit));
else
  res.add(TextUnitUtil.getTargetText(textUnit, locId));

    if (TextUnitUtil.hasMergedCode(tp.text)) {
      tp.text = TextUnitUtil.expandCodes(tp.text);
      simplified = true;
  targetFromTran.joinAll();
} else {			
  if (TextUnitUtil.hasMergedCode(targetFromTran.getFirstContent())) {
    targetFromTran.setContent(TextUnitUtil.expandCodes(targetFromTran.getFirstContent()));
    simplified = true;

  TextUnitUtil.addAltTranslation(trgSeg,
    res.toAltTranslation(srcSeg.text, sourceLocale, targetLocale));
TextUnitUtil.addAltTranslation(trgCont,
  res.toAltTranslation(srcFrag, sourceLocale, targetLocale));

TextUnitUtil.copySrcCodeDataToMatchingTrgCodes(srcOriCont.getFirstContent(),
  trgTraCont.getFirstContent(), true, true, null, oriTu);

/**
 * Strips a qualifier (quotation mark etc.) surrounding the source text of a given
 * text unit resource. The same qualifier is used as both the opening and the closing
 * one: the call is forwarded to the three-argument {@code removeQualifiers} with
 * {@code qualifier} passed twice.
 * 
 * @param textUnit
 *            the given text unit resource.
 * @param qualifier
 *            the qualifier to be removed before and after source text.
 * @return true if the qualifiers were found and removed
 */
public static boolean removeQualifiers (ITextUnit textUnit, String qualifier) {
  final boolean removed = removeQualifiers(textUnit, qualifier, qualifier);
  return removed;
}

/**
 * Tells whether a given text unit resource is null, or has a null or empty source text.
 * When ignoreWS is true, whitespaces do not count as content, so a text unit holding
 * only whitespaces is reported as empty.
 * 
 * @param textUnit
 *            the text unit to check.
 * @param ignoreWS
 *            if true and the text unit contains only whitespaces, then the text unit is considered empty.
 * @return true if the given text unit resource is null, or its source part is null or empty.
 */
public static boolean isEmpty (ITextUnit textUnit, boolean ignoreWS) {
  // A missing text unit is empty by definition; the source text is not looked up.
  if (textUnit == null) {
    return true;
  }
  final String sourceText = getSourceText(textUnit);
  return Util.isEmpty(sourceText, ignoreWS);
}

Javadoc

Helper methods to manipulate TextFragment, TextContainer, and TextUnit objects.

Most used methods

getText
Extracts text from the given text fragment. Used to create a copy of the original string but without
addAltTranslation
Adds an AltTranslation object to a given TextContainer. The AltTranslationsAnnotation annotation is c
buildTU
Creates a new text unit resource based on a given text container object becoming the source part of
copySrcCodeDataToMatchingTrgCodes
Copies the aligned inline codes of the source to the corresponding target codes. WARNING: This metho
isEmpty
Indicates if a given text fragment object is null, or the text it contains is null or empty.
expandCodes
Expand codes that have been previously merged.
forceSkeleton
Makes sure that a given text unit contains a skeleton. If there's no skeleton already attached to th
getSourceText
Gets the coded text of the first part of a source part of a given text unit resource. If removeCodes
removeCodes
Removes all inline tags from the given TextFragment
removeQualifiers
Removes from the source part of a given un-segmented text unit resource qualifiers (parenthesis, quo
simplifyCodes
simplifyCodesPostSegmentation
Simplifies all possible tags in the source part of a given text unit resource. If the TextUnit has a

Popular in Java

Finding current android device location
findViewById (Activity)
setScale (BigDecimal)
setContentView (Activity)
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
ByteBuffer (java.nio)
A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
ReentrantLock (java.util.concurrent.locks)
A reentrant mutual exclusion Lock with the same basic behavior and semantics as the implicit monitor
Table (org.hibernate.mapping)
A relational table
Runner (org.openjdk.jmh.runner)
Best plugins for Eclipse

How to use TextUnitUtil in net.sf.okapi.common.resource

Best Java code snippets using net.sf.okapi.common.resource.TextUnitUtil (Showing top 20 results out of 315)

How to use
TextUnitUtil
in
net.sf.okapi.common.resource