org.apache.hadoop.io.Text.bytesToCodePoint java code examples

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Applies the {@link #replacementMap} / {@link #deletionSet} translation to the given text,
 * one code point at a time.
 *
 * @param input
 *          text whose code points are translated
 * @return the translated string: code points in the deletion set are dropped, code points with
 *         a mapping are replaced, all others are copied through unchanged
 */
private String processInput(Text input) {
 // Only the first getLength() bytes of the backing array belong to the text
 ByteBuffer pending = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
 StringBuilder translated = new StringBuilder();
 // Each bytesToCodePoint call consumes one code point from the buffer
 while (pending.hasRemaining()) {
  int codePoint = Text.bytesToCodePoint(pending);
  if (!deletionSet.contains(codePoint)) {
   // Prefer the mapped code point when one exists, otherwise keep the original
   Integer mapped = replacementMap.get(codePoint);
   int emitted = (mapped == null) ? codePoint : mapped;
   translated.appendCodePoint(emitted);
  }
 }
 return translated.toString();
}

// Decode the next code point from each buffer. Per the bytesToCodePoint Javadoc,
// each call increments the position of the buffer it reads from.
// NOTE(review): fromBytes and toBytes are declared outside this excerpt - presumably ByteBuffers; confirm.
int fromCodePoint = Text.bytesToCodePoint(fromBytes);
 int toCodePoint = Text.bytesToCodePoint(toBytes);

/**
 * Applies the {@link #replacementMap} / {@link #deletionSet} translation to the given text,
 * one code point at a time.
 *
 * @param input
 *          text whose code points are translated
 * @return the translated string: code points in the deletion set are dropped, code points with
 *         a mapping are replaced, all others are copied through unchanged
 */
private String processInput(Text input) {
 // Only the first getLength() bytes of the backing array belong to the text
 ByteBuffer pending = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
 StringBuilder translated = new StringBuilder();
 // Each bytesToCodePoint call consumes one code point from the buffer
 while (pending.hasRemaining()) {
  int codePoint = Text.bytesToCodePoint(pending);
  if (!deletionSet.contains(codePoint)) {
   // Prefer the mapped code point when one exists, otherwise keep the original
   Integer mapped = replacementMap.get(codePoint);
   int emitted = (mapped == null) ? codePoint : mapped;
   translated.appendCodePoint(emitted);
  }
 }
 return translated.toString();
}

// Decode the next code point from each buffer. Per the bytesToCodePoint Javadoc,
// each call increments the position of the buffer it reads from.
// NOTE(review): fromBytes and toBytes are declared outside this excerpt - presumably ByteBuffers; confirm.
int fromCodePoint = Text.bytesToCodePoint(fromBytes);
 int toCodePoint = Text.bytesToCodePoint(toBytes);

/**
 * Reads the code point at the front of the given buffer, advancing the
 * buffer past it, or falls back to a default when the buffer is exhausted.
 * @param param the source of bytes
 * @param defaultValue value to return when no bytes remain in the buffer
 * @return the decoded code point, or <code>defaultValue</code> if the
 *         buffer has nothing left to read
 */
static int getNextCodepoint(ByteBuffer param, int defaultValue) {
 // Only touch the decoder when there is at least one byte to consume
 return param.hasRemaining() ? Text.bytesToCodePoint(param) : defaultValue;
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

/**
 * Returns the Unicode Scalar Value (32-bit integer value)
 * for the character at <code>position</code>. Note that this
 * method avoids using the converter or doing String instantiation.
 *
 * @param position byte offset into the backing array at which decoding starts
 * @return the Unicode scalar value at position or -1
 *          if the position is invalid or points to a
 *          trailing byte
 */
public int charAt(int position) {
 // Valid offsets are 0 .. length - 1. An offset equal to length is already one
 // past the last valid byte, so it is rejected together with negative offsets.
 if (position < 0 || position >= this.length) {
  return -1;
 }

 ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
 // bytesToCodePoint moves the position of the buffer it is given, so it is
 // handed a fresh slice that starts at the requested offset.
 return bytesToCodePoint(bb.slice());
}

// Decode the next code point from each buffer. Per the bytesToCodePoint Javadoc,
// each call increments the position of the buffer it reads from.
// NOTE(review): fromBytes and toBytes are declared outside this excerpt - presumably ByteBuffers; confirm.
int fromCodePoint = Text.bytesToCodePoint(fromBytes);
 int toCodePoint = Text.bytesToCodePoint(toBytes);

/**
 * Applies the {@link #replacementMap} / {@link #deletionSet} translation to the given text,
 * one code point at a time.
 *
 * @param input
 *          text whose code points are translated
 * @return the translated string: code points in the deletion set are dropped, code points with
 *         a mapping are replaced, all others are copied through unchanged
 */
private String processInput(Text input) {
 // Only the first getLength() bytes of the backing array belong to the text
 ByteBuffer pending = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
 StringBuilder translated = new StringBuilder();
 // Each bytesToCodePoint call consumes one code point from the buffer
 while (pending.hasRemaining()) {
  int codePoint = Text.bytesToCodePoint(pending);
  if (!deletionSet.contains(codePoint)) {
   // Prefer the mapped code point when one exists, otherwise keep the original
   Integer mapped = replacementMap.get(codePoint);
   int emitted = (mapped == null) ? codePoint : mapped;
   translated.appendCodePoint(emitted);
  }
 }
 return translated.toString();
}

/**
 * Checks that {@code Text.bytesToCodePoint} raises a
 * {@code BufferUnderflowException} when handed a buffer that holds only
 * the single byte {@code -2}.
 */
public void testbytesToCodePointWithInvalidUTF() {
 try {
  Text.bytesToCodePoint(ByteBuffer.wrap(new byte[] {-2}));
  // Reaching this line means no exception was raised at all, so the message
  // must report a missing exception rather than an unexpected one.
  fail("testbytesToCodePointWithInvalidUTF error: expected BufferUnderflowException was not thrown");
 } catch (BufferUnderflowException expected) {
  // This is the outcome the test is looking for; nothing to do.
 } catch (Exception e) {
  // Any other exception type is a failure; name it so the report is actionable.
  fail("testbytesToCodePointWithInvalidUTF error unexp exception: " + e);
 }
}

/**
 * Checks that {@code Text.bytesToCodePoint} raises a
 * {@code BufferUnderflowException} when handed a buffer that holds only
 * the single byte {@code -2}.
 */
public void testbytesToCodePointWithInvalidUTF() {
 try {
  Text.bytesToCodePoint(ByteBuffer.wrap(new byte[] {-2}));
  // Reaching this line means no exception was raised at all, so the message
  // must report a missing exception rather than an unexpected one.
  fail("testbytesToCodePointWithInvalidUTF error: expected BufferUnderflowException was not thrown");
 } catch (BufferUnderflowException expected) {
  // This is the outcome the test is looking for; nothing to do.
 } catch (Exception e) {
  // Any other exception type is a failure; name it so the report is actionable.
  fail("testbytesToCodePointWithInvalidUTF error unexp exception: " + e);
 }
}

/**
 * Exercises {@code Text.bytesToCodePoint(bytes)} on a six-byte buffer whose
 * first byte is {@code -2}: the call must complete without throwing (in
 * particular without a {@code BufferUnderflowException}) and must leave the
 * buffer position at 6.
 */
public void testBytesToCodePoint() {
 try {
  ByteBuffer buffer = ByteBuffer.wrap(new byte[] {-2, 45, 23, 12, 76, 89});
  Text.bytesToCodePoint(buffer);
  boolean consumedAllSix = buffer.position() == 6;
  assertTrue("testBytesToCodePoint error !!!", consumedAllSix);
 } catch (Exception e) {
  // BufferUnderflowException is an Exception too, so one handler covers both cases
  fail("testBytesToCodePoint unexp exception");
 }
}

/**
 * Exercises {@code Text.bytesToCodePoint(bytes)} on a six-byte buffer whose
 * first byte is {@code -2}: the call must complete without throwing (in
 * particular without a {@code BufferUnderflowException}) and must leave the
 * buffer position at 6.
 */
public void testBytesToCodePoint() {
 try {
  ByteBuffer buffer = ByteBuffer.wrap(new byte[] {-2, 45, 23, 12, 76, 89});
  Text.bytesToCodePoint(buffer);
  boolean consumedAllSix = buffer.position() == 6;
  assertTrue("testBytesToCodePoint error !!!", consumedAllSix);
 } catch (Exception e) {
  // BufferUnderflowException is an Exception too, so one handler covers both cases
  fail("testBytesToCodePoint unexp exception");
 }
}

// Decode the code point at the buffer's current position; the call increments that position.
// NOTE(review): sourceBytes is declared outside this excerpt - presumably a ByteBuffer; confirm.
int cp = Text.bytesToCodePoint(sourceBytes);

Javadoc

Returns the next code point at the current position in the buffer. The buffer's position will be incremented. Any mark set on this buffer will be changed by this method!

Popular methods of Text

<init>
Construct from a byte array.
toString
set
Set the Text to range of bytes
getBytes
Returns the raw bytes; however, only data up to #getLength() is valid. Please use #copyBytes() if yo
getLength
Returns the number of bytes in the byte array
equals
readString
Read a UTF8 encoded string with a maximum size
writeString
Write a UTF8 encoded string with a maximum size to out
readFields
write
compareTo
clear
Clear the string to empty. Note: For performance reasons, this call does not clear the underlying byt

Popular in Java

Reading from database using SQL prepared statement
compareTo (BigDecimal)
addToBackStack (FragmentTransaction)
getExternalFilesDir (Context)
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
PrintStream (java.io)
Fake signature of an existing Java class.
String (java.lang)
NoSuchElementException (java.util)
Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
UUID (java.util)
UUID is an immutable representation of a 128-bit universally unique identifier (UUID). There are mul
JarFile (java.util.jar)
JarFile is used to read jar entries and their associated data from jar files.
CodeWhisperer alternatives

How to use the bytesToCodePoint method in org.apache.hadoop.io.Text

Best Java code snippets using org.apache.hadoop.io.Text.bytesToCodePoint (Showing top 19 results out of 315)

How to use
bytesToCodePoint
method
in
org.apache.hadoop.io.Text