/**
 * Compares two UTF8 strings using the raw bytes-based comparison.
 * <p>
 * Note that this comparison may not deliver the correct ordering for certain
 * languages that include 2 or 3 bytes characters, but it works for
 * single-byte character languages.
 *
 * @param thisBytes byte array holding the first string
 * @param thisStart offset in {@code thisBytes} at which the first string begins
 * @param thatBytes byte array holding the second string
 * @param thatStart offset in {@code thatBytes} at which the second string begins
 * @return the result of the underlying {@code compareTo} overload, invoked with
 *         lower-casing disabled and raw-byte mode enabled
 */
public static int rawByteCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
    // Name the two mode flags so the delegation reads without counting booleans.
    final boolean lowerCaseMode = false;
    final boolean rawByteMode = true;
    return compareTo(thisBytes, thisStart, thatBytes, thatStart, lowerCaseMode, rawByteMode);
}
/**
 * Compares two byte ranges with the lower-case flag enabled and the raw-byte
 * flag disabled. The caller supplies the start and length of each range
 * explicitly; they are forwarded unchanged.
 *
 * @param thisBytes  byte array holding the first range
 * @param thisStart  start offset of the first range
 * @param thisLength length of the first range, passed through as given
 * @param thatBytes  byte array holding the second range
 * @param thatStart  start offset of the second range
 * @param thatLength length of the second range, passed through as given
 * @return the result of the underlying eight-argument {@code compareTo}
 */
public static int lowerCaseCompareTo(byte[] thisBytes, int thisStart, int thisLength, byte[] thatBytes,
        int thatStart, int thatLength) {
    final boolean lowerCaseMode = true;
    final boolean rawByteMode = false;
    return compareTo(thisBytes, thisStart, thisLength, thatBytes, thatStart, thatLength, lowerCaseMode,
            rawByteMode);
}
/**
 * Compares the string stored in this object ({@code this.bytes} starting at
 * {@code this.start}) with the string found in {@code bytes} at {@code start},
 * by delegating to {@code UTF8StringUtil.compareTo}.
 *
 * @param bytes  byte array holding the other string
 * @param start  offset in {@code bytes} at which the other string begins
 * @param length not forwarded to the delegate; only the array and the start
 *               offset are passed on
 * @return the value returned by {@code UTF8StringUtil.compareTo}
 */
@Override
public int compareTo(byte[] bytes, int start, int length) {
    final int comparison = UTF8StringUtil.compareTo(this.bytes, this.start, bytes, start);
    return comparison;
}
/**
 * Compares two strings with both the lower-case flag and the raw-byte flag
 * disabled.
 *
 * @param thisBytes byte array holding the first string
 * @param thisStart offset in {@code thisBytes} at which the first string begins
 * @param thatBytes byte array holding the second string
 * @param thatStart offset in {@code thatBytes} at which the second string begins
 * @return the result of the underlying six-argument {@code compareTo}
 */
public static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
    final boolean lowerCaseMode = false;
    final boolean rawByteMode = false;
    return compareTo(thisBytes, thisStart, thatBytes, thatStart, lowerCaseMode, rawByteMode);
}
/**
 * Compares two strings with the lower-case flag enabled and the raw-byte flag
 * disabled.
 *
 * @param thisBytes byte array holding the first string
 * @param thisStart offset in {@code thisBytes} at which the first string begins
 * @param thatBytes byte array holding the second string
 * @param thatStart offset in {@code thatBytes} at which the second string begins
 * @return the result of the underlying six-argument {@code compareTo}
 */
public static int lowerCaseCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
    final boolean lowerCaseMode = true;
    final boolean rawByteMode = false;
    return compareTo(thisBytes, thisStart, thatBytes, thatStart, lowerCaseMode, rawByteMode);
}
/**
 * Shared entry point for the four-argument comparison variants.
 * <p>
 * For each side it reads the string length with {@code getUTFLength}, advances
 * the start offset by {@code getNumBytesToStoreLength(length)}, and then hands
 * the adjusted offsets and lengths to the eight-argument {@code compareTo}.
 *
 * @param thisBytes    byte array holding the first string
 * @param thisStart    offset in {@code thisBytes} at which the first string begins
 * @param thatBytes    byte array holding the second string
 * @param thatStart    offset in {@code thatBytes} at which the second string begins
 * @param useLowerCase forwarded unchanged to the eight-argument overload
 * @param useRawByte   forwarded unchanged to the eight-argument overload
 * @return the result of the eight-argument {@code compareTo}
 */
private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart,
        boolean useLowerCase, boolean useRawByte) {
    final int lengthOfThis = getUTFLength(thisBytes, thisStart);
    final int lengthOfThat = getUTFLength(thatBytes, thatStart);

    // Skip the bytes that getNumBytesToStoreLength reports for each length.
    final int contentStartOfThis = thisStart + getNumBytesToStoreLength(lengthOfThis);
    final int contentStartOfThat = thatStart + getNumBytesToStoreLength(lengthOfThat);

    return compareTo(thisBytes, contentStartOfThis, lengthOfThis, thatBytes, contentStartOfThat, lengthOfThat,
            useLowerCase, useRawByte);
}
/**
 * Checks that comparing the encoded forms of two different Chinese characters
 * does not report them as equal.
 */
@Test
public void testChinese() {
    final byte[] encodedDe = writeStringToBytes("的");
    final byte[] encodedLi = writeStringToBytes("离");

    final int comparison = compareTo(encodedDe, 0, encodedLi, 0);
    final boolean differs = comparison != 0;

    assertTrue(differs);
}
/**
 * Encodes both strings with {@code writeStringToBytes} and checks the selected
 * comparison variant against the corresponding {@link String} comparison.
 * <ul>
 * <li>{@code STANDARD}: {@code compareTo} must equal {@code str1.compareTo(str2)},
 * and the difference of the two {@code normalize} results must have the same
 * sign as that value (per {@code isSameSign}).</li>
 * <li>{@code RAW_BYTE}: {@code rawByteCompareTo} must equal
 * {@code str1.compareTo(str2)}.</li>
 * <li>{@code LOWERCASE}: {@code lowerCaseCompareTo} must equal
 * {@code str1.compareToIgnoreCase(str2)}.</li>
 * </ul>
 *
 * @param str1   first string to compare
 * @param str2   second string to compare
 * @param option which comparison variant to exercise
 * @throws IOException declared by the original signature
 */
public void testCompare(String str1, String str2, OPTION option) throws IOException {
    final byte[] encoded1 = writeStringToBytes(str1);
    final byte[] encoded2 = writeStringToBytes(str2);

    switch (option) {
        case STANDARD: {
            final int expected = str1.compareTo(str2);
            final int actual = compareTo(encoded1, 0, encoded2, 0);
            assertEquals(expected, actual);

            // The normalized keys must order the same way as the strings themselves.
            final int key1 = normalize(encoded1, 0);
            final int key2 = normalize(encoded2, 0);
            assertTrue(isSameSign(expected, key1 - key2));
            break;
        }
        case RAW_BYTE: {
            final int expected = str1.compareTo(str2);
            final int actual = rawByteCompareTo(encoded1, 0, encoded2, 0);
            assertEquals(expected, actual);
            break;
        }
        case LOWERCASE: {
            final int expected = str1.compareToIgnoreCase(str2);
            final int actual = lowerCaseCompareTo(encoded1, 0, encoded2, 0);
            assertEquals(expected, actual);
            break;
        }
        default:
            // Any other option: nothing to verify.
            break;
    }
}