/**
 * Builds a short human-readable description of this descriptor: its file
 * position, whether it is flagged as unicode, and its property modifier.
 *
 * @return the description text
 */
@Override
public String toString()
{
    // Appended in the same order the values were originally concatenated.
    StringBuilder text = new StringBuilder( "PieceDescriptor (pos: " );
    text.append( getFilePosition() );
    text.append( "; " );
    text.append( isUnicode() ? "unicode" : "non-unicode" );
    text.append( "; prm: " );
    text.append( getPrm() );
    text.append( ")" );
    return text.toString();
}
}
/**
 * Creates a text piece that spans the whole of the given buffer.
 * <p>
 * The piece starts at character 0 and ends at {@code buffer.length()}; its
 * raw bytes are the buffer contents encoded as UTF-16LE, and it is paired
 * with a {@link PieceDescriptor} built from eight zero bytes.
 *
 * @param buffer the text this piece should hold
 * @throws IOException if the "UTF-16LE" encoding name is not supported
 */
public SinglentonTextPiece( StringBuilder buffer ) throws IOException
{
    super(
            0,
            buffer.length(),
            buffer.toString().getBytes( "UTF-16LE" ),
            new PieceDescriptor( new byte[8], 0 )
    );
}
/**
 * Compares this piece with another object.
 * <p>
 * Two pieces are equal when their limits match (per
 * {@code limitsAreEqual}), they agree on the unicode flag, their text
 * buffers render to the same string, and their piece descriptors are equal.
 *
 * @param o the object to compare against
 * @return {@code true} only if {@code o} is a {@code TextPiece} matching
 *         this one on all of the points above
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof TextPiece)) {
        return false;
    }
    final TextPiece other = (TextPiece) o;
    assert (_buf != null && other._buf != null && _pd != null && other._pd != null);

    // Checks run cheapest-first, in the same order as a short-circuit chain.
    if (!limitsAreEqual(o)) {
        return false;
    }
    if (other._usesUnicode != this._usesUnicode) {
        return false;
    }
    final String otherText = other._buf.toString();
    final String ownText = this._buf.toString();
    if (!otherText.equals(ownText)) {
        return false;
    }
    return other._pd.equals(this._pd);
}
/**
 * Decodes the raw piece bytes into a fresh StringBuilder.
 * <p>
 * When the descriptor's charset equals {@code StringUtil.BIG5} the bytes go
 * through {@code CodePageUtil.cp950ToString}; otherwise they are decoded as
 * UTF-16LE for unicode pieces, or with the descriptor's own charset.
 *
 * @param text the raw bytes of the piece
 * @param pd   the descriptor supplying the unicode flag and charset
 * @return a StringBuilder holding the decoded text
 */
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
    final boolean isBig5 = StringUtil.BIG5.equals(pd.getCharset());
    if (!isBig5) {
        if (pd.isUnicode()) {
            return new StringBuilder(new String(text, 0, text.length, StringUtil.UTF16LE));
        }
        return new StringBuilder(new String(text, 0, text.length, pd.getCharset()));
    }
    // Big5 text is routed through the dedicated cp950 converter.
    return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length));
}
PieceDescriptor.getSizeInBytes()); pieces[x] = new PieceDescriptor(node.getBytes(), 0); _cpMin = pieces[0].getFilePosition() - fcMin; for (PieceDescriptor piece : pieces) { int start = piece.getFilePosition() - fcMin; if (start < _cpMin) { _cpMin = start; int start = pieces[x].getFilePosition(); GenericPropertyNode node = pieceTable.getProperty(x); boolean unicode = pieces[x].isUnicode(); int multiple = 1; if (unicode) {
public byte[] writeTo(ByteArrayOutputStream docStream) throws IOException { PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes()); // int fcMin = docStream.getOffset(); for (TextPiece next : _textPieces) { PieceDescriptor pd = next.getPieceDescriptor(); int offset = docStream.size(); int mod = (offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE); if (mod != 0) { mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod; byte[] buf = IOUtils.safelyAllocate(mod, MAX_RECORD_LENGTH); docStream.write(buf); } // set the text piece position to the current docStream offset. pd.setFilePosition(docStream.size()); // write the text to the docstream and save the piece descriptor to // the // plex which will be written later to the tableStream. docStream.write(next.getRawBytes()); // The TextPiece is already in characters, which // makes our life much easier int nodeStart = next.getStart(); int nodeEnd = next.getEnd(); textPlex.addProperty(new GenericPropertyNode(nodeStart, nodeEnd, pd.toByteArray())); } return textPlex.toByteArray(); }
throw new IllegalStateException("Text pieces table is empty"); if (_textPiecesFCOrder.get(0).getPieceDescriptor().getFilePosition() > startBytePos) return _textPiecesFCOrder.get(0).getPieceDescriptor().getFilePosition(); .getPieceDescriptor().getFilePosition() <= startBytePos) return startBytePos; int mid = (low + high) >>> 1; final TextPiece textPiece = _textPiecesFCOrder.get(mid); int midVal = textPiece.getPieceDescriptor().getFilePosition(); else return textPiece.getPieceDescriptor().getFilePosition(); .getFilePosition() < startBytePos; .getFilePosition() > startBytePos; return _textPiecesFCOrder.get(low + 1).getPieceDescriptor().getFilePosition();
/**
 * Decodes the raw piece bytes into a fresh StringBuilder, using UTF-16LE
 * when the descriptor is flagged as unicode and Cp1252 otherwise.
 *
 * @param text the raw bytes of the piece
 * @param pd   the descriptor supplying the unicode flag
 * @return a StringBuilder holding the decoded text
 * @throws RuntimeException if the runtime does not support the required
 *         encoding; the original {@link UnsupportedEncodingException} is
 *         attached as the cause
 */
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
    final String encoding = pd.isUnicode() ? "UTF-16LE" : "Cp1252";
    try {
        return new StringBuilder(new String(text, encoding));
    } catch (UnsupportedEncodingException e) {
        // Keep the underlying exception so the failing encoding name is not lost.
        throw new RuntimeException("Your Java is broken! It doesn't know about basic, required character encodings!", e);
    }
}
/** * * @param guessedCharset charset that we think this is * @return a new text piece * @throws IllegalStateException if the length isn't correct */ private TextPiece buildTextPiece(Charset guessedCharset) throws IllegalStateException { PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 0,0,0,127, 0,0}, 0, guessedCharset); pd.setFilePosition(_fib.getFibBase().getFcMin()); // Generate a single Text Piece Table, with a single Text Piece // which covers all the (8 bit only) text in the file tpt = new OldTextPieceTable(); byte[] textData = IOUtils.safelyAllocate( _fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH); System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), textData, 0, textData.length); int numChars = textData.length; if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) { numChars /= 2; } return new TextPiece( 0, numChars, textData, pd ); }
.getTextPieces() ) PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); if ( !prm.isComplex() ) continue;
/**
 * Reports the multiplier to use for the given piece's encoding.
 *
 * @param textPiece the piece whose descriptor charset is inspected
 * @return 2 when the piece has a charset listed in
 *         {@code CodePageUtil.DOUBLE_BYTE_CHARSETS}, otherwise 1
 */
@Override
protected int getEncodingMultiplier(TextPiece textPiece) {
    final Charset pieceCharset = textPiece.getPieceDescriptor().getCharset();
    final boolean doubleByte =
            pieceCharset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(pieceCharset);
    return doubleByte ? 2 : 1;
}
}
PieceDescriptor.getSizeInBytes() ); pieces[x] = new PieceDescriptor( node.getBytes(), 0 ); _cpMin = pieces[0].getFilePosition() - fcMin; for ( int x = 0; x < pieces.length; x++ ) int start = pieces[x].getFilePosition() - fcMin; if ( start < _cpMin ) int start = pieces[x].getFilePosition(); GenericPropertyNode node = pieceTable.getProperty( x ); boolean unicode = pieces[x].isUnicode(); int multiple = 1; if ( unicode )
public byte[] writeTo( HWPFOutputStream docStream ) throws IOException PlexOfCps textPlex = new PlexOfCps( PieceDescriptor.getSizeInBytes() ); pd.setFilePosition( docStream.getOffset() ); int nodeEnd = next.getEnd(); textPlex.addProperty( new GenericPropertyNode( nodeStart, nodeEnd, pd.toByteArray() ) );
throw new IllegalStateException( "Text pieces table is empty" ); if ( _textPiecesFCOrder.get( 0 ).getPieceDescriptor().getFilePosition() > startBytePos ) return _textPiecesFCOrder.get( 0 ).getPieceDescriptor().getFilePosition(); .getPieceDescriptor().getFilePosition() <= startBytePos ) return startBytePos; int midVal = textPiece.getPieceDescriptor().getFilePosition(); else return textPiece.getPieceDescriptor().getFilePosition(); .getFilePosition() < startBytePos; .getFilePosition() > startBytePos; return _textPiecesFCOrder.get( low + 1 ).getPieceDescriptor().getFilePosition();
/**
 * Creates a text piece from raw bytes and validates that the decoded text
 * matches the declared character range.
 *
 * @param start Beginning offset in main document stream, in characters.
 * @param end   Ending offset in main document stream, in characters.
 * @param text  The raw bytes of our text
 * @param pd    The piece descriptor; supplies the unicode flag and is used
 *              when decoding {@code text}
 * @throws IllegalStateException if {@code end} is before {@code start}, or
 *         if the decoded text length differs from {@code end - start}
 */
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd) {
    super(start, end, buildInitSB(text, pd));
    _usesUnicode = pd.isUnicode();
    _pd = pd;

    // Validate. The negative-size check has to come first: a negative
    // end - start can never equal a text length, so the length check would
    // otherwise always win and report a misleading message.
    if (end < start) {
        throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
    }
    int textLength = ((CharSequence) _buf).length();
    if (end - start != textLength) {
        throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
    }
}
PieceDescriptor pd = new PieceDescriptor(new byte[] {0,0, 0,0,0,127, 0,0}, 0); pd.setFilePosition(_fib.getFibBase().getFcMin());
.getTextPieces() ) PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); if ( !prm.isComplex() ) continue;
PieceDescriptor.getSizeInBytes()); pieces[x] = new PieceDescriptor(node.getBytes(), 0, charset); _cpMin = pieces[0].getFilePosition() - fcMin; for (PieceDescriptor piece : pieces) { int start = piece.getFilePosition() - fcMin; if (start < _cpMin) { _cpMin = start; int start = pieces[x].getFilePosition(); GenericPropertyNode node = pieceTable.getProperty(x); boolean unicode = pieces[x].isUnicode(); int multiple = 1; if (unicode ||
/**
 * Describes this descriptor as text: file position, unicode or non-unicode,
 * and the property modifier.
 *
 * @return the description text
 */
@Override
public String toString() {
    final int position = getFilePosition();
    final String encoding = isUnicode() ? "unicode" : "non-unicode";
    return "PieceDescriptor (pos: " + position + "; " + encoding + "; prm: " + getPrm() + ")";
}
}
/**
 * Tells whether a byte position falls inside one of the text pieces.
 * <p>
 * Pieces are visited in the order of {@code _textPiecesFCOrder}. The first
 * piece whose end (file position plus byte length, inclusive) is not before
 * {@code bytePos} decides the answer: the position is in the table only if
 * that piece also starts at or before it.
 *
 * @param bytePos the byte position to look up
 * @return {@code true} if the deciding piece contains {@code bytePos};
 *         {@code false} otherwise, including when every piece ends before it
 */
public boolean isIndexInTable(int bytePos) {
    for (TextPiece piece : _textPiecesFCOrder) {
        final int firstByte = piece.getPieceDescriptor().getFilePosition();
        if (bytePos <= firstByte + piece.bytesLength()) {
            // This piece does not end before bytePos, so it settles the lookup.
            return firstByte <= bytePos;
        }
    }
    return false;
}