/** * Opens a RawDocument for filtering * @param input a Raw Document to open and filter * @param generateSkeleton true if a skeleton should be generated */ public void open (RawDocument input, boolean generateSkeleton) { if (input==null) throw new OkapiException("RawDocument is null"); // save reference for cleanup in close this.input = input; setOptions(input.getSourceLocale(), input.getTargetLocale(), input.getEncoding(), generateSkeleton); if ( input.getInputCharSequence() != null ) { open(input.getInputCharSequence()); } else if ( input.getInputURI() != null ) { open(input.getInputURI()); LOGGER.debug("\nOpening {}", input.getInputURI().toString()); } else if ( input.getStream() != null ) { open(input.getStream()); } else { throw new OkapiException("InputResource has no input defined."); } }
return new Event(EventType.RAW_DOCUMENT, new RawDocument(is, outputEncoding, trgLoc));
/**
 * Closes the raw document held in the <code>input</code> field, if there
 * is one. Does nothing when no document is set.
 */
@Override
public void close () {
    if (input == null) {
        // Nothing to release
        return;
    }
    input.close();
}
/**
 * Builds a one-line description of a raw document: its input path followed
 * by its filter configuration identifier and its encoding.
 * <p>
 * A raw document is not necessarily backed by a URI (it may wrap a
 * character sequence or a stream instead). In that case a placeholder is
 * used in place of the path rather than failing with a
 * NullPointerException.
 *
 * @param rd the raw document to describe
 * @return the formatted description
 */
private String formatDocument (RawDocument rd) {
    // getInputURI() is null for documents created from a stream or a
    // character sequence, so it must not be dereferenced blindly
    String path = (rd.getInputURI() != null)
        ? rd.getInputURI().getPath()
        : "(No path available)";
    return String.format("%s \t(%s, %s)",
        path, rd.getFilterConfigId(), rd.getEncoding());
}
@Override public void open (RawDocument input, boolean generateSkeleton) { this.input = input; if (( input.getInputURI() == null ) && ( input.getInputCharSequence() == null )) { // Cannot do this currently because of the double pass throw new OkapiBadFilterInputException("Direct stream input not supported for MIF."); } srcLang = input.getSourceLocale(); if ( input.getInputURI() != null ) { docName = input.getInputURI().getPath(); } input.setEncoding("UTF-8"); open(input.getStream(), input); }
/** * End the current filter processing and send the {@link Ending} {@link Event} */ @Override protected void endFilter() { super.endFilter(); // delete temp source file if (tempSourceInput != null) { tempSourceInput.close(); boolean success = (new File(tempSourceInput.getInputURI())).delete(); if (!success) { LOGGER.warn("Couldn't delete HTML Filter tidied temp file"); } } }
currentRawDocument.close(); } else if (input.getInputURI() != null) { setDocumentName(input.getInputURI().getPath()); input.setEncoding(detectedEncoding); setOptions(input.getSourceLocale(), input.getTargetLocale(), detectedEncoding, generateSkeleton); if (document != null) { document.close(); document = new StreamedSource(input.getReader()); } catch (IOException e) { throw new OkapiIOException("Filter could not open input stream", e);
LocaleId locale = input.getSourceLocale(); File t = File.createTempFile("~okapi-html-filter-preprocess", ".sourceTemp"); URI tempUri = t.toURI(); StreamedSource streamedSource = null; try { if (input.getEncoding() == RawDocument.UNKOWN_ENCODING) { input.setEncoding(encoding); streamedSource = new StreamedSource(input.getReader()); (input.getEncoding()==RawDocument.UNKOWN_ENCODING ? encoding : input.getEncoding()), (isXHTML ? " /" : "") )); needEncodingDeclaration = false; return new RawDocument(tempUri, encoding, locale); input.close(); if (streamedSource != null) { streamedSource.close();
@SuppressWarnings("unchecked") @Override public void open (RawDocument input, boolean generateSkeleton) { this.input = input; srcLoc = input.getSourceLocale(); trgLoc = input.getTargetLocale(); if ( input.getInputURI() == null ) { throw new OkapiBadFilterInputException("Only input URI is supported for this filter."); } docName = Util.getDirectoryName(input.getInputURI().getPath()); ITmSeeker seeker = TmSeekerFactory.createFileBasedTmSeeker(docName); //TODO: Not very clean way to get the iterator, maybe ITmSeeker should just includes Iterable methods iterator = ((Iterable<TranslationUnit>)seeker).iterator(); state = 1; }
/**
 * Logs which batch item is about to be processed. This method is called
 * inside the loop that processes the batch.
 * <p>
 * The path of the item's first raw document is logged when that document
 * has an input URI; otherwise a fixed "no path" message is logged.
 *
 * @param item the batch item that is about to be processed
 */
protected void displayInput (IBatchItemContext item) {
    if (item.getRawDocument(0).getInputURI() == null) {
        // The first document is not URI-based: there is no path to show
        logger.info("Input (No path available)");
        return;
    }
    logger.info("Input: {}", item.getRawDocument(0).getInputURI().getPath());
}
docURI = input.getInputURI(); if (input.getStream() != null) { StreamUtil.copy(input.getStream(), tempFile); docURI = Util.toURI(tempFile.getAbsolutePath()); srcLoc = input.getSourceLocale(); spreadIdGen = new IdGenerator(null, "spr"); storyIdGen = new IdGenerator(null, "sto");
/**
 * Processes one raw document: stores it together with its source and
 * target locales, runs the one-time initialization if it has not been done
 * yet, obtains a filter for the document's filter configuration and then
 * processes the input.
 *
 * @param rd the raw document to process
 * @throws OkapiException if no filter can be created for the document's
 *         filter configuration identifier
 */
public void processDocument (RawDocument rd) {
    rawDoc = rd;
    srcLoc = rawDoc.getSourceLocale();
    trgLoc = rawDoc.getTargetLocale();

    // Lazy one-time setup
    if (!initDone) {
        initialize();
    }

    // The current filter is handed to the mapper along with the configuration id
    filter = fcMapper.createFilter(rd.getFilterConfigId(), filter);
    if (filter != null) {
        processInput();
        return;
    }

    String message = String.format(
        "No filter available for the configuration '%s'.",
        rd.getFilterConfigId());
    throw new OkapiException(message);
}
input.setEncoding("UTF-8"); // Default for XML, other should be auto-detected BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(input.getStream(), input.getEncoding()); detector.detectBom(); try (InputStreamReader r = new InputStreamReader(input.getStream(), inStreamCharset)) { xmlEventReader = fact.createXMLEventReader(r); isXLIFF2 = detectXLIFF2(xmlEventReader);
boolean generateSkeleton) Path path = Paths.get(input.getInputURI()); if ( input.getInputURI() == null ) { throw new OkapiIOException("No inputURI specified."); srcLoc = input.getSourceLocale(); parser = new CsvParser(path.toFile().getAbsolutePath(), params, fcMapper); sd.setMimeType(currentMimeType); if ( input.getInputURI() != null ) { sd.setName(input.getInputURI().getPath());
if (getInputCharSequence() != null) { try { byte[] bytes = inputCharSequence.toString().getBytes(getEncoding()); createdStream = StreamUtil.createResettableStream(new ByteArrayInputStream(bytes), bytes.length+1); } catch (UnsupportedEncodingException e) { throw new OkapiUnsupportedEncodingException(String.format( "The encoding '%s' is not supported.", getEncoding()), e); } else if (getInputURI() != null) { URL url = null; try { url = getInputURI().toURL(); createdStream = createResettableStream(url.openStream()); } catch (IllegalArgumentException e) { throw new OkapiIOException("Could not open the URI. The URI must be absolute: "
@Override public void open(RawDocument rawDocument, boolean generateSkeleton) { if (rawDocument == null) { throw new OkapiException("RawDocument is null"); } // keep reference so we can clean up this.rawDocument = rawDocument; if (rawDocument.getInputURI() != null) { open(rawDocument.getInputURI()); logger.debug("\nOpening {}", rawDocument.getInputURI().toString()); } else if (rawDocument.getStream() != null) { open(rawDocument.getStream()); } else { throw new OkapiException("InputResource has no input defined."); } }
/**
 * Creates the {@link Document} for the current input, sets the next action
 * to {@link NextAction#NEXT_IN_DOCUMENT} and opens the document.
 * <p>
 * Every checked exception raised while opening is rethrown as an
 * {@link OkapiIOException} that carries the original exception as its
 * cause, so the root failure remains visible in stack traces.
 *
 * @return the event returned by opening the document
 * @throws OkapiIOException if the zipped input cannot be opened or read,
 *         or if its XML content cannot be parsed
 */
private Event openDocument() {
    try {
        document = new Document(params, inputFactory, eventFactory);
        nextAction = NextAction.NEXT_IN_DOCUMENT;

        return document.open(
            START_DOCUMENT_ID,
            documentUri,
            rawDocument.getSourceLocale(),
            rawDocument.getEncoding(),
            createFilterWriter());
    }
    catch (ZipException e) {
        // Chain the cause, as the other handlers below do
        throw new OkapiIOException("Error opening zipped input file.", e);
    }
    catch (IOException e) {
        throw new OkapiIOException("Error reading zipped input file.", e);
    }
    catch (XMLStreamException e) {
        throw new OkapiIOException("Error parsing XML content", e);
    }
}
cmd = cmd.replace(Util.INPUT_ROOT_DIRECTORY_VAR, inputRootDir); Locale loc = rawDoc.getSourceLocale().toJavaLocale(); cmd = cmd.replace("${srcLangName}", loc.getDisplayLanguage(Locale.ENGLISH)); loc = rawDoc.getTargetLocale().toJavaLocale(); cmd = cmd.replace("${trgLangName}", loc.getDisplayLanguage(Locale.ENGLISH));
protected String detectEncoding(RawDocument input) { BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(input.getStream(), input.getEncoding()); // string input has a default BOM defined by java // do not remove it if (input.getInputCharSequence() != null) { detector.detectBom(); } else { detector.detectAndRemoveBom(); } setEncoding(detector.getEncoding()); hasUtf8Bom = detector.hasUtf8Bom(); hasUtf8Encoding = detector.hasUtf8Encoding(); hasBOM = detector.hasBom(); setNewlineType(detector.getNewlineType().toString()); Source parsedHeader = getParsedHeader(input.getStream()); String detectedEncoding = parsedHeader.getDocumentSpecifiedEncoding(); documentEncoding = detectedEncoding == null ? false : true; if (detectedEncoding == null && getEncoding() != null) { detectedEncoding = getEncoding(); LOGGER.debug("Cannot auto-detect encoding. Using the default encoding ({})", getEncoding()); } else if (getEncoding() == null) { detectedEncoding = parsedHeader.getEncoding(); // get best guess LOGGER.debug("Default encoding and detected encoding not found. Using best guess encoding ({})", detectedEncoding); } return detectedEncoding; }
@Override public Event handleRawDocument (Event event) { RawDocument rawDoc = null; try { rawDoc = (RawDocument)event.getResource(); File outFile = new File(outputURI); Util.createDirectories(outFile.getAbsolutePath()); StreamUtil.copy(rawDoc.getStream(), outFile); } catch ( Throwable e ) { throw new OkapiIOException("Error writing or copying a RawDocument.", e); } finally { if (rawDoc != null) { rawDoc.close(); } } // this steps writes RawDocument then eats the event return Event.NOOP_EVENT; }