/**
 * Builds the title of a document restricted to the configured language.
 *
 * Titles whose language equals {@code language} (case-insensitively) are
 * collected; a single match is returned as is, several matches are joined
 * with a single space.
 *
 * @param dm document metadata whose basic-metadata titles are inspected
 * @return the title text, or {@code null} when no title matches the language
 *         or the resulting text is empty after trimming
 */
private String extractLangTitle(DocumentMetadata dm) {
    List<String> matching = new ArrayList<String>();
    for (TextWithLanguage candidate : dm.getBasicMetadata().getTitleList()) {
        if (!language.equalsIgnoreCase(candidate.getLanguage())) {
            continue;
        }
        matching.add(candidate.getText());
    }

    // Nothing in the requested language: the caller is told to skip the record.
    if (matching.isEmpty()) {
        logger.info("No title IN GIVEN LANG (" + language + ") out of "
                + dm.getBasicMetadata().getTitleCount() + " titles. Ignoring record!");
        return null;
    }

    String docTitle;
    if (matching.size() == 1) {
        docTitle = matching.get(0);
    } else {
        logger.info("Number of titles IN GIVEN LANGUAGE (" + language + ") is more then one. "
                + "Titles will be concatenated");
        docTitle = Joiner.on(" ").join(matching);
    }

    // A whitespace-only title is treated the same as a missing one.
    return docTitle.trim().isEmpty() ? null : docTitle;
}
// Fragment: when the entry's language code starts with "en" (compared after lower-casing),
// its text is assigned to title.
// NOTE(review): as shown here, the following statement immediately overwrites title with the
// text of the first title in the basic metadata, and the closing brace of the if lies outside
// this excerpt -- confirm the intended control flow against the full method.
if(twl.getLanguage().toLowerCase().startsWith("en")){ title=twl.getText(); title = dm.getBasicMetadata().getTitle(0).getText();
// Appends to sb the result of filterTextByLang applied to the abstract's text and the
// language code taken from lang.
// NOTE(review): filterTextByLang is defined outside this excerpt; presumably it keeps only
// text in the given language -- verify against its definition.
sb.append(filterTextByLang(documentAbstract.getText(), lang.getLangCode()));
/**
 * Converts one serialized {@code DocumentMetadata} record into a map of text fields.
 *
 * The resulting map contains:
 * "key" (document key), "title" (all titles joined with a space),
 * "keywords" (result of {@code getConcatenated} on the keywords list),
 * "abstract" (all document abstracts joined with a space) and
 * "categories" (result of {@code getCategories} on the classification codes).
 *
 * @param input tuple whose field 0 holds the serialized metadata as a {@code DataByteArray}
 * @return map with the fields listed above
 * @throws IOException when the row cannot be read or parsed; the original exception is
 *         attached as the cause and its stack trace is also included in the message
 */
@Override
public Map exec(Tuple input) throws IOException {
    try {
        DataByteArray protoMetadata = (DataByteArray) input.get(0);
        DocumentMetadata metadata = DocumentMetadata.parseFrom(protoMetadata.get());

        List<String> titleList = new ArrayList<String>();
        for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) {
            titleList.add(title.getText());
        }
        String titles = Joiner.on(" ").join(titleList);

        // Fix: abstracts must come from the document's abstract list. The previous code
        // iterated the title list again, so "abstract" was just a copy of "title".
        List<String> abstractsList = new ArrayList<String>();
        for (TextWithLanguage documentAbstract : metadata.getDocumentAbstractList()) {
            abstractsList.add(documentAbstract.getText());
        }
        String abstracts = Joiner.on(" ").join(abstractsList);

        Map<String, Object> map = new HashMap<String, Object>();
        map.put("key", metadata.getKey());
        map.put("title", titles);
        map.put("keywords", getConcatenated(metadata.getKeywordsList()));
        map.put("abstract", abstracts);
        map.put("categories", getCategories(metadata.getBasicMetadata().getClassifCodeList()));
        return map;
    } catch (Exception e) {
        logger.error("Error in processing input row:", e);
        // Message is unchanged; the cause is now chained so callers can inspect it.
        throw new IOException("Caught exception processing input row:\n"
                + StackTraceExtractor.getStackTrace(e), e);
    }
}
// Stores the DOI on the basic-metadata builder, then wraps the title string in a
// TextWithLanguage builder and adds it as a title. Only the text is set on the
// TextWithLanguage builder in this fragment; no language is assigned here.
basicMetadata.setDoi(doi); TextWithLanguage.Builder twl = TextWithLanguage.newBuilder(); twl.setText(title); basicMetadata.addTitle(twl);
/**
 * Adds a title to the builder unless an equal one is already present.
 *
 * Existing titles are compared with {@code newTitle} ignoring case; when none
 * matches, a new {@code TextWithLanguage} carrying only the text is appended.
 *
 * @param newTitle title text to add
 * @param bmb      basic-metadata builder that receives the title
 */
void addTitle(String newTitle, BasicMetadata.Builder bmb) {
    for (TextWithLanguageOrBuilder existing : bmb.getTitleOrBuilderList()) {
        String existingText = existing.getText();
        if (existingText != null && existingText.equalsIgnoreCase(newTitle)) {
            // Already present (case-insensitive match): nothing to add.
            return;
        }
    }

    TextWithLanguage.Builder titleBuilder = TextWithLanguage.newBuilder();
    titleBuilder.setText(newTitle);
    bmb.addTitle(titleBuilder);
}
/**
 * Extracts the document key and its first title from a serialized document wrapper.
 *
 * @param tuple input row; field 1 holds the serialized {@code DocumentWrapper}
 *              as a {@code DataByteArray}
 * @return a two-field tuple (key, title); the title is an empty string when the
 *         document has no titles
 * @throws IOException when the field cannot be read or the bytes cannot be parsed
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    DataByteArray dba = (DataByteArray) tuple.get(1);
    DocumentProtos.DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(dba.get());
    String id = docWrapper.getDocumentMetadata().getKey();

    // getTitle(0) throws on an empty repeated field, so a title-less document
    // would fail the whole row; fall back to an empty title instead.
    String title = "";
    if (docWrapper.getDocumentMetadata().getBasicMetadata().getTitleCount() > 0) {
        title = docWrapper.getDocumentMetadata().getBasicMetadata().getTitle(0).getText();
    }

    return TupleFactory.getInstance().newTuple(Arrays.asList(id, title));
}
}
/**
 * Writes the space-joined, cleaned texts of a list into one field of the output tuple.
 *
 * Each text is passed through {@code removeAllPigUnfriendlySigns}; the position in
 * the tuple is looked up in {@code fieldNumberMap} by field name.
 *
 * @param output   tuple to write into
 * @param field    field name used as the key into {@code fieldNumberMap}
 * @param someList texts to clean and join
 * @throws ExecException when the tuple field cannot be set
 */
private void appendToOutput(Tuple output, String field, List<TextWithLanguage> someList) throws ExecException {
    List<String> cleanedTexts = new ArrayList<String>();
    for (TextWithLanguage entry : someList) {
        String cleaned = removeAllPigUnfriendlySigns(entry.getText());
        cleanedTexts.add(cleaned);
    }
    output.set(fieldNumberMap.get(field), Joiner.on(" ").join(cleanedTexts));
}
/**
 * Builds the abstract of a document restricted to the configured language.
 *
 * Abstracts whose language equals {@code language} (case-insensitively) are
 * joined with a single space.
 *
 * @param dm document metadata whose abstracts are inspected
 * @return the joined abstract text; an empty string when no abstract matches
 */
private String extractLangAbstract(DocumentMetadata dm) {
    List<String> matching = new ArrayList<String>();
    for (TextWithLanguage candidate : dm.getDocumentAbstractList()) {
        if (!language.equalsIgnoreCase(candidate.getLanguage())) {
            continue;
        }
        matching.add(candidate.getText());
    }
    return Joiner.on(" ").join(matching);
}
/**
 * Copies every title of the wrapped document into the Solr document.
 *
 * Each title text is added as a separate value of the field named by
 * {@code SolrIndexConstants.DOC_TITLE_FIELD_NAME}.
 *
 * @param dw  document wrapper providing the titles
 * @param doc Solr input document that receives the field values
 */
private void convertTitles(DocumentWrapper dw, SolrInputDocument doc) {
    for (TextWithLanguage entry : dw.getDocumentMetadata().getBasicMetadata().getTitleList()) {
        doc.addField(SolrIndexConstants.DOC_TITLE_FIELD_NAME, entry.getText());
    }
}
/**
 * Returns the text of the first title stored in the document's basic metadata,
 * i.e. {@code documentMetadata.getBasicMetadata().getTitle(0).getText()}.
 *
 * @param documentMetadata metadata to read the title from
 * @return the first title's text, or an empty string when the document has no titles
 */
public static String getMainTitle(DocumentProtos.DocumentMetadata documentMetadata) {
    BasicMetadata basic = documentMetadata.getBasicMetadata();
    if (basic.getTitleCount() <= 0) {
        return "";
    }
    return basic.getTitle(0).getText();
}