/** * Returns the section titles. From this... */ public String pageAbstract(String text, String title) throws IOException { StringBuilder sb = new StringBuilder(); WikiMarkupParser wikiMarkupParser = WikiMarkupParser.getInstance(); String[] prefixes = {imagePrefix, filePrefix}; ParsedPage parsedPage = wikiMarkupParser.parsePage(text, prefixes); //logger.debug("+++"); //logger.debug(title); try { List<Section> sectionList = parsedPage.getSections(); if (sectionList.size() > 0) { Section section = sectionList.get(0); //logger.debug(section.getTitle()); //logger.debug(section.getText()); //logger.debug("---"); sb.append(title); sb.append(CharacterTable.HORIZONTAL_TABULATION); sb.append(section.getText().replace(CharacterTable.LINE_FEED, CharacterTable.SPACE).trim()); sb.append(CharacterTable.LINE_FEED); } } catch (Exception e) { logger.warn(e.getMessage()); } return sb.toString(); }
// Prints the text returned by section.getText() to standard output.
// NOTE(review): `section` is declared outside this fragment; looks like leftover debug output — confirm.
System.out.println(section.getText());
private void addPageExamples(ParsedPage parsedPage, ParsedPageTitle parsedPageTitle) //throws IOException { try { if (!disambiguationPageSet.contains(parsedPageTitle.getPage())) { Section firstSection = parsedPage.getSection(0); String leftContext = EMPTY_CONTEXT; String rightContext = EMPTY_CONTEXT; if (firstSection != null) { rightContext = firstSection.getText(); } if (parsedPageTitle.hasSuffix()) { leftContext = parsedPageTitle.getSuffix(); } if (parsedPageTitle.isCompliant()) { Example example = new Example(parsedPageTitle.getForm(), parsedPageTitle.getPage(), parsedPageTitle.getPage(), leftContext, rightContext, Example.CONTENT_FROM_PAGE); addExample(example); addNominalVariantExample(example); addPersonSurnameExample(example); addRedirectPageExamples(example, parsedPageTitle); //logger.debug(parsedPageTitle.getForm() + "\t" + parsedPageTitle.getPage() + "\tKEEP"); } else { //logger.warn(parsedPageTitle.getForm() + "\t" + parsedPageTitle.getPage() + "\tREMOVED"); } } } catch (Exception ex) { logger.error("Exception adding page examples for page " + parsedPageTitle.getPage() + " (" + exampleCounter.intValue() + ")\n" + ex); } }