/**
 * Gathers the texts of all lists found on a parsed page and stores them on the article.
 *
 * <p>The result holds one inner list per definition list (the text of each of its
 * definitions), followed by one inner list per nested-list container (the text of each
 * of its direct nested lists).
 *
 * @param article the article that receives the collected lists via {@code setLists}
 * @param page    the parsed page whose definition lists and nested lists are read
 */
private void setLists(Article article, ParsedPage page) {
    List<List<String>> collected = new ArrayList<List<String>>();

    // Definition lists come first: one entry per DefinitionList.
    for (DefinitionList definitionList : page.getDefinitionLists()) {
        List<String> definitionTexts = new ArrayList<String>();
        for (ContentElement definition : definitionList.getDefinitions()) {
            String text = definition.getText();
            definitionTexts.add(text);
        }
        collected.add(definitionTexts);
    }

    // Nested lists follow: one entry per container, holding the text of each direct child.
    for (NestedListContainer container : page.getNestedLists()) {
        List<String> itemTexts = new ArrayList<String>();
        for (NestedList child : container.getNestedLists()) {
            String text = child.getText();
            itemTexts.add(text);
        }
        collected.add(itemTexts);
    }

    article.setLists(collected);
}
/**
 * Strips every empty structure out of a NestedListContainer, recursing into
 * nested containers first so that a container emptied by the recursion is
 * itself removed afterwards.
 *
 * @param nlc the container to clean up; it is modified in place
 * @return the same {@code nlc} instance that was passed in
 */
public static NestedListContainer eliminateEmptyStructures(NestedListContainer nlc) {
    // Walk from the last index down to 0 so removals do not shift the
    // positions of entries that are still to be visited.
    int index = nlc.size();
    while (--index >= 0) {
        NestedList child = nlc.getNestedList(index);

        // Exact class comparison: only objects whose runtime class is
        // NestedListContainer itself are descended into.
        boolean isContainer = child.getClass() == NestedListContainer.class;
        if (isContainer) {
            eliminateEmptyStructures((NestedListContainer) child);
        }

        if (child.empty()) {
            nlc.remove(child);
        }
    }
    return nlc;
}
/**
 * Registers every link found inside the page's nested lists and tags each
 * registered link with the position of the list it came from.
 *
 * <p>Each link is passed to {@code addLink} with type {@code Link.Type.LIST}. When
 * {@code addLink} returns a non-null link, that link receives the zero-based index of
 * its enclosing nested-list container (list id) and the zero-based index of the
 * nested list within that container (list item).
 *
 * @param links         passed through to {@code addLink}
 * @param externalLinks passed through to {@code addLink}
 * @param page          the parsed page whose nested lists are scanned
 */
private void setLinksInLists(final List<Link> links, final List<Link> externalLinks, ParsedPage page) {
    int containerIndex = 0;
    for (NestedListContainer container : page.getNestedLists()) {
        int itemIndex = 0;
        for (NestedList entry : container.getNestedLists()) {
            for (de.tudarmstadt.ukp.wikipedia.parser.Link parsedLink : entry.getLinks()) {
                Link registered = addLink(links, externalLinks, parsedLink, Link.Type.LIST);
                if (registered == null) {
                    // addLink produced no link object; nothing to annotate.
                    continue;
                }
                registered.setListId(containerIndex);
                registered.setListItem(itemIndex);
            }
            itemIndex++;
        }
        containerIndex++;
    }
}
result += nl.getText(); }else{ result += "---"; // If it is not a NestedListElement, it is a NestedListContainer