/** * * Returns the set of names of all templates that are contained in the given * article (without duplicates).<br> * * Note: The names are SQL escaped using {@link StringUtils#sqlEscape(String)}. * * @param pageText * the page to get the templates from * @return a set of template names (without duplicates) */ private Set<String> getTemplateNames(String pageText) { Set<String> names = new HashSet<String>(); if (!pageText.isEmpty()) { try { ParsedPage pp = parser.parse(pageText); List<Template> templates = pp.getTemplates(); for (Template t : templates) { names.add(StringUtils.sqlEscape(t.getName().toLowerCase())); } } catch (Exception e) { // Most likely parsing problems logger.error("Problems parsing page!", e); } } return names; }
/**
 * Copies the templates of a parsed page onto the given article.
 *
 * For every template of the page, its parameters are first passed to
 * {@code parseTemplatesSchema}. A template whose name starts with "infobox"
 * (ignoring case) is then stored as the article's infobox; every other
 * template is collected into the list handed to
 * {@code article.setTemplates}. If the page has several infobox templates,
 * each one overwrites the previous, so the last one wins.
 *
 * @param article
 *            the article that receives the infobox and the template list
 * @param page
 *            the parsed page whose templates are read
 */
private void setTemplates(Article article, ParsedPage page) {
    final List<Template> templates = new ArrayList<Template>(10);
    for (final de.tudarmstadt.ukp.wikipedia.parser.Template t : page.getTemplates()) {
        final List<String> templateParameters = t.getParameters();
        parseTemplatesSchema(article, templateParameters);

        final String name = t.getName();
        // Lower-case with a fixed locale: with the default locale a Turkish
        // or Azerbaijani JVM turns "Infobox" into "ınfobox" (dotless i),
        // which would never match the ASCII prefix below.
        final boolean isInfobox =
                name.toLowerCase(java.util.Locale.ROOT).startsWith("infobox");

        final Template converted = new Template(name, templateParameters);
        if (isInfobox) {
            article.setInfobox(converted);
        } else {
            templates.add(converted);
        }
    }
    article.setTemplates(templates);
}
tplLoop:for(Template tpl:pp.getTemplates()){ if(tpl.getName().equalsIgnoreCase(templateName)){ containsTpl=true;
/** * Does the same as revisionContainsTemplateFragment() without using a template index * * @param revId * @param templateFragment * @return * @throws WikiApiException */ public boolean revisionContainsTemplateFragmentWithoutIndex(int revId, String templateFragment) throws WikiApiException{ if(revApi==null){ revApi = new RevisionApi(wiki.getDatabaseConfiguration()); } if(parser==null){ //TODO switch to SWEBLE MediaWikiParserFactory pf = new MediaWikiParserFactory( wiki.getDatabaseConfiguration().getLanguage()); pf.setTemplateParserClass(ShowTemplateNamesAndParameters.class); parser = pf.createParser(); } List<Template> tplList = parser.parse(revApi.getRevision(revId).getRevisionText()).getTemplates(); for(Template tpl:tplList){ if(tpl.getName().toLowerCase().startsWith(templateFragment.toLowerCase())){ return true; } } return false; }
for( Template t: pp.getTemplates()){ nrOfTemplates++; String templateName = t.getName().toLowerCase();
// Per-page feature counters: each counter is bumped at most once for the
// current parsed page, depending on what the page contains.

// page reports a non-zero number of nested lists
if (pp.nrOfNestedLists() != 0) {
    nrOfPagesWithNl++;
}

// page reports a non-zero number of tables
if (pp.nrOfTables() != 0) {
    nrOfPagesWithTables++;
}

// page has at least one template
if (!pp.getTemplates().isEmpty()) {
    nrOfPagesWithTemplates++;
}

// more than one section is counted as "has sub sections"
if (pp.getSections().size() > 1) {
    nrOfPagesWithSubSections++;
}
/** * Does the same as revisionContainsTemplateName() without using a template index * * @param revId * @param templateName * @return * @throws WikiApiException */ public boolean revisionContainsTemplateNameWithoutIndex(int revId, String templateName) throws WikiApiException{ if(revApi==null){ revApi = new RevisionApi(wiki.getDatabaseConfiguration()); } if(parser==null){ //TODO switch to SWEBLE MediaWikiParserFactory pf = new MediaWikiParserFactory( wiki.getDatabaseConfiguration().getLanguage()); pf.setTemplateParserClass(ShowTemplateNamesAndParameters.class); parser = pf.createParser(); } List<Template> tplList = parser.parse(revApi.getRevision(revId).getRevisionText()).getTemplates(); for(Template tpl:tplList){ if(tpl.getName().equalsIgnoreCase(templateName)){ return true; } } return false; }