@Test
public void renderCustomTypePaper() throws Exception {
    // setup
    config.setTemplateFileNameForDocType("paper", "paper." + templateExtension);
    DocumentTypes.addDocumentType("paper");
    db.updateSchema();

    Crawler crawler = new Crawler(db, config);
    crawler.crawl();
    Parser parser = new Parser(config);
    Renderer renderer = new Renderer(db, config);

    String filename = "published-paper.html";
    File sampleFile = new File(sourceFolder.getPath() + File.separator + "content"
            + File.separator + "papers" + File.separator + filename);
    Map<String, Object> content = parser.processFile(sampleFile);
    content.put(Crawler.Attributes.URI, "/" + filename);
    renderer.render(content);
    File outputFile = new File(destinationFolder, filename);
    Assert.assertTrue(outputFile.exists());

    // verify
    String output = FileUtils.readFileToString(outputFile, Charset.defaultCharset());
    for (String string : getOutputStrings("paper")) {
        assertThat(output).contains(string);
    }
}
StringBuilder sb = new StringBuilder();
sb.append("Processing [").append(sourceFile.getPath()).append("]... ");
String sha1 = buildHash(sourceFile);
String uri = buildURI(sourceFile);
boolean process = true;
DocumentStatus status = DocumentStatus.NEW;
for (String docType : DocumentTypes.getDocumentTypes()) {
    status = findDocumentStatus(docType, uri, sha1);
    if (status == DocumentStatus.UPDATED) {
        sb.append(" : modified ");
        crawlSourceFile(sourceFile, sha1, uri);
        crawl(sourceFile);
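The status handling above leans on a small DocumentStatus enum; a sketch of its likely shape follows. NEW and UPDATED appear in the snippet, while IDENTICAL is an assumption for the unchanged-file case.

// Sketch of the DocumentStatus enum implied above; IDENTICAL is an assumption.
public enum DocumentStatus {
    NEW,       // no stored document for this URI yet
    UPDATED,   // a stored document exists but its SHA1 differs from the source file
    IDENTICAL  // the stored SHA1 matches, so the file can be skipped
}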
private String buildURI(final File sourceFile) {
    String uri = FileUtil.asPath(sourceFile).replace(FileUtil.asPath(config.getContentFolder()), "");
    if (useNoExtensionUri(uri)) {
        // convert URI from xxx.html to xxx/index.html
        uri = createNoExtensionUri(uri);
    } else {
        uri = createUri(uri);
    }
    // strip off leading / to enable generating non-root based sites
    if (uri.startsWith(FileUtil.URI_SEPARATOR_CHAR)) {
        uri = uri.substring(1, uri.length());
    }
    return uri;
}
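For reference, a minimal sketch of what the two URI helpers could look like, inferred from the inline comments and the configured output extension; these bodies are assumptions, not the real implementations.

// Hypothetical helper bodies, inferred from the behavior described above.
private String createUri(String uri) {
    // swap the source extension (e.g. .md) for the configured output extension
    return uri.substring(0, uri.lastIndexOf('.')) + config.getOutputExtension();
}

private String createNoExtensionUri(String uri) {
    // turn /blog/2024/post.html into /blog/2024/post/index.html
    return uri.substring(0, uri.lastIndexOf('.')) + FileUtil.URI_SEPARATOR_CHAR + "index.html";
}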
public void crawl() {
    crawl(config.getContentFolder());
    LOGGER.info("Content detected:");
    for (String docType : DocumentTypes.getDocumentTypes()) {
        long count = db.getDocumentCount(docType);
        if (count > 0) {
            LOGGER.info("Parsed {} files of type: {}", count, docType);
        }
    }
}
Crawler c = new Crawler();
c.schedule(seedDocument);
c.waitUntilCompletion();
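Since the snippet above only shows the call site, here is a hypothetical sketch of a Crawler exposing that schedule/waitUntilCompletion API, backed by an ExecutorService; the internals are an assumption.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class Crawler {

    private final ExecutorService pool = Executors.newFixedThreadPool(4);

    // Queue a document for asynchronous processing.
    public void schedule(Object seedDocument) {
        pool.submit(() -> process(seedDocument));
    }

    // Block until all scheduled work has finished.
    public void waitUntilCompletion() {
        pool.shutdown();
        try {
            pool.awaitTermination(1, TimeUnit.HOURS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    private void process(Object document) {
        // fetch and parse the document, then schedule any follow-ups
    }
}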
/**
 * Create default {@link Utensils} for a given {@link JBakeConfiguration}
 *
 * @param config a {@link JBakeConfiguration}
 * @return a default {@link Utensils} instance
 */
public static Utensils createDefaultUtensils(JBakeConfiguration config) {
    JBakeConfigurationInspector inspector = new JBakeConfigurationInspector(config);
    inspector.inspect();
    Utensils utensils = new Utensils();
    utensils.setConfiguration(config);
    ContentStore contentStore = DBUtil.createDataStore(config);
    utensils.setContentStore(contentStore);
    utensils.setCrawler(new Crawler(contentStore, config));
    utensils.setRenderer(new Renderer(contentStore, config));
    utensils.setAsset(new Asset(config));
    return utensils;
}
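As a hedged usage sketch, the default utensils are typically handed straight to an Oven, as the mock-based test further down also does; the config variable is assumed to be an already-built JBakeConfiguration.

// Hedged usage sketch: config is assumed to be an already-built JBakeConfiguration.
Utensils utensils = createDefaultUtensils(config);
Oven oven = new Oven(utensils);
oven.bake(); // crawls content, renders documents, and copies assets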
Map<String, Object> fileContents = parser.processFile(sourceFile);
if (fileContents != null) {
    fileContents.put(Attributes.ROOTPATH, getPathToRoot(sourceFile));
    fileContents.put(String.valueOf(DocumentAttributes.SHA1), sha1);
    fileContents.put(String.valueOf(DocumentAttributes.RENDERED), false);
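The rootpath attribute set above is what the tests later assert as "../../../". A hypothetical sketch of getPathToRoot follows, emitting one "../" per directory level between the source file and the content root; the real helper may differ (no-extension URIs add an extra index.html directory level, for example).

// Hypothetical sketch: one "../" per directory level up to the content root.
private String getPathToRoot(File sourceFile) {
    StringBuilder path = new StringBuilder();
    File parent = sourceFile.getParentFile();
    while (parent != null && !parent.equals(config.getContentFolder())) {
        path.append("../");
        parent = parent.getParentFile();
    }
    return path.toString();
}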
crawler.crawl();
Thread thread = new Thread(new Crawler("https://www.google.com.pk/?gws_rd=cr&ei=-q8vUqqNDIny4QTLlYCwAQ#q=pakistan"/*new BasicDAO().getNonProcessedLink()*/));
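Note that the thread is only constructed here; it still needs thread.start() to run. For context, a hypothetical sketch of the Runnable Crawler this expects follows; the URL-fetching body is an assumption for illustration, and the commented-out BasicDAO call above hints at where persisted links would come from.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class Crawler implements Runnable {

    private final String url;

    public Crawler(String url) {
        this.url = url;
    }

    @Override
    public void run() {
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new URL(url).openStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                // extract links from the page and queue the non-processed ones here
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}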
@Test
public void renderWithPrettyUrls() throws Exception {
    config.setUriWithoutExtension(true);
    config.setPrefixForUriWithoutExtension("/blog");
    Crawler crawler = new Crawler(db, config);
    crawler.crawl();

    Assert.assertEquals(4, db.getDocumentCount("post"));
    Assert.assertEquals(3, db.getDocumentCount("page"));

    DocumentList documents = db.getPublishedPosts();
    for (Map<String, Object> model : documents) {
        String noExtensionUri = "blog/\\d{4}/" + FilenameUtils.getBaseName((String) model.get("file")) + "/";
        Assert.assertThat(model.get("noExtensionUri"), RegexMatcher.matches(noExtensionUri));
        Assert.assertThat(model.get("uri"), RegexMatcher.matches(noExtensionUri + "index\\.html"));
        assertThat(model).containsEntry("rootpath", "../../../");
    }
}
@Test
public void shouldCrawlRenderAndCopyAssets() throws Exception {
    configuration.setTemplateFolder(folder.newFolder("template"));
    configuration.setContentFolder(folder.newFolder("content"));
    configuration.setAssetFolder(folder.newFolder("assets"));
    contentStore = spy(new ContentStore("memory", "documents" + System.currentTimeMillis()));
    Crawler crawler = mock(Crawler.class);
    Renderer renderer = mock(Renderer.class);
    Asset asset = mock(Asset.class);

    Utensils utensils = new Utensils();
    utensils.setConfiguration(configuration);
    utensils.setContentStore(contentStore);
    utensils.setRenderer(renderer);
    utensils.setCrawler(crawler);
    utensils.setAsset(asset);

    Oven oven = new Oven(utensils);
    oven.bake();

    verify(contentStore, times(1)).startup();
    verify(renderer, atLeastOnce()).renderIndex(anyString());
    verify(crawler, times(1)).crawl();
    verify(asset, times(1)).copy();
}
var crawler = new Crawler();
@Before
public void setup() throws Exception {
    currentLocale = Locale.getDefault();
    Locale.setDefault(Locale.ENGLISH);

    ModelExtractorsDocumentTypeListener listener = new ModelExtractorsDocumentTypeListener();
    DocumentTypes.addListener(listener);
    templateFolder = new File(sourceFolder, templateDir);
    if (!templateFolder.exists()) {
        throw new Exception("Cannot find template folder!");
    }
    destinationFolder = folder.getRoot();
    config.setDestinationFolder(destinationFolder);
    config.setTemplateFolder(templateFolder);
    for (String docType : DocumentTypes.getDocumentTypes()) {
        File templateFile = config.getTemplateFileByDocType(docType);
        if (templateFile != null) {
            String fileName = templateFile.getName();
            String fileBaseName = fileName.substring(0, fileName.lastIndexOf("."));
            config.setTemplateFileNameForDocType(docType, fileBaseName + "." + templateExtension);
        }
    }
    Assert.assertEquals(".html", config.getOutputExtension());
    Crawler crawler = new Crawler(db, config);
    crawler.crawl();
    parser = new Parser(config);
    renderer = new Renderer(db, config);
    setupExpectedOutputStrings();
}
public class Crawler {

    public static void main(String[] args) {
        Crawler crawler = new Crawler();
        String[] urls = new String[]{
                "http://www.url.com/1",
                "http://www.url.com/2",
                "http://www.url.com/3"
        };
        crawler.crawl(urls);
        try {
            Thread.sleep(1000 * 60 * 15); // without this the unit test exits too early
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    // the rest of the class definition
}
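The fixed fifteen-minute sleep is fragile; a hedged alternative is to track outstanding work explicitly, for example with a CountDownLatch (hypothetical wiring, since the real crawl internals are not shown).

import java.util.concurrent.CountDownLatch;

public class CrawlerMain {

    public static void main(String[] args) throws InterruptedException {
        String[] urls = { "http://www.url.com/1", "http://www.url.com/2", "http://www.url.com/3" };
        CountDownLatch done = new CountDownLatch(urls.length);
        for (String url : urls) {
            new Thread(() -> {
                try {
                    System.out.println("crawling " + url); // fetch and process url here
                } finally {
                    done.countDown(); // signal completion for this URL
                }
            }).start();
        }
        done.await(); // returns as soon as every URL is processed, no fixed sleep
    }
}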
@Test
public void crawl() {
    Crawler crawler = new Crawler(db, config);
    crawler.crawl();

    Assert.assertEquals(4, db.getDocumentCount("post"));
    Assert.assertEquals(3, db.getDocumentCount("page"));

    DocumentList results = db.getPublishedPosts();
    assertThat(results.size()).isEqualTo(3);
    for (Map<String, Object> content : results) {
        assertThat(content)
                .containsKey(Crawler.Attributes.ROOTPATH)
                .containsValue("../../../");
    }

    DocumentList allPosts = db.getAllContent("post");
    assertThat(allPosts.size()).isEqualTo(4);
    for (Map<String, Object> content : allPosts) {
        if (content.get(Crawler.Attributes.TITLE).equals("Draft Post")) {
            assertThat(content).containsKey(Crawler.Attributes.DATE);
        }
    }

    // covers bug #213
    DocumentList publishedPostsByTag = db.getPublishedPostsByTag("blog");
    Assert.assertEquals(3, publishedPostsByTag.size());
}