/**
 * Imports all nodes from the given {@link Input} into the store, running the import
 * with {@code numRunners} parallel workers.
 *
 * @param numRunners number of parallel import workers to use.
 * @param input source of node data.
 * @param stores target stores to write into.
 * @param idMapper mapper from input ids to internal node ids.
 * @param executionMonitor monitor receiving progress events for this stage.
 * @param monitor import statistics monitor.
 * @throws IOException on I/O error reading the input.
 */
public static void importNodes( int numRunners, Input input, BatchingNeoStores stores, IdMapper idMapper,
        ExecutionMonitor executionMonitor, Monitor monitor ) throws IOException
{
    // Each worker gets its own importer instance from this factory
    Supplier<EntityImporter> importerFactory = () -> new NodeImporter( stores, idMapper, monitor );
    MemoryUsageStatsProvider memoryStats = new MemoryUsageStatsProvider( stores, idMapper );
    importData( NODE_IMPORT_NAME, numRunners, input.nodes(), stores, importerFactory, executionMonitor, memoryStats );
}
/**
 * Imports all relationships from the given {@link Input} into the store, running the import
 * with {@code numRunners} parallel workers.
 *
 * @param numRunners number of parallel import workers to use.
 * @param input source of relationship data.
 * @param stores target stores to write into.
 * @param idMapper mapper from input ids to internal node ids.
 * @param badCollector collector receiving bad/invalid relationship entries.
 * @param executionMonitor monitor receiving progress events for this stage.
 * @param monitor import statistics monitor; node/property counts are read from it up front.
 * @param validateRelationshipData whether to validate each relationship entry while importing.
 * @return {@link DataStatistics} with the relationship type distribution gathered during the import.
 * @throws IOException on I/O error reading the input.
 */
public static DataStatistics importRelationships( int numRunners, Input input, BatchingNeoStores stores,
        IdMapper idMapper, Collector badCollector, ExecutionMonitor executionMonitor, Monitor monitor,
        boolean validateRelationshipData ) throws IOException
{
    // Seed the statistics with the node/property counts accumulated so far; type counts get
    // filled in by the importers as relationships stream through
    DataStatistics typeDistribution =
            new DataStatistics( monitor.nodes.sum(), monitor.properties.sum(), new RelationshipTypeCount[0] );
    Supplier<EntityImporter> importerFactory = () -> new RelationshipImporter( stores, idMapper, typeDistribution,
            monitor, badCollector, validateRelationshipData, stores.usesDoubleRelationshipRecordUnits() );
    MemoryUsageStatsProvider memoryStats = new MemoryUsageStatsProvider( stores, idMapper );
    importData( RELATIONSHIP_IMPORT_NAME, numRunners, input.relationships(), stores, importerFactory,
            executionMonitor, memoryStats );
    return typeDistribution;
}
/**
 * Prepares this importer for the given {@link Input}: sets up caches, the id mapper and
 * sanity-checks the input estimates before any data is imported.
 * NOTE(review): the statements below are order-dependent (estimates feed the sanity checkers
 * and the double-record-unit decision) — do not reorder.
 *
 * @param input the input to import; retained for later import stages.
 * @throws IOException on I/O error while calculating input estimates.
 */
public void initialize( Input input ) throws IOException
{
    log.info( "Import starting" );
    startTime = currentTimeMillis();
    this.input = input;
    // Number arrays are allocated off-heap/page-cache backed where possible; the monitor
    // records which factory ended up being used
    PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
    numberArrayFactory = auto( neoStore.getPageCache(), storeDir, config.allowCacheAllocationOnHeap(),
            numberArrayFactoryMonitor );
    badCollector = input.badCollector();
    // Some temporary caches and indexes in the import
    idMapper = input.idMapper( numberArrayFactory );
    nodeRelationshipCache = new NodeRelationshipCache( numberArrayFactory, config.denseNodeThreshold() );
    Estimates inputEstimates =
            input.calculateEstimates( neoStore.getPropertyStore().newValueEncodedSizeCalculator() );

    // Sanity checking against estimates
    new EstimationSanityChecker( recordFormats, monitor ).sanityCheck( inputEstimates );
    new HeapSizeSanityChecker( monitor ).sanityCheck( inputEstimates, recordFormats, neoStore,
            nodeRelationshipCache.memoryEstimation( inputEstimates.numberOfNodes() ),
            idMapper.memoryEstimation( inputEstimates.numberOfNodes() ) );

    // Make the set-up pieces available to later stages via dependency satisfaction
    dependencies.satisfyDependencies( inputEstimates, idMapper, neoStore, nodeRelationshipCache,
            numberArrayFactoryMonitor );

    // Decide up front whether relationship records need double record units, based on estimates
    if ( neoStore.determineDoubleRelationshipRecordUnits( inputEstimates ) )
    {
        monitor.doubleRelationshipRecordUnitsEnabled();
    }

    executionMonitor.initialize( dependencies );
}
/**
 * Consumes all generated input data: first every node, then every relationship,
 * converting each entity via {@link RandomEntityDataGenerator#convert} before consumption.
 *
 * @param input the input whose node and relationship iterators are drained.
 * @throws IOException on I/O error while reading the input.
 */
@Override
public void doImport( Input input ) throws IOException
{
    consume( "nodes", input.nodes().iterator(), nodeHeader, RandomEntityDataGenerator::convert );
    consume( "relationships", input.relationships().iterator(), relationshipHeader, RandomEntityDataGenerator::convert );
}
@Test public void shouldCalculateCorrectEstimatesOnEmptyData() throws Exception { // given Groups groups = new Groups(); Collection<DataFactory> nodeData = asList( generateData( defaultFormatNodeFileHeader(), new MutableLong(), 0, 0, ":ID", "nodes-1.csv", groups ) ); Collection<DataFactory> relationshipData = asList( generateData( defaultFormatRelationshipFileHeader(), new MutableLong(), 0, 0, ":START_ID,:TYPE,:END_ID", "rels-1.csv", groups ) ); Input input = new CsvInput( nodeData, defaultFormatNodeFileHeader(), relationshipData, defaultFormatRelationshipFileHeader(), IdType.INTEGER, COMMAS, Collector.EMPTY, groups ); // when Input.Estimates estimates = input.calculateEstimates( new PropertyValueRecordSizeCalculator( LATEST_RECORD_FORMATS.property().getRecordSize( NO_STORE_HEADER ), parseInt( GraphDatabaseSettings.string_block_size.getDefaultValue() ), 0, parseInt( GraphDatabaseSettings.array_block_size.getDefaultValue() ), 0 ) ); // then assertEquals( 0, estimates.numberOfNodes() ); assertEquals( 0, estimates.numberOfRelationships() ); assertEquals( 0, estimates.numberOfRelationshipProperties() ); assertEquals( 0, estimates.numberOfNodeProperties() ); assertEquals( 0, estimates.numberOfNodeLabels() ); }
// Grab the collector that accumulated bad/malformed entries during the import,
// read how many entries were rejected, then close it to flush/release its resources.
// NOTE(review): numberOfBadEntries is presumably consumed later in this method — confirm against
// the surrounding (not visible) code.
Collector collector = input.badCollector(); long numberOfBadEntries = collector.badEntries(); collector.close();
@Test public void shouldCloseDataIteratorsInTheEnd() throws Exception { // GIVEN CapturingDataFactories nodeData = new CapturingDataFactories( config -> charReader( "1" ), NO_DECORATOR ); CapturingDataFactories relationshipData = new CapturingDataFactories( config -> charReader( "1,1" ), defaultRelationshipType( "TYPE" ) ); IdType idType = IdType.STRING; Input input = new CsvInput( nodeData, header( entry( null, Type.ID, idType.extractor( extractors ) ) ), relationshipData, header( entry( null, Type.START_ID, idType.extractor( extractors ) ), entry( null, Type.END_ID, idType.extractor( extractors ) ) ), idType, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator iterator = input.nodes().iterator() ) { readNext( iterator ); } try ( InputIterator iterator = input.relationships().iterator() ) { readNext( iterator ); } // THEN assertClosed( nodeData.last() ); assertClosed( relationshipData.last() ); }
Input.Estimates estimates = input.calculateEstimates( new PropertyValueRecordSizeCalculator( format.property().getRecordSize( NO_STORE_HEADER ), parseInt( GraphDatabaseSettings.string_block_size.getDefaultValue() ), 0,
// Grab the collector that accumulated bad/malformed entries during the import,
// read how many entries were rejected, then close it to flush/release its resources.
// NOTE(review): numberOfBadEntries is presumably consumed later in this method — confirm against
// the surrounding (not visible) code.
Collector collector = input.badCollector(); long numberOfBadEntries = collector.badEntries(); collector.close();
@Test public void shouldIgnoreEmptyExtraColumns() throws Exception { // GIVEN Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":ID,one\n" + "1,test,\n" + "2,test,,additional" ) ); // WHEN Collector collector = mock( Collector.class ); Input input = new CsvInput( data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), collector ); // THEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 1L, properties( "one", "test" ), labels() ); assertNextNode( nodes, 2L, properties( "one", "test" ), labels() ); assertFalse( readNext( nodes ) ); } verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 1L ), eq( null ) ); verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( null ) ); verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( "additional" ) ); }
@Test public void shouldIgnoreRelationshipEntriesMarkedIgnoreUsingHeader() throws Exception { // GIVEN Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":START_ID,:TYPE,:END_ID,prop:IGNORE,other:int\n" + "1,KNOWS,2,Mattias,10\n" + "2,KNOWS,3,Johan,111\n" + "3,KNOWS,4,Emil,12" ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, 1L, 2L, "KNOWS", new Object[] {"other", 10} ); assertNextRelationship( relationships, 2L, 3L, "KNOWS", new Object[] {"other", 111} ); assertNextRelationship( relationships, 3L, 4L, "KNOWS", new Object[] {"other", 12} ); assertFalse( readNext( relationships ) ); } }
/**
 * Prepares this importer for the given {@link Input}: sets up caches, the id mapper and
 * sanity-checks the input estimates before any data is imported.
 * NOTE(review): the statements below are order-dependent (estimates feed the sanity checkers
 * and the double-record-unit decision) — do not reorder.
 *
 * @param input the input to import; retained for later import stages.
 * @throws IOException on I/O error while calculating input estimates.
 */
public void initialize( Input input ) throws IOException
{
    log.info( "Import starting" );
    startTime = currentTimeMillis();
    this.input = input;
    // Number arrays are allocated off-heap/page-cache backed where possible; the monitor
    // records which factory ended up being used
    PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
    numberArrayFactory = auto( neoStore.getPageCache(), storeDir, config.allowCacheAllocationOnHeap(),
            numberArrayFactoryMonitor );
    badCollector = input.badCollector();
    // Some temporary caches and indexes in the import
    idMapper = input.idMapper( numberArrayFactory );
    nodeRelationshipCache = new NodeRelationshipCache( numberArrayFactory, config.denseNodeThreshold() );
    Estimates inputEstimates =
            input.calculateEstimates( neoStore.getPropertyStore().newValueEncodedSizeCalculator() );

    // Sanity checking against estimates
    new EstimationSanityChecker( recordFormats, monitor ).sanityCheck( inputEstimates );
    new HeapSizeSanityChecker( monitor ).sanityCheck( inputEstimates, recordFormats, neoStore,
            nodeRelationshipCache.memoryEstimation( inputEstimates.numberOfNodes() ),
            idMapper.memoryEstimation( inputEstimates.numberOfNodes() ) );

    // Make the set-up pieces available to later stages via dependency satisfaction
    dependencies.satisfyDependencies( inputEstimates, idMapper, neoStore, nodeRelationshipCache,
            numberArrayFactoryMonitor );

    // Decide up front whether relationship records need double record units, based on estimates
    if ( neoStore.determineDoubleRelationshipRecordUnits( inputEstimates ) )
    {
        monitor.doubleRelationshipRecordUnitsEnabled();
    }

    executionMonitor.initialize( dependencies );
}
@Test public void shouldPropagateExceptionFromFailingDecorator() throws Exception { // GIVEN RuntimeException failure = new RuntimeException( "FAILURE" ); Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":ID,name\n1,Mattias", new FailingNodeDecorator( failure ) ) ); Input input = new CsvInput( data, defaultFormatNodeFileHeader(), datas(), defaultFormatNodeFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { readNext( nodes ); } catch ( InputException e ) { // THEN assertSame( e.getCause(), failure ); } }
@Test public void shouldSkipRelationshipValidationIfToldTo() throws Exception { // GIVEN Iterable<DataFactory> data = datas( CsvInputTest.data( ":START_ID,:END_ID,:TYPE\n" + ",," ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { readNext( relationships ); assertNull( visitor.startId() ); assertNull( visitor.endId() ); assertNull( visitor.stringType ); } }
@Test public void shouldNotParsePointPropertyValuesWithDuplicateKeys() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point\n" + "1,Johan,\" { height :0.01 ,longitude:5, latitude : -4.2, latitude : 4.2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN readNext( nodes ); fail( "Should have failed when key assigned multiple times, but didn't." ); } catch ( InputException ignore ) { // this is fine } }
@Test public void shouldProvideDefaultRelationshipType() throws Exception { // GIVEN String defaultType = "DEFAULT"; String customType = "CUSTOM"; DataFactory data = data( ":START_ID,:END_ID,:TYPE\n" + "0,1,\n" + "1,2," + customType + "\n" + "2,1," + defaultType, defaultRelationshipType( defaultType ) ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), dataIterable, defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN/THEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, 0L, 1L, defaultType, NO_PROPERTIES ); assertNextRelationship( relationships, 1L, 2L, customType, NO_PROPERTIES ); assertNextRelationship( relationships, 2L, 1L, defaultType, NO_PROPERTIES ); assertFalse( readNext( relationships ) ); } }
@Test public void shouldUseHeaderInformationToParsePoint() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point{crs:WGS-84}\n" + "0,Johan,\" { x :1 ,y:2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 0L, new Object[]{"name", "Johan", "point", Values.pointValue( CoordinateReferenceSystem.WGS84, 1, 2)}, labels() ); assertFalse( readNext( nodes ) ); } }
@Test public void shouldDoWithoutRelationshipTypeHeaderIfDefaultSupplied() throws Exception { // GIVEN relationship data w/o :TYPE header String defaultType = "HERE"; DataFactory data = data( ":START_ID,:END_ID,name\n" + "0,1,First\n" + "2,3,Second\n", defaultRelationshipType( defaultType ) ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), dataIterable, defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { // THEN assertNextRelationship( relationships, 0L, 1L, defaultType, properties( "name", "First" ) ); assertNextRelationship( relationships, 2L, 3L, defaultType, properties( "name", "Second" ) ); assertFalse( readNext( relationships ) ); } }
@Test public void shouldParsePointPropertyValuesWithCRSInHeader() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point{crs:WGS-84-3D}\n" + "0,Johan,\" { height :0.01 ,longitude:5, latitude : -4.2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 0L, new Object[]{"name", "Johan", "point", Values.pointValue( CoordinateReferenceSystem.WGS84_3D, 5, -4.2, 0.01)}, labels() ); assertFalse( readNext( nodes ) ); } }
@Test public void shouldProvideRelationshipsFromCsvInput() throws Exception { // GIVEN IdType idType = IdType.STRING; Iterable<DataFactory> data = dataIterable( data( "node1,node2,KNOWS,1234567\n" + "node2,node10,HACKS,987654" ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, header( entry( "from", Type.START_ID, idType.extractor( extractors ) ), entry( "to", Type.END_ID, idType.extractor( extractors ) ), entry( "type", Type.TYPE, extractors.string() ), entry( "since", Type.PROPERTY, extractors.long_() ) ), idType, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN/THEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, "node1", "node2", "KNOWS", properties( "since", 1234567L ) ); assertNextRelationship( relationships, "node2", "node10", "HACKS", properties( "since", 987654L ) ); } }