/**
 * Imports all nodes from the given {@link Input} into the store, running the import
 * with {@code numRunners} parallel workers.
 *
 * @param numRunners number of parallel import workers to use.
 * @param input source of node data.
 * @param stores target stores to write into.
 * @param idMapper mapper from input ids to internal node ids.
 * @param executionMonitor monitor receiving progress events for this stage.
 * @param monitor import statistics monitor.
 * @throws IOException on I/O error reading the input.
 */
public static void importNodes( int numRunners, Input input, BatchingNeoStores stores, IdMapper idMapper,
        ExecutionMonitor executionMonitor, Monitor monitor ) throws IOException
{
    // Each worker gets its own importer instance from this factory
    Supplier<EntityImporter> importerFactory = () -> new NodeImporter( stores, idMapper, monitor );
    MemoryUsageStatsProvider memoryStats = new MemoryUsageStatsProvider( stores, idMapper );
    importData( NODE_IMPORT_NAME, numRunners, input.nodes(), stores, importerFactory, executionMonitor, memoryStats );
}
/**
 * Imports all relationships from the given {@link Input} into the store, running the import
 * with {@code numRunners} parallel workers.
 *
 * @param numRunners number of parallel import workers to use.
 * @param input source of relationship data.
 * @param stores target stores to write into.
 * @param idMapper mapper from input ids to internal node ids.
 * @param badCollector collector receiving bad/invalid relationship entries.
 * @param executionMonitor monitor receiving progress events for this stage.
 * @param monitor import statistics monitor; node/property counts are read from it up front.
 * @param validateRelationshipData whether to validate each relationship entry while importing.
 * @return {@link DataStatistics} with the relationship type distribution gathered during the import.
 * @throws IOException on I/O error reading the input.
 */
public static DataStatistics importRelationships( int numRunners, Input input, BatchingNeoStores stores,
        IdMapper idMapper, Collector badCollector, ExecutionMonitor executionMonitor, Monitor monitor,
        boolean validateRelationshipData ) throws IOException
{
    // Seed the statistics with the node/property counts accumulated so far; type counts get
    // filled in by the importers as relationships stream through
    DataStatistics typeDistribution =
            new DataStatistics( monitor.nodes.sum(), monitor.properties.sum(), new RelationshipTypeCount[0] );
    Supplier<EntityImporter> importerFactory = () -> new RelationshipImporter( stores, idMapper, typeDistribution,
            monitor, badCollector, validateRelationshipData, stores.usesDoubleRelationshipRecordUnits() );
    MemoryUsageStatsProvider memoryStats = new MemoryUsageStatsProvider( stores, idMapper );
    importData( RELATIONSHIP_IMPORT_NAME, numRunners, input.relationships(), stores, importerFactory,
            executionMonitor, memoryStats );
    return typeDistribution;
}
/**
 * Prepares this importer for the given {@link Input}: sets up caches, the id mapper and
 * sanity-checks the input estimates before any data is imported.
 * NOTE(review): the statements below are order-dependent (estimates feed the sanity checkers
 * and the double-record-unit decision) — do not reorder.
 *
 * @param input the input to import; retained for later import stages.
 * @throws IOException on I/O error while calculating input estimates.
 */
public void initialize( Input input ) throws IOException
{
    log.info( "Import starting" );
    startTime = currentTimeMillis();
    this.input = input;
    // Number arrays are allocated off-heap/page-cache backed where possible; the monitor
    // records which factory ended up being used
    PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
    numberArrayFactory = auto( neoStore.getPageCache(), storeDir, config.allowCacheAllocationOnHeap(),
            numberArrayFactoryMonitor );
    badCollector = input.badCollector();
    // Some temporary caches and indexes in the import
    idMapper = input.idMapper( numberArrayFactory );
    nodeRelationshipCache = new NodeRelationshipCache( numberArrayFactory, config.denseNodeThreshold() );
    Estimates inputEstimates =
            input.calculateEstimates( neoStore.getPropertyStore().newValueEncodedSizeCalculator() );

    // Sanity checking against estimates
    new EstimationSanityChecker( recordFormats, monitor ).sanityCheck( inputEstimates );
    new HeapSizeSanityChecker( monitor ).sanityCheck( inputEstimates, recordFormats, neoStore,
            nodeRelationshipCache.memoryEstimation( inputEstimates.numberOfNodes() ),
            idMapper.memoryEstimation( inputEstimates.numberOfNodes() ) );

    // Make the set-up pieces available to later stages via dependency satisfaction
    dependencies.satisfyDependencies( inputEstimates, idMapper, neoStore, nodeRelationshipCache,
            numberArrayFactoryMonitor );

    // Decide up front whether relationship records need double record units, based on estimates
    if ( neoStore.determineDoubleRelationshipRecordUnits( inputEstimates ) )
    {
        monitor.doubleRelationshipRecordUnitsEnabled();
    }

    executionMonitor.initialize( dependencies );
}
/**
 * Consumes all generated input data: first every node, then every relationship,
 * converting each entity via {@link RandomEntityDataGenerator#convert} before consumption.
 *
 * @param input the input whose node and relationship iterators are drained.
 * @throws IOException on I/O error while reading the input.
 */
@Override
public void doImport( Input input ) throws IOException
{
    consume( "nodes", input.nodes().iterator(), nodeHeader, RandomEntityDataGenerator::convert );
    consume( "relationships", input.relationships().iterator(), relationshipHeader, RandomEntityDataGenerator::convert );
}
@Test public void shouldCalculateCorrectEstimatesOnEmptyData() throws Exception { // given Groups groups = new Groups(); Collection<DataFactory> nodeData = asList( generateData( defaultFormatNodeFileHeader(), new MutableLong(), 0, 0, ":ID", "nodes-1.csv", groups ) ); Collection<DataFactory> relationshipData = asList( generateData( defaultFormatRelationshipFileHeader(), new MutableLong(), 0, 0, ":START_ID,:TYPE,:END_ID", "rels-1.csv", groups ) ); Input input = new CsvInput( nodeData, defaultFormatNodeFileHeader(), relationshipData, defaultFormatRelationshipFileHeader(), IdType.INTEGER, COMMAS, Collector.EMPTY, groups ); // when Input.Estimates estimates = input.calculateEstimates( new PropertyValueRecordSizeCalculator( LATEST_RECORD_FORMATS.property().getRecordSize( NO_STORE_HEADER ), parseInt( GraphDatabaseSettings.string_block_size.getDefaultValue() ), 0, parseInt( GraphDatabaseSettings.array_block_size.getDefaultValue() ), 0 ) ); // then assertEquals( 0, estimates.numberOfNodes() ); assertEquals( 0, estimates.numberOfRelationships() ); assertEquals( 0, estimates.numberOfRelationshipProperties() ); assertEquals( 0, estimates.numberOfNodeProperties() ); assertEquals( 0, estimates.numberOfNodeLabels() ); }
// Grab the collector that accumulated bad/malformed entries during the import,
// read how many entries were rejected, then close it to flush/release its resources.
// NOTE(review): numberOfBadEntries is presumably consumed later in this method — confirm against
// the surrounding (not visible) code.
Collector collector = input.badCollector(); long numberOfBadEntries = collector.badEntries(); collector.close();
@Test public void shouldCloseDataIteratorsInTheEnd() throws Exception { // GIVEN CapturingDataFactories nodeData = new CapturingDataFactories( config -> charReader( "1" ), NO_DECORATOR ); CapturingDataFactories relationshipData = new CapturingDataFactories( config -> charReader( "1,1" ), defaultRelationshipType( "TYPE" ) ); IdType idType = IdType.STRING; Input input = new CsvInput( nodeData, header( entry( null, Type.ID, idType.extractor( extractors ) ) ), relationshipData, header( entry( null, Type.START_ID, idType.extractor( extractors ) ), entry( null, Type.END_ID, idType.extractor( extractors ) ) ), idType, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator iterator = input.nodes().iterator() ) { readNext( iterator ); } try ( InputIterator iterator = input.relationships().iterator() ) { readNext( iterator ); } // THEN assertClosed( nodeData.last() ); assertClosed( relationshipData.last() ); }
Input.Estimates estimates = input.calculateEstimates( new PropertyValueRecordSizeCalculator( format.property().getRecordSize( NO_STORE_HEADER ), parseInt( GraphDatabaseSettings.string_block_size.getDefaultValue() ), 0,
// Grab the collector that accumulated bad/malformed entries during the import,
// read how many entries were rejected, then close it to flush/release its resources.
// NOTE(review): numberOfBadEntries is presumably consumed later in this method — confirm against
// the surrounding (not visible) code.
Collector collector = input.badCollector(); long numberOfBadEntries = collector.badEntries(); collector.close();
@Test public void shouldIgnoreEmptyExtraColumns() throws Exception { // GIVEN Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":ID,one\n" + "1,test,\n" + "2,test,,additional" ) ); // WHEN Collector collector = mock( Collector.class ); Input input = new CsvInput( data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), collector ); // THEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 1L, properties( "one", "test" ), labels() ); assertNextNode( nodes, 2L, properties( "one", "test" ), labels() ); assertFalse( readNext( nodes ) ); } verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 1L ), eq( null ) ); verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( null ) ); verify( collector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( "additional" ) ); }
@Test public void shouldIgnoreRelationshipEntriesMarkedIgnoreUsingHeader() throws Exception { // GIVEN Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":START_ID,:TYPE,:END_ID,prop:IGNORE,other:int\n" + "1,KNOWS,2,Mattias,10\n" + "2,KNOWS,3,Johan,111\n" + "3,KNOWS,4,Emil,12" ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, 1L, 2L, "KNOWS", new Object[] {"other", 10} ); assertNextRelationship( relationships, 2L, 3L, "KNOWS", new Object[] {"other", 111} ); assertNextRelationship( relationships, 3L, 4L, "KNOWS", new Object[] {"other", 12} ); assertFalse( readNext( relationships ) ); } }
/**
 * Prepares this importer for the given {@link Input}: sets up caches, the id mapper and
 * sanity-checks the input estimates before any data is imported.
 * NOTE(review): the statements below are order-dependent (estimates feed the sanity checkers
 * and the double-record-unit decision) — do not reorder.
 *
 * @param input the input to import; retained for later import stages.
 * @throws IOException on I/O error while calculating input estimates.
 */
public void initialize( Input input ) throws IOException
{
    log.info( "Import starting" );
    startTime = currentTimeMillis();
    this.input = input;
    // Number arrays are allocated off-heap/page-cache backed where possible; the monitor
    // records which factory ended up being used
    PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
    numberArrayFactory = auto( neoStore.getPageCache(), storeDir, config.allowCacheAllocationOnHeap(),
            numberArrayFactoryMonitor );
    badCollector = input.badCollector();
    // Some temporary caches and indexes in the import
    idMapper = input.idMapper( numberArrayFactory );
    nodeRelationshipCache = new NodeRelationshipCache( numberArrayFactory, config.denseNodeThreshold() );
    Estimates inputEstimates =
            input.calculateEstimates( neoStore.getPropertyStore().newValueEncodedSizeCalculator() );

    // Sanity checking against estimates
    new EstimationSanityChecker( recordFormats, monitor ).sanityCheck( inputEstimates );
    new HeapSizeSanityChecker( monitor ).sanityCheck( inputEstimates, recordFormats, neoStore,
            nodeRelationshipCache.memoryEstimation( inputEstimates.numberOfNodes() ),
            idMapper.memoryEstimation( inputEstimates.numberOfNodes() ) );

    // Make the set-up pieces available to later stages via dependency satisfaction
    dependencies.satisfyDependencies( inputEstimates, idMapper, neoStore, nodeRelationshipCache,
            numberArrayFactoryMonitor );

    // Decide up front whether relationship records need double record units, based on estimates
    if ( neoStore.determineDoubleRelationshipRecordUnits( inputEstimates ) )
    {
        monitor.doubleRelationshipRecordUnitsEnabled();
    }

    executionMonitor.initialize( dependencies );
}
@Test public void shouldPropagateExceptionFromFailingDecorator() throws Exception { // GIVEN RuntimeException failure = new RuntimeException( "FAILURE" ); Iterable<DataFactory> data = DataFactories.datas( CsvInputTest.data( ":ID,name\n1,Mattias", new FailingNodeDecorator( failure ) ) ); Input input = new CsvInput( data, defaultFormatNodeFileHeader(), datas(), defaultFormatNodeFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { readNext( nodes ); } catch ( InputException e ) { // THEN assertSame( e.getCause(), failure ); } }
@Test public void shouldSkipRelationshipValidationIfToldTo() throws Exception { // GIVEN Iterable<DataFactory> data = datas( CsvInputTest.data( ":START_ID,:END_ID,:TYPE\n" + ",," ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, defaultFormatRelationshipFileHeader(), IdType.INTEGER, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { readNext( relationships ); assertNull( visitor.startId() ); assertNull( visitor.endId() ); assertNull( visitor.stringType ); } }
@Test public void shouldNotParsePointPropertyValuesWithDuplicateKeys() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point\n" + "1,Johan,\" { height :0.01 ,longitude:5, latitude : -4.2, latitude : 4.2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN readNext( nodes ); fail( "Should have failed when key assigned multiple times, but didn't." ); } catch ( InputException ignore ) { // this is fine } }
@Test public void shouldProvideDefaultRelationshipType() throws Exception { // GIVEN String defaultType = "DEFAULT"; String customType = "CUSTOM"; DataFactory data = data( ":START_ID,:END_ID,:TYPE\n" + "0,1,\n" + "1,2," + customType + "\n" + "2,1," + defaultType, defaultRelationshipType( defaultType ) ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), dataIterable, defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN/THEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, 0L, 1L, defaultType, NO_PROPERTIES ); assertNextRelationship( relationships, 1L, 2L, customType, NO_PROPERTIES ); assertNextRelationship( relationships, 2L, 1L, defaultType, NO_PROPERTIES ); assertFalse( readNext( relationships ) ); } }
@Test public void shouldUseHeaderInformationToParsePoint() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point{crs:WGS-84}\n" + "0,Johan,\" { x :1 ,y:2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 0L, new Object[]{"name", "Johan", "point", Values.pointValue( CoordinateReferenceSystem.WGS84, 1, 2)}, labels() ); assertFalse( readNext( nodes ) ); } }
@Test public void shouldDoWithoutRelationshipTypeHeaderIfDefaultSupplied() throws Exception { // GIVEN relationship data w/o :TYPE header String defaultType = "HERE"; DataFactory data = data( ":START_ID,:END_ID,name\n" + "0,1,First\n" + "2,3,Second\n", defaultRelationshipType( defaultType ) ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), dataIterable, defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator relationships = input.relationships().iterator() ) { // THEN assertNextRelationship( relationships, 0L, 1L, defaultType, properties( "name", "First" ) ); assertNextRelationship( relationships, 2L, 3L, defaultType, properties( "name", "Second" ) ); assertFalse( readNext( relationships ) ); } }
@Test public void shouldParsePointPropertyValuesWithCRSInHeader() throws Exception { // GIVEN DataFactory data = data( ":ID,name,point:Point{crs:WGS-84-3D}\n" + "0,Johan,\" { height :0.01 ,longitude:5, latitude : -4.2 } \"\n" ); Iterable<DataFactory> dataIterable = dataIterable( data ); Input input = new CsvInput( dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.ACTUAL, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN try ( InputIterator nodes = input.nodes().iterator() ) { // THEN assertNextNode( nodes, 0L, new Object[]{"name", "Johan", "point", Values.pointValue( CoordinateReferenceSystem.WGS84_3D, 5, -4.2, 0.01)}, labels() ); assertFalse( readNext( nodes ) ); } }
@Test public void shouldProvideRelationshipsFromCsvInput() throws Exception { // GIVEN IdType idType = IdType.STRING; Iterable<DataFactory> data = dataIterable( data( "node1,node2,KNOWS,1234567\n" + "node2,node10,HACKS,987654" ) ); Input input = new CsvInput( datas(), defaultFormatNodeFileHeader(), data, header( entry( "from", Type.START_ID, idType.extractor( extractors ) ), entry( "to", Type.END_ID, idType.extractor( extractors ) ), entry( "type", Type.TYPE, extractors.string() ), entry( "since", Type.PROPERTY, extractors.long_() ) ), idType, config( COMMAS ), silentBadCollector( 0 ) ); // WHEN/THEN try ( InputIterator relationships = input.relationships().iterator() ) { assertNextRelationship( relationships, "node1", "node2", "KNOWS", properties( "since", 1234567L ) ); assertNextRelationship( relationships, "node2", "node10", "HACKS", properties( "since", 987654L ) ); } }