// Find the max partition number among committed segments in the interval.
for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
  if (maxId == null
      || maxId.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
    maxId = SegmentIdWithShardSpec.fromDataSegment(existing.getObject());
  }
}

// A pending segment supersedes the committed max if it has a higher version,
// or the same version with a higher partition number.
for (SegmentIdWithShardSpec pending : pendings) {
  if (maxId == null
      || pending.getVersion().compareTo(maxId.getVersion()) > 0
      || (pending.getVersion().equals(maxId.getVersion())
          && pending.getShardSpec().getPartitionNum() > maxId.getShardSpec().getPartitionNum())) {
    maxId = pending;
  }
}

if (maxId == null) {
  return new SegmentIdWithShardSpec(
      dataSource,
      interval,
      maxVersion,
      new NumberedShardSpec(0, 0)
  );
} else if (!maxId.getInterval().equals(interval) || maxId.getVersion().compareTo(maxVersion) > 0) {
  log.warn(
      "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].",
      dataSource,
      interval,
      maxVersion,
      maxId
  );
  return null;
} else if (maxId.getShardSpec() instanceof LinearShardSpec) {
  return new SegmentIdWithShardSpec(
      dataSource,
      maxId.getInterval(),
      maxId.getVersion(),
      new LinearShardSpec(maxId.getShardSpec().getPartitionNum() + 1)
  );
}
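// A minimal sketch of the LinearShardSpec bump above, assuming only the partition number
// changes between allocations. The concrete values are hypothetical, and Intervals.of is
// assumed to be Druid's interval helper.
final SegmentIdWithShardSpec maxId = new SegmentIdWithShardSpec(
    "ds",
    Intervals.of("2017-01-01/2017-01-02"),
    "v1",
    new LinearShardSpec(2)
);
// Same dataSource, interval, and version; partitionNum goes from 2 to 3.
final SegmentIdWithShardSpec next = new SegmentIdWithShardSpec(
    maxId.getDataSource(),
    maxId.getInterval(),
    maxId.getVersion(),
    new LinearShardSpec(maxId.getShardSpec().getPartitionNum() + 1)
);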
private void addSegment(final SegmentIdWithShardSpec identifier)
{
  segments.put(identifier.getInterval().getStartMillis(), identifier);
  try {
    segmentAnnouncer.announceSegment(
        new DataSegment(
            identifier.getDataSource(),
            identifier.getInterval(),
            identifier.getVersion(),
            ImmutableMap.of(),
            ImmutableList.of(),
            ImmutableList.of(),
            identifier.getShardSpec(),
            null,
            0
        )
    );
  }
  catch (IOException e) {
    log.makeAlert(e, "Failed to announce new segment[%s]", identifier.getDataSource())
       .addData("interval", identifier.getInterval())
       .emit();
  }
}
final int committedHydrants = committed.getCommittedHydrants(identifier.toString());

// Reload each persisted hydrant directory as a queryable index.
hydrants.add(
    new FireHydrant(
        new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()),
        hydrantNumber
    )
);

// Rebuild the sink from the identifier's metadata and register it in the timeline.
final Sink currSink = new Sink(
    identifier.getInterval(),
    schema,
    identifier.getShardSpec(),
    identifier.getVersion(),
    tuningConfig.getMaxRowsInMemory(),
    maxBytesTuningConfig,
    tuningConfig.isReportParseExceptions(),
    null,
    hydrants
);
sinkTimeline.add(
    currSink.getInterval(),
    currSink.getVersion(),
    identifier.getShardSpec().createChunk(currSink)
);
// Group the requested identifiers by dataSource.
if (!identifiersByDataSource.containsKey(identifier.getDataSource())) {
  identifiersByDataSource.put(identifier.getDataSource(), new HashSet<>());
}
identifiersByDataSource.get(identifier.getDataSource()).add(identifier);

// Condense each dataSource's intervals before querying for used segments.
final List<Interval> intervals = JodaUtils.condenseIntervals(
    Iterables.transform(entry.getValue(), input -> input.getInterval())
);

// Keep only used segments whose identity matches a requested identifier.
if (identifiers.contains(SegmentIdWithShardSpec.fromDataSegment(segment))) {
  retVal.add(segment);
}
void add(SegmentIdWithShardSpec identifier)
{
  intervalToSegmentStates.computeIfAbsent(
      identifier.getInterval().getStartMillis(),
      k -> new SegmentsOfInterval(identifier.getInterval())
  ).setAppendingSegment(SegmentWithState.newSegment(identifier));
  lastSegmentId = identifier.toString();
}
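// Hedged usage sketch: `tracker`, `id0`, and `id1` are hypothetical. Identifiers for two
// different hours land in two SegmentsOfInterval entries, and lastSegmentId always tracks
// the most recent add.
tracker.add(id0); // e.g. interval 2017-01-01T00/T01
tracker.add(id1); // e.g. interval 2017-01-01T01/T02
// intervalToSegmentStates now has two entries keyed by interval start millis,
// and lastSegmentId equals id1.toString().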
public static SegmentIdWithShardSpec fromDataSegment(final DataSegment segment)
{
  return new SegmentIdWithShardSpec(
      segment.getDataSource(),
      segment.getInterval(),
      segment.getVersion(),
      segment.getShardSpec()
  );
}
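// Hedged round-trip check using the nine-argument DataSegment constructor from addSegment()
// above; Intervals.of is assumed to be Druid's interval helper. The identity fields survive
// the conversion, while loadSpec, dimensions, metrics, and size are dropped.
final DataSegment segment = new DataSegment(
    "ds",
    Intervals.of("2017-01-01/2017-02-01"),
    "version",
    ImmutableMap.of(),
    ImmutableList.of(),
    ImmutableList.of(),
    new NumberedShardSpec(0, 1),
    null,
    0
);
final SegmentIdWithShardSpec id = SegmentIdWithShardSpec.fromDataSegment(segment);
Assert.assertEquals(segment.getDataSource(), id.getDataSource());
Assert.assertEquals(segment.getInterval(), id.getInterval());
Assert.assertEquals(segment.getVersion(), id.getVersion());
Assert.assertEquals(segment.getShardSpec().getPartitionNum(), id.getShardSpec().getPartitionNum());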
final SegmentIdWithShardSpec id2 = allocate(
    Granularities.HOUR,
    "s1",
    id1.toString()
);
final SegmentIdWithShardSpec id3 = allocate(
    Granularities.HOUR,
    "s1",
    id2.toString()
);
final SegmentIdWithShardSpec id4 = allocate(
    Granularities.HOUR,
    "s1",
    id1.toString()
);
final SegmentIdWithShardSpec id5 = allocate(
    Granularities.HOUR,
    "s1",
    id1.toString()
);
final SegmentIdWithShardSpec id6 = allocate(
    Granularities.MINUTE,
    "s1",
    id1.toString()
);
final SegmentIdWithShardSpec id7 = allocate(
private Sink getOrCreateSink(final SegmentIdWithShardSpec identifier)
{
  Sink retVal = sinks.get(identifier);

  if (retVal == null) {
    retVal = new Sink(
        identifier.getInterval(),
        schema,
        identifier.getShardSpec(),
        identifier.getVersion(),
        tuningConfig.getMaxRowsInMemory(),
        maxBytesTuningConfig,
        tuningConfig.isReportParseExceptions(),
        null
    );

    try {
      segmentAnnouncer.announceSegment(retVal.getSegment());
    }
    catch (IOException e) {
      log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource())
         .addData("interval", retVal.getInterval())
         .emit();
    }

    sinks.put(identifier, retVal);
    metrics.setSinkCount(sinks.size());
    sinkTimeline.add(retVal.getInterval(), retVal.getVersion(), identifier.getShardSpec().createChunk(retVal));
  }

  return retVal;
}
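// Usage sketch with a hypothetical identifier: the method is idempotent per identifier, so a
// second call returns the cached Sink rather than announcing the segment again.
final Sink first = getOrCreateSink(identifier);
final Sink second = getOrCreateSink(identifier);
// first == second; creation, announcement, and the sinkTimeline registration happened only once.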
public static Committed create(
    Map<SegmentIdWithShardSpec, Integer> hydrants0,
    Object metadata
)
{
  final ImmutableMap.Builder<String, Integer> hydrants = ImmutableMap.builder();
  for (Map.Entry<SegmentIdWithShardSpec, Integer> entry : hydrants0.entrySet()) {
    hydrants.put(entry.getKey().toString(), entry.getValue());
  }
  return new Committed(hydrants.build(), metadata);
}
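// Hedged example: keys are stringified segment identifiers, values are per-segment hydrant
// counts; id1, id2, and the metadata map are hypothetical stand-ins.
final Committed committed = Committed.create(
    ImmutableMap.of(id1, 3, id2, 1),
    ImmutableMap.of("offset", 42)
);
// Lookups use the string form of the identifier, mirroring the builder loop above.
final int hydrants = committed.getCommittedHydrants(id1.toString()); // 3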
// Remove this segment from the committed list.
try {
  final Committed oldCommit = readCommit();
  if (oldCommit != null) {
    writeCommit(oldCommit.without(identifier.toString()));
  }
}
catch (Exception e) {
  log.makeAlert(e, "Failed to update committed segments[%s]", schema.getDataSource())
     .addData("identifier", identifier.toString())
     .emit();
  throw Throwables.propagate(e);
}

// Unannounce the segment.
try {
  segmentAnnouncer.unannounceSegment(sink.getSegment());
}
catch (Exception e) {
  log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource())
     .addData("identifier", identifier.toString())
     .emit();
}

sinkTimeline.remove(
    sink.getInterval(),
    sink.getVersion(),
    identifier.getShardSpec().createChunk(sink)
);
for (FireHydrant hydrant : sink) {
SegmentsForSequence build()
{
  final NavigableMap<Long, SegmentsOfInterval> map = new TreeMap<>();
  for (Entry<SegmentIdWithShardSpec, Pair<SegmentWithState, List<SegmentWithState>>> entry :
      intervalToSegments.entrySet()) {
    map.put(
        entry.getKey().getInterval().getStartMillis(),
        new SegmentsOfInterval(entry.getKey().getInterval(), entry.getValue().lhs, entry.getValue().rhs)
    );
  }
  return new SegmentsForSequence(map, lastSegmentId);
}
@Override
public Set<DataSegment> findUsedSegments(Set<SegmentIdWithShardSpec> identifiers)
{
  final VersionedIntervalTimeline<String, DataSegment> timeline =
      new VersionedIntervalTimeline<>(Ordering.natural());
  for (DataSegment dataSegment : appenderatorTester.getPushedSegments()) {
    timeline.add(
        dataSegment.getInterval(),
        dataSegment.getVersion(),
        dataSegment.getShardSpec().createChunk(dataSegment)
    );
  }

  final Set<DataSegment> retVal = new HashSet<>();
  for (SegmentIdWithShardSpec identifier : identifiers) {
    for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(identifier.getInterval())) {
      for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
        if (identifiers.contains(SegmentIdWithShardSpec.fromDataSegment(chunk.getObject()))) {
          retVal.add(chunk.getObject());
        }
      }
    }
  }
  return retVal;
}
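// Hedged usage: `checker` is a hypothetical instance of the implementation above, and id1/id2
// are hypothetical identifiers. Only pushed segments whose identity round-trips through
// fromDataSegment back into the requested set come back.
final Set<DataSegment> used = checker.findUsedSegments(ImmutableSet.of(id1, id2));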
Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version", identifier.toString()); identifier.toString(), interval, identifier.getVersion(), false ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_1", identifier1.toString()); identifier1.toString(), interval, identifier1.getVersion(), false ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier2.toString()); identifier1.toString(), interval, identifier1.getVersion(), false ); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier3.toString()); Assert.assertEquals(identifier2, identifier3); Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_3", identifier4.toString());
/**
 * Move a set of identifiers out from "active", making way for newer segments.
 * This method is to support KafkaIndexTask's legacy mode and will be removed in the future.
 * See KafkaIndexTask.runLegacy().
 */
public void moveSegmentOut(final String sequenceName, final List<SegmentIdWithShardSpec> identifiers)
{
  synchronized (segments) {
    final SegmentsForSequence activeSegmentsForSequence = segments.get(sequenceName);
    if (activeSegmentsForSequence == null) {
      throw new ISE("WTF?! Asked to remove segments for sequenceName[%s] which doesn't exist...", sequenceName);
    }

    for (final SegmentIdWithShardSpec identifier : identifiers) {
      log.info("Moving segment[%s] out of active list.", identifier);
      final long key = identifier.getInterval().getStartMillis();
      final SegmentsOfInterval segmentsOfInterval = activeSegmentsForSequence.get(key);
      if (segmentsOfInterval == null
          || segmentsOfInterval.getAppendingSegment() == null
          || !segmentsOfInterval.getAppendingSegment().getSegmentIdentifier().equals(identifier)) {
        throw new ISE("WTF?! Asked to remove segment[%s] that didn't exist...", identifier);
      }
      segmentsOfInterval.finishAppendingToCurrentActiveSegment(SegmentWithState::finishAppending);
    }
  }
}
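// Usage sketch, assuming `driver` owns this method and `id` is the currently appending
// segment of sequence "s1"; any other identifier trips the ISE above.
driver.moveSegmentOut("s1", ImmutableList.of(id));
// The segment stays tracked, but it is no longer appending, so the next row for this
// interval forces allocation of a fresh segment.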
@Override
public SegmentIdWithShardSpec apply(DataSegment input)
{
  return SegmentIdWithShardSpec.fromDataSegment(input);
}
private void assertSameIdentifier(final SegmentIdWithShardSpec one, final SegmentIdWithShardSpec other)
{
  Assert.assertEquals(one, other);
  Assert.assertEquals(one.getShardSpec().getPartitionNum(), other.getShardSpec().getPartitionNum());

  if (one.getShardSpec().getClass() == NumberedShardSpec.class
      && other.getShardSpec().getClass() == NumberedShardSpec.class) {
    Assert.assertEquals(
        ((NumberedShardSpec) one.getShardSpec()).getPartitions(),
        ((NumberedShardSpec) other.getShardSpec()).getPartitions()
    );
  } else if (one.getShardSpec().getClass() == LinearShardSpec.class
      && other.getShardSpec().getClass() == LinearShardSpec.class) {
    // do nothing
  } else {
    throw new ISE(
        "Unexpected shardSpecs [%s] and [%s]",
        one.getShardSpec().getClass(),
        other.getShardSpec().getClass()
    );
  }
}
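// Hedged example: two NumberedShardSpec identifiers that agree on partitionNum and partitions
// pass all asserts; `interval` is a hypothetical Interval. Mixing shard-spec classes would
// fall through to the ISE branch.
assertSameIdentifier(
    new SegmentIdWithShardSpec("ds", interval, "v1", new NumberedShardSpec(0, 2)),
    new SegmentIdWithShardSpec("ds", interval, "v1", new NumberedShardSpec(0, 2))
);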
if (!identifier.getDataSource().equals(schema.getDataSource())) {
  throw new IAE(
      "Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!",
      schema.getDataSource(),
      identifier.getDataSource()
  );
}
return new SegmentIdWithShardSpec(
    dataSource,
    interval,
final SegmentIdWithShardSpec id2 = allocate(
    Granularities.HOUR,
    "s1",
    id1.toString()
);
final SegmentIdWithShardSpec id3 = allocate(
    Granularities.HOUR,
    "s1",
    id2.toString()
);

assertSameIdentifier(
    id1,
    new SegmentIdWithShardSpec(
        DATA_SOURCE,
        Granularities.HOUR.bucket(PARTY_TIME),
        id1.getVersion(),
        new NumberedShardSpec(0, 0)
    )
);
assertSameIdentifier(
    id2,
    new SegmentIdWithShardSpec(
        DATA_SOURCE,
        Granularities.HOUR.bucket(PARTY_TIME),
        id1.getVersion(),
        new NumberedShardSpec(1, 0)
    )
);
assertSameIdentifier(
    id3,
    new SegmentIdWithShardSpec(
        DATA_SOURCE,
        Granularities.HOUR.bucket(PARTY_TIME),
        id1.getVersion(),
        new NumberedShardSpec(2, 0)
    )
);
handoffNotifier.registerSegmentHandoffCallback(
    new SegmentDescriptor(
        segmentIdentifier.getInterval(),
        segmentIdentifier.getVersion(),
        segmentIdentifier.getShardSpec().getPartitionNum()
    ),
    Execs.directExecutor(),
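// A hedged, self-contained sketch of the same registration, assuming the shape
// registerSegmentHandoffCallback(SegmentDescriptor, Executor, Runnable), where the Runnable
// fires once the descriptor's segment has been loaded by a historical. The log line is
// illustrative only.
handoffNotifier.registerSegmentHandoffCallback(
    new SegmentDescriptor(
        segmentIdentifier.getInterval(),
        segmentIdentifier.getVersion(),
        segmentIdentifier.getShardSpec().getPartitionNum()
    ),
    Execs.directExecutor(),
    () -> log.info("Segment[%s] handed off.", segmentIdentifier)
);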