/**
 * Summarises the content sizes found in a set of access-log records.
 *
 * @param accessLogRDD access-log records; each one is mapped to a double by
 *                     {@code GetContentSize}
 * @return a tuple of (count, sum, min, max), where sum, min and max are converted with
 *         {@code longValue()}; {@code null} when the RDD holds no elements
 */
public static final @Nullable Tuple4<Long, Long, Long, Long> contentSizeStats(
        JavaRDD<ApacheAccessLog> accessLogRDD) {
    // cache() is applied up front; the same RDD is then used by count, reduce, min and max.
    JavaDoubleRDD sizes = accessLogRDD.mapToDouble(new GetContentSize()).cache();

    long total = sizes.count();
    if (total == 0) {
        return null;
    }

    // The natural ordering is routed through Object so it can be viewed as a Comparator<Double>.
    Object naturalOrder = Ordering.natural();
    final Comparator<Double> byValue = (Comparator<Double>) naturalOrder;

    long sum = sizes.reduce(new SumReducer()).longValue();
    long smallest = sizes.min(byValue).longValue();
    long largest = sizes.max(byValue).longValue();
    return new Tuple4<>(total, sum, smallest, largest);
}
/** * @param evalData data for evaluation * @return the Dunn Index of a given clustering * (https://en.wikipedia.org/wiki/Cluster_analysis#Internal_evaluation); higher is better */ @Override double evaluate(JavaRDD<Vector> evalData) { // Intra-cluster distance is mean distance to centroid double maxIntraClusterDistance = fetchClusterMetrics(evalData).values().mapToDouble(ClusterMetric::getMeanDist).max(); // Inter-cluster distance is distance between centroids double minInterClusterDistance = Double.POSITIVE_INFINITY; List<ClusterInfo> clusters = new ArrayList<>(getClustersByID().values()); DistanceFn<double[]> distanceFn = getDistanceFn(); for (int i = 0; i < clusters.size(); i++) { double[] centerI = clusters.get(i).getCenter(); // Distances are symmetric, hence d(i,j) == d(j,i) for (int j = i + 1; j < clusters.size(); j++) { double[] centerJ = clusters.get(j).getCenter(); minInterClusterDistance = Math.min(minInterClusterDistance, distanceFn.applyAsDouble(centerI, centerJ)); } } return minInterClusterDistance / maxIntraClusterDistance; }
/** Checks that the no-argument {@code max()} over the doubles 1.0 through 4.0 yields 4.0. */
@Test
public void naturalMax() {
    final double expected = 4.0;
    final double delta = 0.0;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max();

    assertEquals(expected, largest, delta);
}
/** Checks that the no-argument {@code max()} over the doubles 1.0 through 4.0 yields 4.0. */
@Test
public void naturalMax() {
    final double expected = 4.0;
    final double delta = 0.0;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max();

    assertEquals(expected, largest, delta);
}
/** Checks that the no-argument {@code max()} over the doubles 1.0 through 4.0 yields 4.0. */
@Test
public void naturalMax() {
    final double expected = 4.0;
    final double delta = 0.0;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max();

    assertEquals(expected, largest, delta);
}
/**
 * Checks that {@code max} with a {@code DoubleComparator} over the doubles 1.0 through 4.0
 * yields 4.0, within a tolerance of 0.001.
 */
@Test
public void max() {
    final double expected = 4.0;
    final double tolerance = 0.001;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max(new DoubleComparator());

    assertEquals(expected, largest, tolerance);
}
/**
 * Checks that {@code max} with a {@code DoubleComparator} over the doubles 1.0 through 4.0
 * yields 4.0, within a tolerance of 0.001.
 */
@Test
public void max() {
    final double expected = 4.0;
    final double tolerance = 0.001;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max(new DoubleComparator());

    assertEquals(expected, largest, tolerance);
}
/**
 * Checks that {@code max} with a {@code DoubleComparator} over the doubles 1.0 through 4.0
 * yields 4.0, within a tolerance of 0.001.
 */
@Test
public void max() {
    final double expected = 4.0;
    final double tolerance = 0.001;

    JavaDoubleRDD values = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0));
    double largest = values.max(new DoubleComparator());

    assertEquals(expected, largest, tolerance);
}
/**
 * Returns the maximum reported by {@code doubleStream}.
 *
 * @return an empty {@code OptionalDouble} when {@code doubleStream.isEmpty()} is true,
 *         otherwise an {@code OptionalDouble} wrapping {@code doubleStream.max()}
 */
@Override
public OptionalDouble max() {
    // isEmpty() is consulted first; max() is only invoked on a non-empty source.
    return doubleStream.isEmpty()
        ? OptionalDouble.empty()
        : OptionalDouble.of(doubleStream.max());
}
/**
 * Entry point: builds a {@code StructureDataRDD} from a fixed folder path, takes its C-alpha
 * data restricted with {@code filterLength(10, 300)}, and prints the mean, min, max and count
 * of the resulting length distribution followed by the elapsed wall-clock milliseconds.
 *
 * @param args the arguments for the function (not read by this method)
 * @throws IOException declared by the signature; the visible body does not show which call
 *         raises it
 */
public static void main(String[] args) throws IOException {
    final long startedAt = System.currentTimeMillis();

    SegmentDataRDD chains =
        new StructureDataRDD("/path/to/hadoopfolder").getCalpha().filterLength(10, 300);

    // cache() is applied before the distribution is queried four times below.
    JavaDoubleRDD lengths = chains.getLengthDist().cache();

    System.out.println(lengths.mean());
    System.out.println(lengths.min());
    System.out.println(lengths.max());
    System.out.println(lengths.count());

    long elapsedMillis = System.currentTimeMillis() - startedAt;
    System.out.println(elapsedMillis);
}