/**
 * Materializes the given DataFrame on the driver and converts each row into a
 * {@code Record} bound to the supplied tenant and table.
 *
 * <p>NOTE(review): {@code collect()} pulls the whole DataFrame into driver
 * memory — callers are expected to pass bounded result sets.
 *
 * @param tenantId  tenant the records belong to
 * @param tableName destination table name stored on each record
 * @param dataFrame source rows; its schema is used to map values by column
 * @return one {@code Record} per row, in collection order
 */
public static List<Record> dataFrameToRecordsList(int tenantId, String tableName, DataFrame dataFrame) {
    StructType schema = dataFrame.schema();
    List<Record> records = new ArrayList<>();
    for (Row row : dataFrame.collect()) {
        records.add(new Record(tenantId, tableName, convertRowAndSchemaToValuesMap(row, schema)));
    }
    return records;
}
/**
 * Collects the distinct values of the named column as strings.
 *
 * <p>NOTE(review): assumes the column is of string type — {@code getString(0)}
 * will throw a ClassCastException otherwise; confirm against callers.
 *
 * @param data source DataFrame
 * @param name column whose distinct values are wanted
 * @return distinct values of {@code name}, in the order returned by Spark
 */
private static ArrayList<String> getColumnStates(DataFrame data, String name) {
    // Fix: the original used the raw type `new ArrayList()` (unchecked warning);
    // use the diamond operator for a properly typed list.
    ArrayList<String> states = new ArrayList<>();
    final Row[] statesRow = data.select(name).distinct().collect();
    for (Row r : statesRow) {
        states.add(r.getString(0));
    }
    return states;
}
@Override public List<String> call(JobContext jc) throws Exception { InputStream source = getClass().getResourceAsStream("/testweet.json"); // Save the resource as a file in HDFS (or the local tmp dir when using a local filesystem). URI input; File local = File.createTempFile("tweets", ".json", jc.getLocalTmpDir()); Files.copy(source, local.toPath(), StandardCopyOption.REPLACE_EXISTING); FileSystem fs = FileSystem.get(jc.sc().sc().hadoopConfiguration()); if ("file".equals(fs.getUri().getScheme())) { input = local.toURI(); } else { String uuid = UUID.randomUUID().toString(); Path target = new Path("/tmp/" + uuid + "-tweets.json"); fs.copyFromLocalFile(new Path(local.toURI()), target); input = target.toUri(); } SQLContext sqlctx = useHiveContext ? jc.hivectx() : jc.sqlctx(); sqlctx.jsonFile(input.toString()).registerTempTable("tweets"); List<String> tweetList = new ArrayList<>(); Row[] result = (Row[])(sqlctx.sql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10") .collect()); for (Row r : result) { tweetList.add(r.toString()); } return tweetList; }
@Override public SqlTypedResult sqlTyped(String command, Integer maxRows, DataSourceDescriptor dataSource) throws DDFException { DataFrame rdd = ((SparkDDFManager) this.getManager()).getHiveContext().sql(command); Schema schema = SparkUtils.schemaFromDataFrame(rdd); int columnSize = schema.getNumColumns(); Row[] rddRows = rdd.collect(); List<List<SqlTypedCell>> sqlTypedResult = new ArrayList<List<SqlTypedCell>>(); // Scan every cell and add the type information. for (int rowIdx = 0; rowIdx < rddRows.length; ++rowIdx) { List<SqlTypedCell> row = new ArrayList<SqlTypedCell>(); for (int colIdx = 0; colIdx < columnSize; ++ colIdx) { // TODO: Optimize by reducing getType(). row.add(new SqlTypedCell(schema.getColumn(colIdx).getType(), rddRows[rowIdx].get(colIdx).toString())); } sqlTypedResult.add(row); } return new SqlTypedResult(schema, sqlTypedResult); }
/**
 * Converts a DataFrame into an {@code AnalyticsQueryResult}, honoring the
 * optional {@code carbon.spark.results.limit} setting (-1 means unlimited).
 *
 * @param dataFrame query result to convert
 * @return field names plus the collected rows as objects
 * @throws AnalyticsExecutionException propagated from row conversion
 */
private AnalyticsQueryResult toResult(DataFrame dataFrame) throws AnalyticsExecutionException {
    int resultsLimit = this.sparkConf.getInt("carbon.spark.results.limit", -1);
    // limit() never changes the schema, so the field names are taken from the
    // (possibly truncated) frame we actually collect.
    DataFrame bounded = (resultsLimit == -1) ? dataFrame : dataFrame.limit(resultsLimit);
    return new AnalyticsQueryResult(bounded.schema().fieldNames(),
        convertRowsToObjects(bounded.collect()));
}
// NOTE(review): mid-method fragment — the enclosing signature and the origin of
// `sql` are outside this chunk, so the code is left untouched. It first collects
// the DISTINCT non-null values of `column`, then runs a second projection query
// and keeps its first row. The inner `Row[] rows` re-declares the outer one —
// presumably they live in separate scopes in the full file; TODO confirm this
// compiles in context.
String sqlCmd = String.format("select distinct(%s) from %s where %s is not null", column.getName(), this.getDDF().getTableName(), column.getName()); DataFrame sqlresult = sqlContext.sql(sqlCmd); Row[] rows = sqlresult.collect(); List<String> values = new ArrayList<>(); for(Row row: rows) { sql = String.format("select %s from %s", sql, this.getDDF().getTableName()); DataFrame sqlResult = sqlContext.sql(sql); Row[] rows = sqlResult.collect(); Row result = rows[0]; int i = 0;
// NOTE(review): mid-method fragment — `jsc`, `jrdd`, `model`, `prediction`,
// `output` and `s` are declared outside this chunk, so the code is left
// untouched. It builds a DataFrame from the JavaRDD, applies the trained model,
// broadcasts the collected "prediction" column, and (when the transformed frame
// is non-empty) maps the stream `s` through the classification function.
// collect()-then-broadcast pulls all predictions to the driver — acceptable only
// for small batches; verify against callers.
DataFrame df0 = (new SQLContext(jsc)).createDataFrame(jrdd, WhitespaceContext.class); DataFrame df1 = model.transform(df0); prediction = jsc.broadcast(df1.select("prediction").collect()); if (df1.count() > 0) { output = s.map(new WhitespaceClassificationFunction());