使用Java开发Spark程序,JavaRDD的功能算子中英文注释
JavaRDDLike 的实现应当扩展这个虚拟(dummy)抽象类,而不是直接继承该特质(trait)。
JavaRDD
1 | package org.apache.spark.api.java |
partitions
1 | /** Set of partitions in this RDD. |
getNumPartitions
1 | /** Return the number of partitions in this RDD. |
partitioner
1 | /** The partitioner of this RDD. |
context
1 | /** The [[org.apache.spark.SparkContext]] that this RDD was created on. |
id
1 | /** A unique ID for this RDD (within its SparkContext). |
name
1 | def name(): String = rdd.name |
getStorageLevel
1 | /** Get the RDD's current storage level, or StorageLevel.NONE if none is set. |
iterator
1 | /** |
Transformations (return a new RDD)
map
1 | /** |
mapPartitionsWithIndex
1 | /** |
mapToDouble
1 | /** |
mapToPair
1 | /** |
flatMap
1 | /** |
flatMapToDouble
1 | /** |
flatMapToPair
1 | /** |
mapPartitions
1 | /** |
mapPartitionsToDouble
1 | /** |
mapPartitionsToPair
1 | /** |
foreachPartition
1 | /** |
glom
1 | /** |
cartesian
1 | /** |
groupBy
1 | /** |
pipe
1 | /** |
zip
1 | /** |
zipPartitions
1 | /** |
zipWithUniqueId
1 | /** |
zipWithIndex
1 | /** |
Actions (launch a job to return a value to the user program)
foreach
1 | /** |
collect
1 | /** |
toLocalIterator
1 | /** |
collectPartitions
1 | /** |
reduce
1 | /** |
treeReduce
1 | /** |
fold
1 | /** |
aggregate
1 | /** |
treeAggregate
1 | /** |
count
1 | /** |
countApprox
1 | /** |
countByValue
1 | /** |
countByValueApprox
1 | /** |
take
1 | /** |
takeSample
1 | def takeSample(withReplacement: Boolean, num: Int): JList[T] = |
first
1 | /** |
isEmpty
1 | /** |
saveAsTextFile
1 | /** |
saveAsObjectFile
1 | /** |
keyBy
1 | /** |
checkpoint
1 | /** |
isCheckpointed
1 | /** |
getCheckpointFile
1 | /** |
toDebugString
1 | /** A description of this RDD and its recursive dependencies for debugging. |
top
1 | /** |
takeOrdered
1 | /** |
max
1 | /** |
min
1 | /** |
countApproxDistinct
1 | /** |
countAsync
1 | /** |
collectAsync
1 | /** |
takeAsync
1 | /** |
foreachAsync
1 | /** |
foreachPartitionAsync
1 | /** |