使用 Java 开发 Spark 程序:JavaRDD 功能算子的中英文注释
JavaRDDLike 的实现类应当继承这个占位(dummy)抽象类,而不是直接继承 JavaRDDLike 这个特质(trait)。
JavaRDD
1  | package org.apache.spark.api.java  | 
partitions
1  | /** Set of partitions in this RDD.  | 
getNumPartitions
1  | /** Return the number of partitions in this RDD.  | 
partitioner
1  | /** The partitioner of this RDD.  | 
context
1  | /** The [[org.apache.spark.SparkContext]] that this RDD was created on.  | 
id
1  | /** A unique ID for this RDD (within its SparkContext).  | 
name
1  | def name(): String = rdd.name  | 
getStorageLevel
1  | /** Get the RDD's current storage level, or StorageLevel.NONE if none is set.  | 
iterator
1  | /**  | 
Transformations (return a new RDD)
map
1  | /**  | 
mapPartitionsWithIndex
1  | /**  | 
mapToDouble
1  | /**  | 
mapToPair
1  | /**  | 
flatMap
1  | /**  | 
flatMapToDouble
1  | /**  | 
flatMapToPair
1  | /**  | 
mapPartitions
1  | /**  | 
mapPartitionsToDouble
1  | /**  | 
mapPartitionsToPair
1  | /**  | 
foreachPartition
1  | /**  | 
glom
1  | /**  | 
cartesian
1  | /**  | 
groupBy
1  | /**  | 
pipe
1  | /**  | 
zip
1  | /**  | 
zipPartitions
1  | /**  | 
zipWithUniqueId
1  | /**  | 
zipWithIndex
1  | /**  | 
Actions (launch a job to return a value to the user program)
foreach
1  | /**  | 
collect
1  | /**  | 
toLocalIterator
1  | /**  | 
collectPartitions
1  | /**  | 
reduce
1  | /**  | 
treeReduce
1  | /**  | 
fold
1  | /**  | 
aggregate
1  | /**  | 
treeAggregate
1  | /**  | 
count
1  | /**  | 
countApprox
1  | /**  | 
countByValue
1  | /**  | 
countByValueApprox
1  | /**  | 
take
1  | /**  | 
takeSample
1  | def takeSample(withReplacement: Boolean, num: Int): JList[T] =  | 
first
1  | /**  | 
isEmpty
1  | /**  | 
saveAsTextFile
1  | /**  | 
saveAsObjectFile
1  | /**  | 
keyBy
1  | /**  | 
checkpoint
1  | /**  | 
isCheckpointed
1  | /**  | 
getCheckpointFile
1  | /**  | 
toDebugString
1  | /** A description of this RDD and its recursive dependencies for debugging.  | 
top
1  | /**  | 
takeOrdered
1  | /**  | 
max
1  | /**  | 
min
1  | /**  | 
countApproxDistinct
1  | /**  | 
countAsync
1  | /**  | 
collectAsync
1  | /**  | 
takeAsync
1  | /**  | 
foreachAsync
1  | /**  | 
foreachPartitionAsync
1  | /**  |