Skip to content

Instantly share code, notes, and snippets.

@jimathyp
Last active August 24, 2022 20:38
Show Gist options
  • Save jimathyp/b1eab3eb30eedde929c12d1eda9dd51a to your computer and use it in GitHub Desktop.
Save jimathyp/b1eab3eb30eedde929c12d1eda9dd51a to your computer and use it in GitHub Desktop.

Revisions

  1. jimathyp revised this gist Aug 24, 2022. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions spark-df-functions.md
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,6 @@
    Spark DataFrame attributes and methods
    ======================================

    Output of `dir(df)`, where `df` is a PySpark DataFrame:
    ```
    ['__class__',
  2. jimathyp created this gist Aug 19, 2022.
    141 changes: 141 additions & 0 deletions spark-df-functions.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,141 @@
    Output of `dir(df)`, where `df` is a PySpark DataFrame:
    ```
    ['__class__',
    '__delattr__',
    '__dict__',
    '__dir__',
    '__doc__',
    '__eq__',
    '__format__',
    '__ge__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__init__',
    '__init_subclass__',
    '__le__',
    '__lt__',
    '__module__',
    '__ne__',
    '__new__',
    '__reduce__',
    '__reduce_ex__',
    '__repr__',
    '__setattr__',
    '__sizeof__',
    '__str__',
    '__subclasshook__',
    '__weakref__',
    '_collect_as_arrow',
    '_jcols',
    '_jdf',
    '_jmap',
    '_joinAsOf',
    '_jseq',
    '_lazy_rdd',
    '_repr_html_',
    '_sc',
    '_schema',
    '_sort_cols',
    '_support_repr_html',
    '_to_corrected_pandas_type',
    'agg',
    'alias',
    'approxQuantile',
    'cache',
    'checkpoint',
    'coalesce',
    'colRegex',
    'collect',
    'columns',
    'corr',
    'count',
    'cov',
    'createGlobalTempView',
    'createOrReplaceGlobalTempView',
    'createOrReplaceTempView',
    'createTempView',
    'crossJoin',
    'crosstab',
    'cube',
    'describe',
    'display',
    'distinct',
    'drop',
    'dropDuplicates',
    'drop_duplicates',
    'dropna',
    'dtypes',
    'exceptAll',
    'explain',
    'fillna',
    'filter',
    'first',
    'foreach',
    'foreachPartition',
    'freqItems',
    'groupBy',
    'groupby',
    'head',
    'hint',
    'inputFiles',
    'intersect',
    'intersectAll',
    'isLocal',
    'isStreaming',
    'is_cached',
    'join',
    'limit',
    'localCheckpoint',
    'mapInPandas',
    'na',
    'orderBy',
    'persist',
    'printSchema',
    'randomSplit',
    'rdd',
    'registerTempTable',
    'repartition',
    'repartitionByRange',
    'replace',
    'rollup',
    'sameSemantics',
    'sample',
    'sampleBy',
    'schema',
    'select',
    'selectExpr',
    'semanticHash',
    'show',
    'sort',
    'sortWithinPartitions',
    'sql_ctx',
    'stat',
    'storageLevel',
    'subtract',
    'summary',
    'tail',
    'take',
    'toDF',
    'toJSON',
    'toLocalIterator',
    'toPandas',
    'to_koalas',
    'to_pandas_on_spark',
    'transform',
    'union',
    'unionAll',
    'unionByName',
    'unpersist',
    'where',
    'withColumn',
    'withColumnRenamed',
    'withColumns',
    'withMetadata',
    'withWatermark',
    'write',
    'writeStream',
    'writeTo']
    ```