@weldpua2008
Created August 4, 2020 10:13
example_spark.py
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StructType, StringType, IntegerType

# Create a Spark session
spark = SparkSession.builder \
    .appName('appName') \
    .getOrCreate()

# Sample data as a list of tuples
data = [('Category A', 100, "This is category A"),
        ('Category B', 120, "This is category B"),
        ('Category C', 150, "This is category C")]

# Define a schema for the DataFrame
schema = StructType([
    StructField('Category', StringType(), True),
    StructField('Count', IntegerType(), True),
    StructField('Description', StringType(), True)
])

# Convert the list to an RDD
rdd = spark.sparkContext.parallelize(data)

# Create the DataFrame from the RDD and the schema
df = spark.createDataFrame(rdd, schema)
print(df.schema)
df.show()
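
For reference, a minimal alternative sketch, assuming the same spark session, data list, and schema defined above: createDataFrame also accepts a plain Python list, so the explicit parallelize step can usually be skipped.

# Sketch: build the DataFrame straight from the list (assumes spark, data, schema from above)
df2 = spark.createDataFrame(data, schema)
df2.show()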