From 7be093de8b18c2af5f190c410843c7d18f63afc1 Mon Sep 17 00:00:00 2001
From: lokeshrathi
Date: Mon, 17 Apr 2023 18:49:18 +0530
Subject: [PATCH] Added: Code to use value_counts in pyspark

---
 pyspark-value_counts.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 pyspark-value_counts.py

diff --git a/pyspark-value_counts.py b/pyspark-value_counts.py
new file mode 100644
index 0000000..fc257e5
--- /dev/null
+++ b/pyspark-value_counts.py
@@ -0,0 +1,18 @@
+from pyspark.sql import SparkSession
+from pyspark.sql.functions import desc
+
+# Create a SparkSession
+spark = SparkSession.builder.appName("ValueCountsExample").getOrCreate()
+
+# Create a PySpark DataFrame
+data = [("apple", 5), ("orange", 3), ("banana", 2), ("apple", 3), ("orange", 4)]
+df = spark.createDataFrame(data, schema=["fruit", "quantity"])
+
+# Use groupBy and count to get the counts for each distinct value in the 'fruit' column
+counts = df.groupBy("fruit").count().orderBy(desc("count"))
+
+# Show the results
+counts.show()
+
+# Stop the SparkSession
+spark.stop()
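
Note: the groupBy().count().orderBy(desc("count")) chain in the patched script reproduces what pandas' value_counts() gives on the same data. A minimal comparison sketch, assuming pandas is available (it is not part of this patch):

import pandas as pd

# Same sample data as the patched script, loaded into a pandas DataFrame.
pdf = pd.DataFrame(
    [("apple", 5), ("orange", 3), ("banana", 2), ("apple", 3), ("orange", 4)],
    columns=["fruit", "quantity"],
)

# value_counts() returns per-value counts sorted in descending order,
# matching the PySpark result: apple=2, orange=2, banana=1
# (the relative order of tied counts may differ between pandas and Spark).
print(pdf["fruit"].value_counts())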