snehalnair · July 5, 2020 09:41
diff --git a/calculate_sparsity b/calculate_sparsity
 def get_mat_sparsity(ratings):
    """Print and return the sparsity of the user-by-movie ratings matrix.

    Sparsity is the percentage of (user, movie) pairs that have no rating:
    ``(1 - n_ratings / (n_users * n_movies)) * 100``.

    Args:
        ratings: Object supporting ``select(col).count()`` and
            ``select(col).distinct().count()`` for the columns ``"rating"``,
            ``"userId"`` and ``"movieId"`` (presumably a PySpark DataFrame;
            confirm against the caller).

    Returns:
        The sparsity as a percentage, or ``None`` when there are no users or
        no movies, in which case the sparsity is undefined.
    """
    # Every row is one observed rating, i.e. one filled cell of the matrix.
    rated_cells = ratings.select("rating").count()

    # The full matrix has one cell per (distinct user, distinct movie) pair.
    distinct_users = ratings.select("userId").distinct().count()
    distinct_movies = ratings.select("movieId").distinct().count()
    total_cells = distinct_users * distinct_movies

    # Guard against an empty dataframe, which would otherwise divide by zero.
    if total_cells == 0:
        print("The ratings dataframe is empty; sparsity is undefined.")
        return None

    # float() keeps this a true division even under Python 2 semantics.
    sparsity = (1.0 - float(rated_cells) / total_cells) * 100
    print("The ratings dataframe is ", "%.2f" % sparsity + "% sparse.")
    return sparsity
    
 # Report the sparsity of `ratings`; `ratings` must already be defined
 # (its definition is not visible in this snippet).
 get_mat_sparsity(ratings)
	def get_mat_sparsity(ratings):
		"""Print and return the sparsity of the user-by-movie ratings matrix.

		Sparsity is the percentage of (user, movie) pairs that have no rating:
		``(1 - n_ratings / (n_users * n_movies)) * 100``.

		Args:
			ratings: Object supporting ``select(col).count()`` and
				``select(col).distinct().count()`` for the columns ``"rating"``,
				``"userId"`` and ``"movieId"`` (presumably a PySpark DataFrame;
				confirm against the caller).

		Returns:
			The sparsity as a percentage, or ``None`` when there are no users or
			no movies, in which case the sparsity is undefined.
		"""
		# Every row is one observed rating, i.e. one filled cell of the matrix.
		rated_cells = ratings.select("rating").count()

		# The full matrix has one cell per (distinct user, distinct movie) pair.
		distinct_users = ratings.select("userId").distinct().count()
		distinct_movies = ratings.select("movieId").distinct().count()
		total_cells = distinct_users * distinct_movies

		# Guard against an empty dataframe, which would otherwise divide by zero.
		if total_cells == 0:
			print("The ratings dataframe is empty; sparsity is undefined.")
			return None

		# float() keeps this a true division even under Python 2 semantics.
		sparsity = (1.0 - float(rated_cells) / total_cells) * 100
		print("The ratings dataframe is ", "%.2f" % sparsity + "% sparse.")
		return sparsity

	# Report the sparsity of `ratings`; `ratings` must already be defined
	# (its definition is not visible in this snippet).
	get_mat_sparsity(ratings)
No results found