Skip to content

Instantly share code, notes, and snippets.

View ronin76x's full-sized avatar
:octocat:
Focussing

Harsha (ಹರ್ಷ) ronin76x

:octocat:
Focussing
View GitHub Profile
@ronin76x
ronin76x / Main.scala
Created June 3, 2020 00:14
no-coalesce
// Persist trainData as JSON under the processed-resources directory.
// No coalesce/repartition is applied to trainData before the write.
trainData
  .write
  .format("json")
  .save("src/main/resources/_processed/JSON/data")
import pandas as pd
import re
stopwords = ["a","i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours",
"yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself",
"it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which",
"who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be",
"been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an",
"the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for",
"with", "about", "against", "between", "into", "through", "during", "before", "after", "above",
@ronin76x
ronin76x / Main.scala
Last active June 2, 2020 22:29
load-clean
// Path of the CSV file to load, taken from the first program argument.
// Fail with an explicit usage message when no argument was supplied, rather than
// the bare NoSuchElementException that `args.head` raises on an empty array.
val inputPath = args.headOption.getOrElse(
  throw new IllegalArgumentException("Expected the input CSV path as the first argument")
)

// Read the CSV at `inputPath`, treating the first row as column names ("header")
// and asking Spark to infer column types ("inferSchema").
val data = spark
  .read
  .option("inferSchema", "true")
  .option("header", "true")
  .csv(inputPath)
val stopwords = List("i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself",
"yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them",
"their", "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is",
"are", "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a",
"an", "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with",
@ronin76x
ronin76x / Main.scala
Created June 2, 2020 20:38
Load a CSV file into a Spark DataFrame
// Path of the CSV file to load, taken from the first program argument.
// Fail with an explicit usage message when no argument was supplied, rather than
// the bare NoSuchElementException that `args.head` raises on an empty array.
val inputPath = args.headOption.getOrElse(
  throw new IllegalArgumentException("Expected the input CSV path as the first argument")
)

// Read the CSV at `inputPath`, treating the first row as column names ("header")
// and asking Spark to infer column types ("inferSchema").
val data = spark
  .read
  .option("inferSchema", "true")
  .option("header", "true")
  .csv(inputPath)
@ronin76x
ronin76x / Main.scala
Last active June 2, 2020 23:28
Spark initialization
// Spark settings for this job: application name and master URL.
// NOTE(review): the master is hard-coded to local[8]; confirm whether it should
// instead be supplied externally (e.g. by the launcher) for non-local runs.
val conf = new SparkConf()
  .setAppName("BGReview ETL")
  .setMaster("local[8]")

// Build the session from `conf` so the app name and master are declared once.
// getOrCreate() reuses an already-active session/context instead of failing the
// way a second `new SparkContext(conf)` in the same JVM would.
val spark = SparkSession
  .builder()
  .config(conf)
  .getOrCreate()

// The context owned by the session; kept under the same name for existing callers.
val sc = spark.sparkContext