@monk1337
Created September 30, 2018 15:28
import os
import re

import pandas as pd


def code_generator(Train_data, target_variable):
    load_data = pd.read_csv(Train_data)
    target_var = load_data.columns.get_loc(target_variable)
    c = load_data.columns
    # Move the target column into position 1 so the generated schema is always
    # id, respon_var, then the predictor columns.
    if target_var != 1:
        load_data[[c[1], c[target_var]]] = load_data[[c[target_var], c[1]]]
    load_data.to_csv('train_data', header=False, index=False)
    load_data_s = pd.read_csv('train_data')

    def header_part():
        # Imports emitted at the top of the generated TransmogrifAI Scala file.
        headerss = """
package com.salesforce.hw
import com.salesforce.op._
import com.salesforce.op.evaluators.Evaluators
import com.salesforce.op.features.FeatureBuilder
import com.salesforce.op.features.types._
import com.salesforce.op.readers.DataReaders
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelSelector
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelsToTry._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession"""
        return headerss

    data_loading = load_data_s
    response_type = 'Data_s'
    datatypes_ = dict(data_loading.dtypes)

    # Response feature: RealNN for a numeric target ('Data_s'),
    # PickList for a categorical one ('c').
    if response_type == 'Data_s':
        response_var = """ val respon_var = FeatureBuilder.RealNN[Automl_data].extract(
      _.respon_var.toRealNN).asResponse\n"""
    elif response_type == 'c':
        response_var = """ val respon_var = FeatureBuilder.PickList[Automl_data].extract(
      _.respon_var.map(_.toString).toPickList).asResponse\n"""

    # Per-column templates; 'demo_name' is substituted with each generated feature name.
    Predictor_var = """ val demo_name = FeatureBuilder.Integral[Automl_data].extract(
      _.demo_name.toIntegral).asPredictor"""
    Response_var = """ val demo_name = FeatureBuilder.PickList[Automl_data].extract(
      _.demo_name.map(_.toString).toPickList).asPredictor"""
    Float_var = """ val demo_name = FeatureBuilder.Real[Automl_data].extract(
      _.demo_name.toReal).asPredictor"""

    Variable_name_ = 'Automl_feature_'
    Type_definition = {}
    responce_column = 1
    columns_list = list(datatypes_.items())

    # Scala types for the generated Automl_data case class.
    Type_definition['id'] = '{}'.format('Int')
    if response_type == 'c':
        Type_definition['respon_var'] = 'Option[{}]'.format('String')
    elif response_type == 'Data_s':
        Type_definition['respon_var'] = '{}'.format('Int')
    count_ = 3
    for id_string in columns_list[2:]:
        if id_string[1] == 'int64':
            if len(list(str(count_))) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('Int')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('Int')
        elif id_string[1] == 'object':
            if len(list(str(count_))) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('String')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('String')
        elif id_string[1] == 'float':
            if len(list(str(count_))) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('Double')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('Double')
        count_ += 1
    name = 'demo'
    Scala_object = """object Newrr {
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      println("You need to pass in the CSV file path as an argument")
      sys.exit(1)
    }
    val csvFilePath = args(0)
    val csvFilePaths = args(1)
    println(s"Using user-supplied CSV file path: $csvFilePath")
    // Set up a SparkSession as normal
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""

    Spark_session = """ val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""

    # Render the case-class body: one "name: Type" field per column.
    body = ""
    max_lims = len(Type_definition)
    count_n = 1
    for key, value in Type_definition.items():
        if count_n >= max_lims:
            body += " {0}: {1}".format(key, value)
        else:
            body += " {0}: {1},\n".format(key, value)
        count_n += 1
    # Build one FeatureBuilder declaration per predictor column (columns 3 onward),
    # choosing the template that matches the column's dtype.
    Features_types_conversion = []
    count_n_ = 1
    add_ = []
    all_columns = []
    for Data_s, data_type_s in datatypes_.items():
        if count_n_ >= 3:
            if data_type_s == 'object':
                if len(list(str(count_n_))) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__er_rt = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Response_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__er_rt = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Response_var)
                Features_types_conversion.append(datatypes__er_rt)
            elif data_type_s == 'int':
                if len(list(str(count_n_))) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Predictor_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Predictor_var)
                Features_types_conversion.append(datatypes__ew)
            elif data_type_s == 'float':
                if len(list(str(count_n_))) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Float_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Float_var)
                Features_types_conversion.append(datatypes__ew)
        count_n_ += 1

    # Response feature first, then the predictor declarations.
    body_features = response_var
    for body_s in Features_types_conversion:
        body_features += "{}\n".format(body_s)
    # Seq(...) block that feeds every generated feature into .transmogrify();
    # the placeholder 'Variable_name_' is replaced with the real feature list below.
    data_types_transformation = """ val Automl_dataFeatures = Seq(
      Variable_name_
    ).transmogrify()"""

    all_colp = ""
    i_count = 0
    data_ery = ""
    max_ra = int(len(all_columns) / 3)
    # Emit the feature names three per line inside the Seq(...) block.
    for Data_s in range(len(all_columns)):
        if Data_s < max_ra:
            if all_columns[Data_s * 3:(Data_s + 1) * 3]:
                data_ery += "{}".format(", ".join(all_columns[Data_s * 3:(Data_s + 1) * 3]) + ',' + '\n' + ' ')
        else:
            if all_columns[Data_s * 3:(Data_s + 1) * 3]:
                data_ery += "{}".format(", ".join(all_columns[Data_s * 3:(Data_s + 1) * 3]))
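
    # For illustration only (assuming, say, five predictor columns, which is not a
    # value from the original gist), data_ery ends up roughly like:
    #   Automl_feature_03, Automl_feature_04, Automl_feature_05,
    #    Automl_feature_06, Automl_feature_07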
    res_code = """    val sanityCheck = true
    val finalFeatures = if (sanityCheck) respon_var.sanityCheck(Automl_dataFeatures) else Automl_dataFeatures
    val prediction = BinaryClassificationModelSelector.withTrainValidationSplit(
      modelTypesToUse = Seq(OpLogisticRegression)
    ).setInput(respon_var, Automl_dataFeatures).getOutput()
    val evaluator = Evaluators.BinaryClassification().setLabelCol(respon_var).setPredictionCol(prediction)
    import spark.implicits._
    val trainDataReader = DataReaders.Simple.csvCase[Automl_data](
      path = Option(csvFilePath),
      key = _.id.toString
    )
    val workflow =
      new OpWorkflow()
        .setResultFeatures(respon_var, prediction)
        .setReader(trainDataReader)
    val fittedWorkflow = workflow.train()
    val (dataframe, metrics) = fittedWorkflow.scoreAndEvaluate(evaluator = evaluator)
    println("Transformed dataframe columns:")
    dataframe.columns.foreach(println)
    println("Metrics:")
    fittedWorkflow.save("/tmp/my-model1")
    println("model_saved")
    // Load the model
    val loadedModel = workflow.loadModel("/tmp/my-model1")
    println("model_loaded")
    // Score the loaded model
    val Tpo_datassssDatas = DataReaders.Simple.csvCase[Automl_data](
      Option(csvFilePaths),
      key = _.id.toString)
    val scores = loadedModel.setReader(Tpo_datassssDatas).score()
    print("model_scored")
    scores.write.json("/tmp/my-model13")
    scores.show(true)
    println(loadedModel.summaryPretty())
  }
}"""
    # Substitute the generated feature list into the transmogrify() block.
    trans_d = re.sub(re.escape('Variable_name_'), data_ery, data_types_transformation)

    # Assemble the full Scala source: imports, case class, object header,
    # feature declarations, transmogrify block, and the training/scoring code.
    output = """{0}\n\ncase class Automl_data
(
{1}
)\n\n{2}\n\n{3}\n{4}\n\n\n{5}\n""".format(header_part(), body, Scala_object, body_features, trans_d, res_code)

    # Remove any previously generated file, then write the new source.
    try:
        os.remove('/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala')
    except FileNotFoundError:
        pass
    with open('/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala', 'a+') as f:
        f.write(output)
    return "Scala_code_generated.."
# coding: utf-8
# Flask service that receives train/test CSV paths and a target column, generates
# the Scala file above, and launches the TransmogrifAI job via Gradle.
from flask import Flask, abort, request
import json
import shlex
import subprocess
import re
import os
import time
from flask_cors import CORS

app = Flask(__name__)
@app.route('/Command_execution', methods=['POST'])
def Command_execution():
    cmd = './gradlew -q sparkSubmit -Dmain=com.salesforce.hw.Newrr -Dargs="demo.csv demo2.csv"'
    os.chdir('/Users/monk/TransmogrifAI/helloworld')
    print('switch_to_directory_')

    train_data = request.json['train_data']
    target_variable = request.json['target']
    test_data = request.json['test_data']
    print(train_data, target_variable, test_data)

    # Clear any scores left over from a previous run.
    try:
        subprocess.Popen("rm -r /tmp/my-model13", shell=True)
    except Exception:
        pass

    code_generator(train_data, target_variable)
    print("code_generated")

    # Point SPARK_HOME at the local Spark install before launching the job.
    os.environ["SPARK_HOME"] = "/Users/monk/spark-2.2.1-bin-hadoop2.7"

    # Swap the placeholder file names for the user-supplied train and test CSVs.
    replaced_data = cmd.replace('demo.csv', train_data).replace('demo2.csv', test_data)
    print("command_s", replaced_data)
    execute_command = subprocess.Popen(shlex.split(replaced_data), stdout=subprocess.PIPE).communicate()

    # Pull the "Selected Model" summary out of the job output.
    result = []
    pattern = r"Selected Model - .+?None"
    for patten_ in re.findall(pattern, str(execute_command)):
        for result_ in patten_.split('\\n'):
            result.append(result_)
    return json.dumps({'result_': result})


if __name__ == '__main__':
    app.run(port=8890, debug=True)
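
# A minimal client sketch for exercising the endpoint once the server is running on
# port 8890; the file paths and target column below are assumed examples, not values
# from the original gist:
#
#   import requests
#   payload = {'train_data': 'train.csv', 'target': 'label', 'test_data': 'test.csv'}
#   resp = requests.post('http://localhost:8890/Command_execution', json=payload)
#   print(resp.json())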