Created September 30, 2018 15:28 by Aaditya Paul
# coding: utf-8
# Generates a TransmogrifAI Scala source file from a training CSV, then exposes a
# small Flask API that compiles and runs the generated code via gradle/Spark.

import os
import re
import pandas as pd


def code_generator(Train_data, target_variable):
    # Read the training CSV and locate the target column by name.
    load_data = pd.read_csv(Train_data)
    target_var = load_data.columns.get_loc(target_variable)
    c = load_data.columns

    # Move the target column into position 1 so the generated case class
    # always sees the response variable as its second field.
    if target_var != 1:
        load_data[[c[1], c[target_var]]] = load_data[[c[target_var], c[1]]]
    load_data.to_csv('train_data', header=False, index=False)
    load_data_s = pd.read_csv('train_data')

    def header_part():
        # Package declaration and imports for the generated Scala source.
        headerss = """
package com.salesforce.hw

import com.salesforce.op._
import com.salesforce.op.evaluators.Evaluators
import com.salesforce.op.features.FeatureBuilder
import com.salesforce.op.features.types._
import com.salesforce.op.readers.DataReaders
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelSelector
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelsToTry._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession"""
        return headerss

    data_loading = load_data_s
    response_type = 'Data_s'  # 'Data_s' = numeric response, 'c' = categorical response
    datatypes_ = dict(data_loading.dtypes)

    # Response feature definition for the generated code.
    if response_type == 'Data_s':
        response_var = """  val respon_var = FeatureBuilder.RealNN[Automl_data].extract(
    _.respon_var.toRealNN).asResponse\n"""
    elif response_type == 'c':
        response_var = """  val respon_var = FeatureBuilder.PickList[Automl_data].extract(
    _.respon_var.map(_.toString).toPickList).asResponse\n"""

    # Predictor feature templates; 'demo_name' is substituted per column below.
    Predictor_var = """  val demo_name = FeatureBuilder.Integral[Automl_data].extract(
    _.demo_name.toIntegral).asPredictor"""
    Response_var = """  val demo_name = FeatureBuilder.PickList[Automl_data].extract(
    _.demo_name.map(_.toString).toPickList).asPredictor"""
    Float_var = """  val demo_name = FeatureBuilder.Real[Automl_data].extract(
    _.demo_name.toReal).asPredictor"""

    Variable_name_ = 'Automl_feature_'
    Type_definition = {}
    columns_list = list(datatypes_.items())

    # Case-class fields: id, response, then one field per predictor column.
    Type_definition['id'] = '{}'.format('Int')
    if response_type == 'c':
        Type_definition['respon_var'] = 'Option[{}]'.format('String')
    elif response_type == 'Data_s':
        Type_definition['respon_var'] = '{}'.format('Int')

    count_ = 3
    for id_string in columns_list[2:]:
        dtype_name = str(id_string[1])
        # Zero-pad single-digit column indices so generated names stay aligned.
        if len(str(count_)) == 2:
            col_name = Variable_name_ + str(count_)
        else:
            col_name = Variable_name_ + '0' + str(count_)
        if dtype_name == 'int64':
            Type_definition[col_name] = 'Option[{}]'.format('Int')
        elif dtype_name == 'object':
            Type_definition[col_name] = 'Option[{}]'.format('String')
        elif dtype_name == 'float64':
            Type_definition[col_name] = 'Option[{}]'.format('Double')
        count_ += 1

    Scala_object = """object Newrr {
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      println("You need to pass in the CSV file path as an argument")
      sys.exit(1)
    }
    val csvFilePath = args(0)
    val csvFilePaths = args(1)
    println(s"Using user-supplied CSV file path: $csvFilePath")

    // Set up a SparkSession as normal
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""

    # Build the case-class body: one "name: Type" entry per field.
    body = ""
    max_lims = len(Type_definition)
    count_n = 1
    for key, value in Type_definition.items():
        if count_n >= max_lims:
            body += "  {0}: {1}".format(key, value)
        else:
            body += "  {0}: {1},\n".format(key, value)
        count_n += 1

    # Build one FeatureBuilder block per predictor column, typed by its dtype.
    Features_types_conversion = []
    all_columns = []
    count_n_ = 1
    for Data_s, data_type_s in datatypes_.items():
        if count_n_ >= 3:
            dtype_name = str(data_type_s)
            if len(str(count_n_)) == 2:
                col_name = Variable_name_ + str(count_n_)
            else:
                col_name = Variable_name_ + '0' + str(count_n_)
            if dtype_name == 'object':
                all_columns.append(col_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), col_name, Response_var))
            elif dtype_name == 'int64':
                all_columns.append(col_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), col_name, Predictor_var))
            elif dtype_name == 'float64':
                all_columns.append(col_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), col_name, Float_var))
        count_n_ += 1

    body_features = response_var
    for body_s in Features_types_conversion:
        body_features += "{}\n".format(body_s)

    data_types_transformation = """    val Automl_dataFeatures = Seq(
      Variable_name_
    ).transmogrify()"""

    # Join the feature names three per line for the generated Seq(...) block.
    chunks = [", ".join(all_columns[i:i + 3]) for i in range(0, len(all_columns), 3)]
    data_ery = ",\n      ".join(chunks)

    res_code = """
    val sanityCheck = true
    val finalFeatures = if (sanityCheck) respon_var.sanityCheck(Automl_dataFeatures) else Automl_dataFeatures

    val prediction = BinaryClassificationModelSelector.withTrainValidationSplit(
      modelTypesToUse = Seq(OpLogisticRegression)
    ).setInput(respon_var, Automl_dataFeatures).getOutput()

    val evaluator = Evaluators.BinaryClassification().setLabelCol(respon_var).setPredictionCol(prediction)

    import spark.implicits._
    val trainDataReader = DataReaders.Simple.csvCase[Automl_data](
      path = Option(csvFilePath),
      key = _.id.toString
    )

    val workflow = new OpWorkflow()
      .setResultFeatures(respon_var, prediction)
      .setReader(trainDataReader)

    val fittedWorkflow = workflow.train()

    val (dataframe, metrics) = fittedWorkflow.scoreAndEvaluate(evaluator = evaluator)

    println("Transformed dataframe columns:")
    dataframe.columns.foreach(println)
    println("Metrics:")

    fittedWorkflow.save("/tmp/my-model1")
    println("model_saved")

    // Load the model
    val loadedModel = workflow.loadModel("/tmp/my-model1")
    println("model_loaded")

    // Score the loaded model
    val Tpo_datassssDatas = DataReaders.Simple.csvCase[Automl_data](
      Option(csvFilePaths),
      key = _.id.toString)
    val scores = loadedModel.setReader(Tpo_datassssDatas).score()
    print("model_scored")
    scores.write.json("/tmp/my-model13")
    scores.show(true)
    println(loadedModel.summaryPretty())
  }
}"""

    trans_d = re.sub(re.escape('Variable_name_'), data_ery, data_types_transformation)
    output = "{0}\n\ncase class Automl_data\n(\n{1}\n)\n\n{2}\n\n{3}\n{4}\n\n\n{5}\n".format(
        header_part(), body, Scala_object, body_features, trans_d, res_code)

    # Overwrite any previously generated source file before writing the new one.
    scala_path = '/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala'
    try:
        os.remove(scala_path)
    except FileNotFoundError:
        pass
    with open(scala_path, 'a+') as f:
        f.write(output)
    return "Scala_code_generated.."


# Flask API that drives the code generator and the gradle/Spark run.
from flask import Flask, abort, request
import json
import shlex
import subprocess
import time
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # flask_cors is imported for cross-origin requests; enable it on the app


@app.route('/Command_execution', methods=['POST'])
def Command_execution():
    cmd = './gradlew -q sparkSubmit -Dmain=com.salesforce.hw.Newrr -Dargs="demo.csv demo2.csv"'
    os.chdir('/Users/monk/TransmogrifAI/helloworld')
    print('switch_to_directory_')

    train_data = request.json['train_data']
    target_variable = request.json['target']
    test_data = request.json['test_data']
    print(train_data, target_variable, test_data)

    # Remove scores left over from a previous run.
    try:
        subprocess.Popen("rm -r /tmp/my-model13", shell=True)
    except Exception:
        pass

    code_generator(train_data, target_variable)
    print("code_generated")

    # Set SPARK_HOME before launching the Spark job.
    os.environ["SPARK_HOME"] = "/Users/monk/spark-2.2.1-bin-hadoop2.7"

    # Substitute the user-supplied train and test CSVs into the gradle command.
    replaced_data = cmd.replace('demo.csv', train_data).replace('demo2.csv', test_data)
    print("command_s", replaced_data)
    execute_command = subprocess.Popen(
        shlex.split(replaced_data), stdout=subprocess.PIPE).communicate()

    # Pull the "Selected Model" section out of the captured Spark output.
    result = []
    pattern = r"Selected Model - .+?None"
    for patten_ in re.findall(pattern, str(execute_command)):
        for result_ in patten_.split('\\n'):
            result.append(result_)
    return json.dumps({'result_': result})


if __name__ == '__main__':
    app.run(port=8890, debug=True)