# coding: utf-8
import pandas as pd
import re
import os


def code_generator(Train_data, target_variable):
    load_data = pd.read_csv(Train_data)
    target_var = load_data.columns.get_loc(target_variable)
    c = load_data.columns
    # Move the target column into position 1 if it is not already there.
    # (.values is needed because assigning a DataFrame slice aligns on column
    # labels, which would otherwise make the swap a no-op.)
    if target_var != 1:
        load_data[[c[1], c[target_var]]] = load_data[[c[target_var], c[1]]].values
    # Re-write without the header row, then re-read so generated column names can be used.
    load_data.to_csv('train_data', header=False, index=False)
    load_data_s = pd.read_csv('train_data')
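    # The code below assumes the first column is an id (used as the record key)
    # and the second column is the response; everything else becomes a predictor.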
    def header_part():
        headerss = """
package com.salesforce.hw
import com.salesforce.op._
import com.salesforce.op.evaluators.Evaluators
import com.salesforce.op.features.FeatureBuilder
import com.salesforce.op.features.types._
import com.salesforce.op.readers.DataReaders
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelSelector
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelsToTry._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession"""
        return headerss
    data_loading = load_data_s
    response_type = 'Data_s'
    datatypes_ = dict(data_loading.dtypes)
    if response_type == 'Data_s':
        response_var = """ val respon_var = FeatureBuilder.RealNN[Automl_data].extract(
      _.respon_var.toRealNN).asResponse\n"""
    elif response_type == 'c':
        response_var = """ val respon_var = FeatureBuilder.PickList[Automl_data].extract(
      _.respon_var.map(_.toString).toPickList).asResponse\n"""
    Predictor_var = """ val demo_name = FeatureBuilder.Integral[Automl_data].extract(
      _.demo_name.toIntegral).asPredictor"""
    Response_var = """ val demo_name = FeatureBuilder.PickList[Automl_data].extract(
      _.demo_name.map(_.toString).toPickList).asPredictor"""
    Float_var = """ val demo_name = FeatureBuilder.Real[Automl_data].extract(
      _.demo_name.toReal).asPredictor"""
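    # Scala snippet templates: 'demo_name' is a placeholder that re.sub swaps for the
    # generated column name further down. Despite its name, Response_var is the
    # PickList template used for string-typed *predictor* columns.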
    Variable_name_ = 'Automl_feature_'
    Type_definition = {}
    responce_column = 1
    columns_list = list(datatypes_.items())
    Type_definition['id'] = '{}'.format('Int')
    if response_type == 'c':
        Type_definition['respon_var'] = 'Option[{}]'.format('String')
    elif response_type == 'Data_s':
        Type_definition['respon_var'] = '{}'.format('Int')
    count_ = 3
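    # Map every remaining column's pandas dtype to a Scala Option type for the
    # generated case class; single-digit indices are zero-padded (e.g. Automl_feature_03).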
    for id_string in columns_list[2:]:
        if id_string[1] == 'int64':
            if len(str(count_)) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('Int')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('Int')
        elif id_string[1] == 'object':
            if len(str(count_)) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('String')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('String')
        elif id_string[1] == 'float64':
            if len(str(count_)) == 2:
                Type_definition[Variable_name_ + str(count_)] = 'Option[{}]'.format('Double')
            else:
                Type_definition[Variable_name_ + '0' + str(count_)] = 'Option[{}]'.format('Double')
        count_ += 1
    name = 'demo'
    Scala_object = """object Newrr {
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      println("You need to pass in the CSV file path as an argument")
      sys.exit(1)
    }
    val csvFilePath = args(0)
    val csvFilePaths = args(1)
    println(s"Using user-supplied CSV file path: $csvFilePath")
    // Set up a SparkSession as normal
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""
    Spark_session = """ val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""
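    # NOTE: Spark_session duplicates the SparkSession setup already embedded in
    # Scala_object and is never referenced when the output file is assembled.

    # Emit the case-class field list, one "name: Type" entry per line, with a
    # trailing comma on every entry except the last.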
| body = "" | |
| max_lims = len(Type_definition) | |
| count_n =1 | |
| for key, value in Type_definition.items(): | |
| if count_n>=max_lims: | |
| body += " {0}: {1}".format(key, value) | |
| else: | |
| body += " {0}: {1},\n".format(key, value,) | |
| count_n+=1 | |
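    # Generate one FeatureBuilder predictor snippet per column after the id and
    # response (count_n_ >= 3), reusing the same zero-padded names as above so the
    # extractors line up with the case-class fields.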
    Features_types_conversion = []
    count_n_ = 1
    add_ = []
    all_columns = []
    for Data_s, data_type_s in datatypes_.items():
        if count_n_ >= 3:
            if data_type_s == 'object':
                if len(str(count_n_)) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__er_rt = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Response_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__er_rt = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Response_var)
                Features_types_conversion.append(datatypes__er_rt)
            elif data_type_s == 'int64':
                if len(str(count_n_)) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Predictor_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Predictor_var)
                Features_types_conversion.append(datatypes__ew)
            elif data_type_s == 'float64':
                if len(str(count_n_)) == 2:
                    all_columns.append('Automl_feature_' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + str(count_n_), Float_var)
                else:
                    all_columns.append('Automl_feature_' + '0' + str(count_n_))
                    datatypes__ew = re.sub(re.escape('demo_name'), 'Automl_feature_' + '0' + str(count_n_), Float_var)
                Features_types_conversion.append(datatypes__ew)
        count_n_ += 1
    body_features = response_var
    for body_s in Features_types_conversion:
        body_features += "{}\n".format(body_s)
    data_types_transformation = """ val Automl_dataFeatures = Seq(
      Variable_name_
    ).transmogrify()"""
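    # 'Variable_name_' in the template above is replaced below with the comma-joined
    # feature names; .transmogrify() is TransmogrifAI's automatic feature
    # engineering step applied to that feature Seq.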
| all_colp="" | |
| i_count=0 | |
| data_ery="" | |
| max_ra = int(len(all_columns)/3) | |
| for Data_s in range(len(all_columns)): | |
| if Data_s<max_ra: | |
| if all_columns[Data_s * 3:(Data_s + 1) * 3]: | |
| data_ery+="{}".format(", ".join(all_columns[Data_s * 3:(Data_s + 1) * 3])+','+'\n'+' ') | |
| else: | |
| if all_columns[Data_s * 3:(Data_s + 1) * 3]: | |
| data_ery+="{}".format(", ".join(all_columns[Data_s * 3:(Data_s + 1) * 3])) | |
| res_code =""" val sanityCheck = true | |
| val finalFeatures = if (sanityCheck) respon_var.sanityCheck(Automl_dataFeatures) else Automl_dataFeatures | |
| val prediction = BinaryClassificationModelSelector.withTrainValidationSplit( | |
| modelTypesToUse = Seq(OpLogisticRegression) | |
| ).setInput(respon_var, Automl_dataFeatures).getOutput() | |
| val evaluator = Evaluators.BinaryClassification().setLabelCol(respon_var).setPredictionCol(prediction) | |
| import spark.implicits._ | |
| val trainDataReader = DataReaders.Simple.csvCase[Automl_data]( | |
| path = Option(csvFilePath), | |
| key = _.id.toString | |
| ) | |
| val workflow = | |
| new OpWorkflow() | |
| .setResultFeatures(respon_var, prediction) | |
| .setReader(trainDataReader) | |
| val fittedWorkflow = workflow.train() | |
| val (dataframe, metrics) = fittedWorkflow.scoreAndEvaluate(evaluator = evaluator) | |
| println("Transformed dataframe columns:") | |
| dataframe.columns.foreach(println) | |
| println("Metrics:") | |
| fittedWorkflow .save("/tmp/my-model1") | |
| println("model_saved") | |
| // Load the model | |
| val loadedModel = workflow.loadModel("/tmp/my-model1") | |
| println("model_loaded") | |
| // Score the loaded model | |
| val Tpo_datassssDatas = DataReaders.Simple.csvCase[Automl_data]( | |
| Option(csvFilePaths), | |
| key = _.id.toString) | |
| val scores = loadedModel.setReader(Tpo_datassssDatas).score() | |
| print("model_scored") | |
| scores.write.json("/tmp/my-model13") | |
| scores.show(true) | |
| println(loadedModel.summaryPretty()) | |
| } | |
| }""" | |
    trans_d = re.sub(re.escape('Variable_name_'), data_ery, data_types_transformation)
    output = """{0}\n\ncase class Automl_data
(
{1}
)\n\n{2}\n\n{3}\n{4}\n\n\n{5}\n""".format(header_part(), body, Scala_object, body_features, trans_d, res_code)
    try:
        os.remove('/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala')
    except FileNotFoundError:
        pass
    with open('/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala', 'a+') as f:
        f.write(output)
    return "Scala_code_generated.."


# importing libraries for the Flask service
from flask import Flask, abort, request
import json
import shlex
import subprocess
import re
import os
import time
from flask_cors import CORS

app = Flask(__name__)
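# Expects a JSON body like {"train_data": ..., "target": ..., "test_data": ...};
# generates the Scala source, runs it through gradle/spark-submit and returns the
# "Selected Model" section of the TransmogrifAI output.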
@app.route('/Command_execution', methods=['POST'])
def Command_execution():
    cmd = './gradlew -q sparkSubmit -Dmain=com.salesforce.hw.Newrr -Dargs="demo.csv demo2.csv"'
    os.chdir('/Users/monk/TransmogrifAI/helloworld')
    print('switch_to_directory_')
    train_data = request.json['train_data']
    target_variable = request.json['target']
    test_data = request.json['test_data']
    print(train_data, target_variable, test_data)
    try:
        subprocess.Popen("rm -r /tmp/my-model13", shell=True)
    except Exception:
        pass
    code_generator(train_data, target_variable)
    print("code_generated")
    # setting environment variable before launching the Spark job
    os.environ["SPARK_HOME"] = "/Users/monk/spark-2.2.1-bin-hadoop2.7"
    # substitute the training and test CSV paths into the gradle command
    replaced_data = cmd.replace('demo.csv', train_data).replace('demo2.csv', test_data)
    print("command_s", replaced_data)
    execute_command = subprocess.Popen(shlex.split(replaced_data), stdout=subprocess.PIPE).communicate()
    result = []
    pattern = r"Selected Model - .+?None"
    for patten_ in re.findall(pattern, str(execute_command)):
        for result_ in patten_.split('\\n'):
            result.append(result_)
    return json.dumps({'result_': result})
if __name__ == '__main__':
    app.run(port=8890, debug=True)
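# Example request (a sketch: assumes the service is running locally and that the
# CSV file names below are placeholders visible to the gradle task):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8890/Command_execution",
#       json={"train_data": "train.csv", "target": "label", "test_data": "test.csv"},
#   )
#   print(resp.json())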