
@monk1337
Created September 30, 2018 15:28
nm.py (created by Aaditya Paul)

import pandas as pd
import re


def code_generator(Train_data, target_variable):
    """Generate a TransmogrifAI Scala source file (Newrr.scala) from a training CSV."""
    load_data = pd.read_csv(Train_data)
    target_var = load_data.columns.get_loc(target_variable)
    c = load_data.columns
    # Move the target column into position 1 so the generated reader treats it as the
    # response; .values keeps pandas from aligning on column labels, which would
    # silently skip the swap.
    if target_var != 1:
        load_data[[c[1], c[target_var]]] = load_data[[c[target_var], c[1]]].values
    # Rewrite the CSV without header/index and reload it to infer the per-column dtypes.
    load_data.to_csv('train_data', header=False, index=False)
    load_data_s = pd.read_csv('train_data')


    def header_part():
        headerss = """
package com.salesforce.hw
import com.salesforce.op._
import com.salesforce.op.evaluators.Evaluators
import com.salesforce.op.features.FeatureBuilder
import com.salesforce.op.features.types._
import com.salesforce.op.readers.DataReaders
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelSelector
import com.salesforce.op.stages.impl.classification.BinaryClassificationModelsToTry._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession"""
        return headerss


    data_loading = load_data_s
    # 'Data_s' selects a numeric (RealNN) response; 'c' selects a categorical (PickList) one.
    response_type = 'Data_s'

    datatypes_ = dict(data_loading.dtypes)
    if response_type == 'Data_s':
        response_var = """    val respon_var = FeatureBuilder.RealNN[Automl_data].extract(
      _.respon_var.toRealNN).asResponse\n"""
    elif response_type == 'c':
        response_var = """    val respon_var = FeatureBuilder.PickList[Automl_data].extract(
      _.respon_var.map(_.toString).toPickList).asResponse\n"""

    # Scala FeatureBuilder templates; 'demo_name' is substituted with each generated
    # feature name further down.
    Predictor_var = """    val demo_name = FeatureBuilder.Integral[Automl_data].extract(
      _.demo_name.toIntegral).asPredictor"""
    Response_var = """    val demo_name = FeatureBuilder.PickList[Automl_data].extract(
      _.demo_name.map(_.toString).toPickList).asPredictor"""
    Float_var = """    val demo_name = FeatureBuilder.Real[Automl_data].extract(
      _.demo_name.toReal).asPredictor"""

    Variable_name_ = 'Automl_feature_'
    Type_definition = {}


    columns_list = list(datatypes_.items())
    # Field name -> Scala type for the generated Automl_data case class.
    Type_definition['id'] = 'Int'
    if response_type == 'c':
        Type_definition['respon_var'] = 'Option[String]'
    elif response_type == 'Data_s':
        Type_definition['respon_var'] = 'Int'

    # Every remaining column becomes a zero-padded Automl_feature_NN field.
    count_ = 3
    for id_string in columns_list[2:]:
        field_name = Variable_name_ + '{:02d}'.format(count_)
        if id_string[1] == 'int64':
            Type_definition[field_name] = 'Option[Int]'
        elif id_string[1] == 'object':
            Type_definition[field_name] = 'Option[String]'
        elif id_string[1] == 'float64':
            Type_definition[field_name] = 'Option[Double]'
        count_ += 1

    # Opening of the generated Scala object: argument handling plus SparkSession setup.
    Scala_object = """object Newrr {
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      println("You need to pass in the CSV file path as an argument")
      sys.exit(1)
    }
    val csvFilePath = args(0)
    val csvFilePaths = args(1)
    println(s"Using user-supplied CSV file path: $csvFilePath")
    // Set up a SparkSession as normal
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName.stripSuffix("$"))
    implicit val spark = SparkSession.builder.config(conf).getOrCreate()"""

    # Render the case-class body: one "name: Type" entry per line, comma on all but the last.
    body = ""
    max_lims = len(Type_definition)
    count_n = 1
    for key, value in Type_definition.items():
        if count_n >= max_lims:
            body += "  {0}: {1}".format(key, value)
        else:
            body += "  {0}: {1},\n".format(key, value)
        count_n += 1

    # Build one FeatureBuilder declaration per predictor column (columns 3..N),
    # choosing the template by pandas dtype.
    Features_types_conversion = []
    all_columns = []
    count_n_ = 1
    for Data_s, data_type_s in datatypes_.items():
        if count_n_ >= 3:
            feature_name = 'Automl_feature_' + '{:02d}'.format(count_n_)
            if data_type_s == 'object':
                all_columns.append(feature_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), feature_name, Response_var))
            elif data_type_s == 'int64':
                all_columns.append(feature_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), feature_name, Predictor_var))
            elif data_type_s == 'float64':
                all_columns.append(feature_name)
                Features_types_conversion.append(
                    re.sub(re.escape('demo_name'), feature_name, Float_var))
        count_n_ += 1

    # Response declaration first, then every predictor declaration.
    body_features = response_var
    for body_s in Features_types_conversion:
        body_features += "{}\n".format(body_s)


    # transmogrify() template; 'Variable_name_' is replaced with the feature list below.
    data_types_transformation = """    val Automl_dataFeatures = Seq(
      Variable_name_
    ).transmogrify()"""

    # Lay the feature names out three per line inside the Seq(...).
    data_ery = ""
    max_ra = int(len(all_columns) / 3)
    for Data_s in range(len(all_columns)):
        chunk = all_columns[Data_s * 3:(Data_s + 1) * 3]
        if not chunk:
            continue
        if Data_s < max_ra:
            data_ery += ", ".join(chunk) + ',\n      '
        else:
            data_ery += ", ".join(chunk)

    # Remainder of the generated main(): model selection, training, saving, and scoring.
    res_code = """    val sanityCheck = true
    val finalFeatures = if (sanityCheck) respon_var.sanityCheck(Automl_dataFeatures) else Automl_dataFeatures
    val prediction = BinaryClassificationModelSelector.withTrainValidationSplit(
      modelTypesToUse = Seq(OpLogisticRegression)
    ).setInput(respon_var, Automl_dataFeatures).getOutput()
    val evaluator = Evaluators.BinaryClassification().setLabelCol(respon_var).setPredictionCol(prediction)
    import spark.implicits._
    val trainDataReader = DataReaders.Simple.csvCase[Automl_data](
      path = Option(csvFilePath),
      key = _.id.toString
    )
    val workflow =
      new OpWorkflow()
        .setResultFeatures(respon_var, prediction)
        .setReader(trainDataReader)
    val fittedWorkflow = workflow.train()
    val (dataframe, metrics) = fittedWorkflow.scoreAndEvaluate(evaluator = evaluator)
    println("Transformed dataframe columns:")
    dataframe.columns.foreach(println)
    println("Metrics:")
    fittedWorkflow.save("/tmp/my-model1")
    println("model_saved")
    // Load the model
    val loadedModel = workflow.loadModel("/tmp/my-model1")
    println("model_loaded")
    // Score the loaded model
    val Tpo_datassssDatas = DataReaders.Simple.csvCase[Automl_data](
      Option(csvFilePaths),
      key = _.id.toString)
    val scores = loadedModel.setReader(Tpo_datassssDatas).score()
    print("model_scored")
    scores.write.json("/tmp/my-model13")
    scores.show(true)
    println(loadedModel.summaryPretty())
  }
}"""


    # Splice the laid-out feature list into the transmogrify() template.
    trans_d = re.sub(re.escape('Variable_name_'), data_ery, data_types_transformation)

    # Assemble the full Scala source: imports, case class, object header, feature
    # declarations, transmogrify call, and the training/scoring body.
    output = """{0}\n\ncase class Automl_data
(
{1}
)\n\n{2}\n\n{3}\n{4}\n\n\n{5}\n""".format(header_part(), body, Scala_object, body_features, trans_d, res_code)

    scala_path = '/Users/monk/TransmogrifAI/helloworld/src/main/scala/com/salesforce/hw/Newrr.scala'
    with open(scala_path, 'w') as f:  # overwrite any previously generated file
        f.write(output)

    return "Scala_code_generated.."


# Flask service: regenerate the Scala file on request and run it through Gradle.

from flask import Flask, abort, request
import json
import shlex
import subprocess
import re
import os
import time
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # flask_cors is imported above, so enable cross-origin requests


@app.route('/Command_execution', methods=['POST'])
def Command_execution():
    # Gradle command that spark-submits the generated Newrr object; the two demo file
    # names are placeholders that get swapped for the uploaded train/test paths below.
    cmd = './gradlew -q sparkSubmit -Dmain=com.salesforce.hw.Newrr -Dargs="demo.csv demo2.csv"'

    os.chdir('/Users/monk/TransmogrifAI/helloworld')
    print('switch_to_directory_')

    train_data = request.json['train_data']
    target_variable = request.json['target']
    test_data = request.json['test_data']
    print(train_data, target_variable, test_data)

    # Clear the previous scoring output so the Scala job can write it again.
    try:
        subprocess.Popen("rm -r /tmp/my-model13", shell=True)
    except Exception:
        pass

    code_generator(train_data, target_variable)
    print("code_generated")

    # Substitute the real train and test CSV paths into the Gradle arguments.
    replaced_data = cmd.replace('demo.csv', train_data).replace('demo2.csv', test_data)
    print("command_s", replaced_data)

    # SPARK_HOME must be set before the job is launched, not after it finishes.
    os.environ["SPARK_HOME"] = "/Users/monk/spark-2.2.1-bin-hadoop2.7"
    execute_command = subprocess.Popen(shlex.split(replaced_data), stdout=subprocess.PIPE).communicate()

    # Pull the "Selected Model" summary lines out of the captured stdout.
    result = []
    pattern = r"Selected Model - .+?None"
    for patten_ in re.findall(pattern, str(execute_command)):
        for result_ in patten_.split('\\n'):
            result.append(result_)

    return json.dumps({'result_': result})


if __name__ == '__main__':
    app.run(port=8890, debug=True)
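
# Example request (illustrative; the file paths are hypothetical and must exist on the
# machine running this service, since it shells out to Gradle locally):
#   curl -X POST http://localhost:8890/Command_execution \
#        -H "Content-Type: application/json" \
#        -d '{"train_data": "/Users/monk/data/train.csv", "target": "Survived", "test_data": "/Users/monk/data/test.csv"}'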