Jinyuan xrazor1031

衣

spark 2.4.3 bloomd java client

No matter how large ulimit is set, the IOException "too many open files" still occurs intermittently during shuffling.

Shade the jar to rename class names when assembling the jar.

	import pandas as pd
	from hdfs import *
	# Module-level HDFS client bound to the endpoint at 127.0.0.1:50070; read_as_df reads through it.
	client = Client("http://127.0.0.1:50070")

	def read_as_df(path):
	    """Load a line-delimited JSON file through the module-level ``client`` into a DataFrame.

	    Args:
	        path: Path of the file; passed unchanged to ``client.read``.

	    Returns:
	        The ``pandas.DataFrame`` built by ``pd.read_json(..., lines=True)``.
	    """
	    # The full payload is consumed inside the ``with`` block, so the reader
	    # is released before the frame is handed back to the caller.
	    with client.read(path) as reader:
	        _df = pd.read_json(reader.read(), lines=True)
	    return _df

	import pandas as pd
	import glob
	# path = "/*"
	path = "/*.csv"
	# Every filesystem entry matching the glob pattern above.
	all_files = glob.glob(path)

	# NOTE(review): nothing visible appends to `li`; presumably each frame is
	# meant to be collected here and concatenated later -- confirm.
	li = []

	# NOTE(review): the pattern selects *.csv, yet every file is parsed as
	# line-delimited JSON -- confirm this is intended.
	for filename in all_files:
	    df = pd.read_json(filename, lines=True)

	// method 1
	import org.apache.spark.ml.linalg.Vectors
	import org.apache.spark.ml.feature.LabeledPoint
	// Example with label 1.0 and a dense 3-element feature vector.
	val pos = LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0))
	// Example with label 0.0 and a sparse vector of size 3 holding 1.0 at index 0 and 3.0 at index 2.
	val neg = LabeledPoint(0.0, Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)))

	// Build a two-row DataFrame (neg first, then pos) with columns "label" and "features".
	// NOTE(review): toDF on a Seq presumably needs the session implicits in scope (as in spark-shell) -- confirm.
	val df = Seq(neg,pos).toDF("label","features")
	// Write the DataFrame in the "libsvm" format to /data/foo.
	df.write.format("libsvm").save("/data/foo")

	// method 2

	from http import cookiejar
	from selenium import webdriver
	import simplejson
	jar = cookiejar.CookieJar()
	# export from editthiscookie
	# Load the exported cookies inside a context manager so the file handle is
	# closed as soon as parsing finishes (the bare open() call left it open).
	with open("cookie.txt") as cookie_file:
	    cookies = simplejson.load(cookie_file)
	# login_url =''
	upload_url = ''
	# The driver MUST visit the URL first (presumably before the cookies are
	# added); otherwise an "invalid domain" error is reported.
	driver = webdriver.Chrome(executable_path="chromedriver")

	# Rewritten code from /r2/r2/lib/db/_sorts.pyx
	# Wilson interval (威尔逊区间)
	from math import sqrt

	def confidence(ups, downs):
	    """Return 0 when ``ups + downs`` is zero.

	    Args:
	        ups: First vote count; added to ``downs`` to form the total ``n``.
	        downs: Second vote count.

	    Returns:
	        0 if the total is zero.

	    NOTE(review): no other branch is visible in this snippet, so for a
	    non-zero total the function falls through and returns None. Presumably
	    the Wilson-interval computation is missing here -- confirm and restore it.
	    """
	    n = ups + downs
	    # Guard the empty case up front so any later division by n is safe.
	    if n == 0:
	        return 0

	import pyspark.sql.functions as F
	# For every column of df, count the rows whose value is NaN and show the counts, one output column per input column.
	# NOTE(review): only F.isnan is tested here; presumably null values are not included in these counts -- confirm.
	df.select([F.count(F.when(F.isnan(c), c)).alias(c) for c in df.columns]).show()

	// auto.js
	// NOTE(review): presumably blocks until the Auto.js accessibility service is available -- confirm.
	auto.waitFor();
	// Cache the screen dimensions reported by the device object.
	var height = device.height;
	var width = device.width;
	// Show a toast listing device width, device height, phone model and Android version.
	toast("\n设备宽" + width + "\n" + "设备高" + height + "\n" + "手机型号" + device.model + "\n安卓版本" + device.release)
	// Pass the device's own dimensions as the screen metrics, then start the lingqu() flow.
	setScreenMetrics(width, height);lingqu();

	function lingqu() {
	app.launchApp("手机淘宝");
	toast("打开淘宝")