lucinda-lim’s gists

lucinda-lim / gist:94984fd8ca1c8911159e5dc4a609b624

Created October 30, 2020 05:42

score.py

	# Gather the prediction records into one table.
	result_df = pd.DataFrame(predictions)
	# Keep only the rows whose trend_label equals 1 and order them by score,
	# highest first. The sorted view is not assigned to a name; presumably it
	# is meant to be shown as notebook cell output -- confirm.
	(
		result_df.loc[result_df.trend_label == 1]
		.sort_values(by='score', ascending=False)
	)

lucinda-lim / multiprocessing.py

Created October 30, 2020 04:58

	# Run prophet_prediction for every search term in parallel.
	# The first column of search_df is skipped (columns[1:]); presumably it
	# holds the date index rather than a search term -- confirm.
	search_term = search_df.columns[1:]
	# Use the pool as a context manager so its worker processes are shut down
	# when the block exits instead of being left running. list() drains imap
	# inside the block, so every result is collected before the pool goes
	# away; imap yields results in the same order as search_term.
	with Pool(cpu_count()) as p:
		predictions = list(tqdm(p.imap(prophet_prediction, search_term), total=len(search_term)))

lucinda-lim / Prophet.py

Last active October 30, 2020 05:39

	import pandas as pd
	import numpy as np
	from datetime import datetime, timedelta
	from scipy import stats
	from scipy.stats import t,ttest_ind
	from fbprophet import Prophet
	from tqdm import tqdm
	from multiprocessing import Pool, cpu_count
	##---------------------------------------------------------------------------------------------------------------------------------------
	def prophet_prediction(search_term):

lucinda-lim / dominant_topic.py

Last active October 30, 2020 05:20

	def format_topics_sentences(ldamodel=optimal_model, corpus=corpus, texts=data_lemmatized,image_id_list=list(df.Document_Name)):
	# Init output
	sent_topics_df = pd.DataFrame()
	for i,row_list in enumerate (ldamodel[corpus]):
	## the model computes most likely topic for each word in format of ('topic',%)
	row=row_list[0] if ldamodel.per_word_topics else row_list
	## sort topic based on largest probability(x[1]) to smallest
	row=sorted(row, key=lambda x: (x[1]), reverse=True)
	## Get dominant_topic, percentage_contribution, keywords for each document
	for j,(topic_num,prop_topic) in enumerate(row):

lucinda-lim / topic_coherence.py

Last active October 30, 2020 05:20

	def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
	coherence_values = []
	model_list = []
	for num_topics in range(start, limit, step):
	model = gensim.models.ldamodel.LdaModel(corpus=corpus,
	id2word=id2word,
	num_topics=num_topics,
	random_state=100,
	update_every=1,
	chunksize=100,

lucinda-lim / LDA_model.py

Last active October 30, 2020 05:21

	# The lemmatized documents are the texts the model is built from.
	texts = data_lemmatized
	# Build the gensim Dictionary over those documents.
	id2word = corpora.Dictionary(texts)
	# Convert each document with doc2bow to obtain the corpus passed to the model.
	corpus = list(map(id2word.doc2bow, texts))

	lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
	id2word=id2word,
	num_topics=6,
	random_state=100,
	update_every=1,
	chunksize=100,

lucinda-lim / text_preprocessing.py

Last active October 30, 2020 05:21

	from nltk.corpus import stopwords
	import spacy
	import gensim
	import gensim.corpora as corpora
	from gensim.utils import simple_preprocess
	from gensim.models import CoherenceModel
	from pprint import pprint
	import pyLDAvis
	import pyLDAvis.gensim
	import matplotlib.pyplot as plt

lucinda-lim / Welch_Test.py

Last active October 30, 2020 05:22

	## data_a: forecast_value, data_b: pass_value
	def Welch_ttest(data_a, data_b, alpha):
	mean_a, mean_b = np.mean(data_a), np.mean(data_b)
	# squared standard error
	se_a, se_b = stats.tstd(data_a)**2.0/len(data_a), stats.tstd(data_b)**2.0/len(data_b)
	#v
	v_a, v_b = len(data_a)-1, len(data_b)-1
	# assume unequal variance
	t_stat, p_2tail = ttest_ind(data_a, data_b, equal_var=False)
	# degree of freedom

lucinda-lim / classifier_from_little_data_script_3.py

Created July 27, 2019 08:12 — forked from fchollet/classifier_from_little_data_script_3.py

Fine-tuning a Keras model. Updated to the Keras 2.0 API.

	'''This script goes along the blog post
	"Building powerful image classification models using very little data"
	from blog.keras.io.
	It uses data that can be downloaded at:
	https://www.kaggle.com/c/dogs-vs-cats/data
	In our setup, we:
	- created a data/ folder
	- created train/ and validation/ subfolders inside data/
	- created cats/ and dogs/ subfolders inside train/ and validation/
	- put the cat pictures index 0-999 in data/train/cats

lucinda-lim / classifier_from_little_data_script_2.py

Created July 27, 2019 08:12 — forked from fchollet/classifier_from_little_data_script_2.py

Updated to the Keras 2.0 API.

	'''This script goes along the blog post
	"Building powerful image classification models using very little data"
	from blog.keras.io.
	It uses data that can be downloaded at:
	https://www.kaggle.com/c/dogs-vs-cats/data
	In our setup, we:
	- created a data/ folder
	- created train/ and validation/ subfolders inside data/
	- created cats/ and dogs/ subfolders inside train/ and validation/
	- put the cat pictures index 0-999 in data/train/cats

Lucinda Lim lucinda-lim