import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy import stats
from scipy.stats import t, ttest_ind
from fbprophet import Prophet
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
##---------------------------------------------------------------------------------------------------------------------------------------
def prophet_prediction(search_term):
    # Body truncated in the original gist. A minimal sketch, assuming search_df holds
    # a 'date' column plus one column per term, and that callers expect a dict with
    # 'trend_label' and 'score' keys:
    ts = search_df[['date', search_term]].rename(columns={'date': 'ds', search_term: 'y'})
    m = Prophet()
    m.fit(ts)
    forecast = m.predict(m.make_future_dataframe(periods=30))  # assumed horizon
    delta = forecast['trend'].iloc[-1] - forecast['trend'].iloc[0]
    return {'search_term': search_term, 'trend_label': int(delta > 0), 'score': delta}

# Forecast every search term in parallel, one worker per CPU core.
p = Pool(cpu_count())
search_terms = search_df.columns[1:]
predictions = list(tqdm(p.imap(prophet_prediction, search_terms), total=len(search_terms)))
p.close()

# Collect the per-term results and list the trending terms, highest score first.
result_df = pd.DataFrame(predictions)
result_df[result_df.trend_label == 1].sort_values(by='score', ascending=False)
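# A hedged sketch of the expected shape of search_df: one 'date' column plus one
# column of interest scores per search term. Names and values here are illustrative
# only, not from the original gist.
example_search_df = pd.DataFrame({
    'date': pd.date_range('2019-01-06', periods=104, freq='W'),
    'bubble tea': np.random.rand(104) * 100,  # hypothetical term column
    'cold brew': np.random.rand(104) * 100,   # hypothetical term column
})
# search_df would need this layout before the Pool block above is run.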
##---------------------------------------------------------------------------------------------------------------------------------------
from nltk.corpus import stopwords
import spacy
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
from pprint import pprint
import pyLDAvis
import pyLDAvis.gensim
import matplotlib.pyplot as plt

# Build the gensim dictionary and bag-of-words corpus from the lemmatized documents
# (data_lemmatized comes from the preprocessing step of the surrounding notebook).
id2word = corpora.Dictionary(data_lemmatized)
texts = data_lemmatized
corpus = [id2word.doc2bow(text) for text in texts]

# Baseline LDA model with a fixed number of topics. The call was truncated in the
# original gist after chunksize=100; the remaining keyword arguments are assumed,
# following the usual gensim recipe.
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                            id2word=id2word,
                                            num_topics=6,
                                            random_state=100,
                                            update_every=1,
                                            chunksize=100,
                                            passes=10,             # assumed
                                            alpha='auto',          # assumed
                                            per_word_topics=True)  # assumed

def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
    """Train one LDA model per topic count and record its c_v coherence."""
    coherence_values = []
    model_list = []
    for num_topics in range(start, limit, step):
        model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                                id2word=dictionary,
                                                num_topics=num_topics,
                                                random_state=100,
                                                update_every=1,
                                                chunksize=100,
                                                passes=10,             # assumed, as above
                                                alpha='auto',
                                                per_word_topics=True)
        model_list.append(model)
        coherencemodel = CoherenceModel(model=model, texts=texts,
                                        dictionary=dictionary, coherence='c_v')
        coherence_values.append(coherencemodel.get_coherence())
    return model_list, coherence_values

# optimal_model and df below come from the surrounding notebook: optimal_model is
# the model picked from the coherence sweep (see the usage sketch further down).
def format_topics_sentences(ldamodel=optimal_model, corpus=corpus, texts=data_lemmatized,
                            image_id_list=list(df.Document_Name)):
    # Init output
    sent_topics_df = pd.DataFrame()
    for i, row_list in enumerate(ldamodel[corpus]):
        ## the model scores each document as a list of (topic, probability) pairs
        row = row_list[0] if ldamodel.per_word_topics else row_list
        ## sort topics from largest probability (x[1]) to smallest
        row = sorted(row, key=lambda x: (x[1]), reverse=True)
        ## get the dominant topic, percentage contribution and keywords per document
        for j, (topic_num, prop_topic) in enumerate(row):
            # loop body truncated in the original gist; the standard completion
            # keeps only the dominant (first) topic per document -- assumed here
            if j == 0:
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                sent_topics_df = sent_topics_df.append(
                    pd.Series([image_id_list[i], int(topic_num),
                               round(prop_topic, 4), topic_keywords]),
                    ignore_index=True)
            else:
                break
    sent_topics_df.columns = ['Document_Name', 'Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
    return sent_topics_df
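# Usage sketch for the pieces above, assuming data_lemmatized and df are in scope.
# The start/limit/step values and picking the highest-coherence model are
# illustrative choices, not taken from the original gist.
model_list, coherence_values = compute_coherence_values(dictionary=id2word,
                                                        corpus=corpus,
                                                        texts=data_lemmatized,
                                                        start=2, limit=20, step=3)
# Plot coherence against topic count to eyeball the best num_topics.
plt.plot(range(2, 20, 3), coherence_values)
plt.xlabel('Num Topics')
plt.ylabel('Coherence score (c_v)')
plt.show()

# Treat the highest-coherence model as optimal_model, then label each document
# with its dominant topic; pyLDAvis gives an interactive view of the topics.
optimal_model = model_list[coherence_values.index(max(coherence_values))]
df_dominant_topic = format_topics_sentences(ldamodel=optimal_model)
vis = pyLDAvis.gensim.prepare(optimal_model, corpus, id2word)
pyLDAvis.display(vis)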
##---------------------------------------------------------------------------------------------------------------------------------------
## data_a: forecast values, data_b: past (historical) values
def Welch_ttest(data_a, data_b, alpha):
    mean_a, mean_b = np.mean(data_a), np.mean(data_b)
    # squared standard error of each sample
    se_a, se_b = stats.tstd(data_a)**2.0/len(data_a), stats.tstd(data_b)**2.0/len(data_b)
    # per-sample degrees of freedom
    v_a, v_b = len(data_a)-1, len(data_b)-1
    # Welch's t-test assumes unequal variances
    t_stat, p_2tail = ttest_ind(data_a, data_b, equal_var=False)
    # Welch-Satterthwaite degrees of freedom; the body was truncated here in the
    # original gist, so the remainder is the standard completion
    dof = (se_a + se_b)**2.0 / (se_a**2.0/v_a + se_b**2.0/v_b)
    # two-tailed critical value at significance level alpha
    cv = t.ppf(1.0 - alpha/2.0, dof)
    return t_stat, dof, cv, p_2tail
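# A hedged usage sketch: flag a trend when the forecast window differs
# significantly from the recent past. Both arrays and the 0.05 level are
# illustrative values, not from the original gist.
forecast_values = np.array([52.0, 55.1, 57.3, 60.2, 61.8])
past_values = np.array([48.9, 50.2, 49.5, 51.0, 50.4])
t_stat, dof, cv, p_2tail = Welch_ttest(forecast_values, past_values, alpha=0.05)
if p_2tail < 0.05:
    print('trending: t=%.2f, df=%.1f, p=%.4f' % (t_stat, dof, p_2tail))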
@lucinda-lim
lucinda-lim / classifier_from_little_data_script_3.py
Created July 27, 2019 08:12 — forked from fchollet/classifier_from_little_data_script_3.py
Fine-tuning a Keras model. Updated to the Keras 2.0 API.
'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dogs pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation
examples for each class.
'''
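# The script body itself is not included in this capture. Below is a condensed,
# hedged sketch of the fine-tuning pattern the gist title describes (Keras 2.0
# API); the layer cut-off, paths and hyperparameters are illustrative, not
# necessarily those of the forked script.
from keras import applications, optimizers
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from keras.preprocessing.image import ImageDataGenerator

# Pre-trained VGG16 convolutional base, without its ImageNet classifier head.
base = applications.VGG16(weights='imagenet', include_top=False,
                          input_shape=(150, 150, 3))

# Small binary-classification head (cats vs dogs) on top of the conv base.
x = Flatten()(base.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
out = Dense(1, activation='sigmoid')(x)
model = Model(inputs=base.input, outputs=out)

# Freeze the early conv blocks so only the last block and the head are trained.
for layer in model.layers[:15]:
    layer.trainable = False

# Fine-tune with a small SGD learning rate so pre-trained weights move slowly.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

train_gen = ImageDataGenerator(rescale=1. / 255).flow_from_directory(
    'data/train', target_size=(150, 150), batch_size=16, class_mode='binary')
val_gen = ImageDataGenerator(rescale=1. / 255).flow_from_directory(
    'data/validation', target_size=(150, 150), batch_size=16, class_mode='binary')

model.fit_generator(train_gen, steps_per_epoch=2000 // 16, epochs=50,
                    validation_data=val_gen, validation_steps=800 // 16)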