import openai
import os
import pickle

# Prefer the OPENAI_API_KEY environment variable so the secret does not have
# to live in the source file; the original placeholder stays as the fallback.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'update_your_openai_API_key_here')

# On-disk cache mapping sentence text -> embedding returned by the API.
davinci_emb_path = '../data/nlp/davinci_emb.pkl'
engine = 'text-similarity-davinci-001'

# Start from whatever has already been cached, if anything.
if os.path.exists(davinci_emb_path):
    print('Loading Davinci Embeddings')
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load a cache that was produced by this script.
    with open(davinci_emb_path, 'rb') as f:
        davinci_emb = pickle.load(f)
else:
    davinci_emb = {}

# Every distinct sentence appearing in either column of the test set.
# `stsb_test` is defined elsewhere; it is indexed by column name and the
# columns expose `.values.tolist()`.
unique_sentences = list(set(stsb_test['sentence1'].values.tolist() + stsb_test['sentence2'].values.tolist()))

# Only query sentences the cache does not already hold. This also repairs a
# stale or partial cache, which would otherwise raise KeyError further down.
missing_sentences = [sentence for sentence in unique_sentences if sentence not in davinci_emb]

if missing_sentences:
    print('Querying Davinci Embeddings')
    # Create the cache directory up front so a missing folder is not first
    # discovered after all the API calls have already been made.
    os.makedirs(os.path.dirname(davinci_emb_path), exist_ok=True)
    try:
        for sentence in tqdm(missing_sentences):
            davinci_emb[sentence] = openai.Embedding.create(input=[sentence], engine=engine)['data'][0]['embedding']
    finally:
        # Save embeddings to file, even when a request failed part-way, so the
        # next run resumes instead of re-querying everything.
        with open(davinci_emb_path, 'wb') as f:
            pickle.dump(davinci_emb, f)

# Generate Embeddings: one embedding per row, in row order, for each column.
sentence1_emb = [davinci_emb[sentence] for sentence in stsb_test['sentence1']]
sentence2_emb = [davinci_emb[sentence] for sentence in stsb_test['sentence2']]

# Cosine Similarity
# NOTE(review): `cos_sim` is defined elsewhere; presumably it returns one
# score per (sentence1, sentence2) row pair -- confirm against its definition.
stsb_test['OpenAI Davinci_cosine_score'] = cos_sim(sentence1_emb, sentence2_emb)