Last active: August 22, 2023 17:43
Gist: qfeuilla/06cc7f2980a87b2b265725a849acb56c
Revisions
- qfeuilla revised this gist (Aug 22, 2023): 1 changed file with 0 additions and 1 deletion. The change touches the tail of the script:

  @@ -28,7 +28,6 @@
   all_text = all_text.split()
   print(f"Amount of tokens: {len(tokens)}")
   # 250 words per page
   print(f"Estimated number of pages: {len(all_text) / 250} pages")
  -
- qfeuilla renamed this gist (Aug 22, 2023): file renamed without changes.
- qfeuilla renamed this gist (Aug 22, 2023): file renamed without changes.
- qfeuilla created this gist (Aug 22, 2023). The original file, 35 lines (a sketch of the conversations.json layout its traversal assumes follows after the script):

# Step 1: Download ChatGPT history
# Go to ChatGPT -> click on profile (...) -> Settings & Beta -> Data controls -> Export data
# You should receive a zip by email from OpenAI
PATH_TO_EXPORT_CONVERSATION = ""  # Set this to the path of the "conversations.json" file in the zip you received

# Step 2: Execute this code
import pandas as pd
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

all_text = ""

# Conversations sorted oldest-first; pd.read_json parses "create_time"
# (epoch seconds) as datetimes by default, so subtracting yields a Timedelta
data = pd.read_json(PATH_TO_EXPORT_CONVERSATION).sort_values(by="create_time", ascending=True)

days_of_interactions = (data.iloc[-1]["create_time"] - data.iloc[0]["create_time"]).days
print(f"Amount of days between first and last interactions: {days_of_interactions}")

# Collect every text part from every message node of every conversation
for _, row in data.iterrows():
    mapping = row["mapping"]
    for i in mapping:
        try:
            for text in mapping[i]["message"]["content"]["parts"]:
                all_text += text + " "
        except (TypeError, KeyError):  # nodes without message/content/parts (e.g. the root node) are skipped
            pass

tokens = encoding.encode(all_text)
all_text = all_text.split()

print(f"Amount of tokens: {len(tokens)}")

# 250 words per page
print(f"Estimated number of pages: {len(all_text) / 250} pages")
print(f"Estimated number of pages per weekday: {len(all_text) / 250 / ((days_of_interactions / 7) * 5)}")
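The traversal in the script relies on the layout of conversations.json: each conversation row carries a "mapping" dict of node IDs to message nodes, and message text sits under message -> content -> parts, while structural nodes (e.g. the root) lack that path and are skipped. A minimal, self-contained sketch of that assumed layout, with made-up node IDs and texts, shows how the extraction and the token/page arithmetic behave:

import tiktoken

# Hypothetical stand-in for one conversation's "mapping" field;
# the node IDs and message texts are invented for illustration.
mapping = {
    "root": {"message": None},  # structural node: no text, skipped below
    "node-1": {"message": {"content": {"parts": ["How many pages have I written?"]}}},
    "node-2": {"message": {"content": {"parts": ["Quite a few, it turns out."]}}},
}

all_text = ""
for i in mapping:
    try:
        for text in mapping[i]["message"]["content"]["parts"]:
            all_text += text + " "
    except (TypeError, KeyError):  # node has no message/content/parts
        pass

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
print(f"Amount of tokens: {len(encoding.encode(all_text))}")
words = all_text.split()
print(f"Estimated number of pages: {len(words) / 250} pages")

As a sanity check on the page arithmetic: 100,000 extracted words at 250 words per page is 400 pages; over 140 calendar days (20 weeks, hence 100 weekdays), that works out to 4 pages per weekday.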