Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save christopherwoodall/e7a400133cf49fb9678bc6df8d4f8b8b to your computer and use it in GitHub Desktop.
Save christopherwoodall/e7a400133cf49fb9678bc6df8d4f8b8b to your computer and use it in GitHub Desktop.
squash the em-dash
#!/usr/bin/env python3
"""
Demonstrate banning the em-dash (and common variants) with logit_bias.
import os, sys, textwrap
import openai # pip install openai>=1.9.0
# Model name passed to the chat-completions request.
MODEL = "chatgpt-4o-latest"

# User prompt worded to invite an em dash in the reply.
PROMPT = (
    "Write a short sentence that would normally include an em dash"
    " (for example between two clauses)."
)
# ──────────────────────────────────────────────────────────────
# Exact token-id → bias map (each trailing comment shows the token's text)
# ──────────────────────────────────────────────────────────────
# Token ids to suppress; the trailing comment on each line gives the text
# of that token (every one contains at least one em dash, U+2014).
_EM_DASH_TOKEN_IDS = (
    2322,    # '\u2014'
    2733,    # ' \u2014'
    8290,    # '\u2014\u2014'
    20962,   # '\u2014\u2014\u2014\u2014'
    35251,   # '\u2014and'
    41648,   # '\u2014\u2014\u2014\u2014\u2014\u2014\u2014\u2014'
    51692,   # '\u2014the'
    54067,   # '.\u2014'
    65363,   # '\u2014a'
    87643,   # '\u2014\n\n'
    94012,   # '\u2014but'
    94828,   # '\u2014\u2014... (long run)'
    96754,   # '.\u201d\u2014'
    108181,  # '\u2014that'
    114635,  # '\u2014it'
    118256,  # '\u2014in'
    121630,  # '\u2014or'
    121655,  # '\u2014to'
    123101,  # '\u2014\n'
    126952,  # '\u2014I'
    127126,  # '\u201d\u2014'
    134820,  # ' \u2014\n'
    137419,  # '\u2014which'
    140135,  # ' \u2014\u2014'
    142654,  # ' \u2014\n\n'
    144129,  # ')\u2014'
    144787,  # '\u2014is'
    147994,  # ',\u2014'
    155638,  # '\u2014as'
    160984,  # '\u2014not'
    169785,  # '\u2014you'
    178328,  # '\u2014from'
    180500,  # '\u2014including'
    183122,  # '\u2014for'
    183862,  # '\u200b\u2014'
    187349,  # '\u2014they'
    188860,  # '\u2014all'
    190702,  # '\u2014with'
    196615,  # '\u2014we'
    197618,  # '\u2014even'
)

# Map every listed token id to the same bias value of -100.
LOGIT_BIAS_EM_DASH = dict.fromkeys(_EM_DASH_TOKEN_IDS, -100)
def main() -> None:
    """Request one completion with the em-dash tokens biased away and print it.

    Sends ``PROMPT`` as a single user message to ``MODEL``, passing
    ``LOGIT_BIAS_EM_DASH`` as the ``logit_bias`` argument, then prints the
    first choice's text wrapped to 80 columns.
    """
    # Take the key from the environment so the script runs without editing;
    # the placeholder remains as the fallback for anyone who edits it in place.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_KEY_HERE")

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": PROMPT}],
        max_tokens=64,
        temperature=0.7,
        # logit_bias is keyed by token id as a string, so convert the int keys.
        logit_bias={
            str(token_id): bias
            for token_id, bias in LOGIT_BIAS_EM_DASH.items()
        },
    )

    # The message content is optional in the response model; textwrap.fill
    # would raise on None, so fall back to an empty string.
    completion_text = response.choices[0].message.content or ""

    print("\n=== COMPLETION WITH EM-DASH BANNED ===\n")
    print(textwrap.fill(completion_text, 80))
    print("\n(If banning worked, there should be no em dashes.)\n")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment