model-comparison-chat-app.py
    """Multi-Model Comparison Chat: Qwen3, GPT-5, and Claude Sonnet 4.5 side-by-side"""

    import os
    import time
    from typing import Dict, List, Optional
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from dotenv import load_dotenv
    import streamlit as st
    from openai import OpenAI
    from anthropic import Anthropic

    load_dotenv()
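# Expected in .env (names taken from the os.getenv calls below):
#   DASHSCOPE_API_KEY=...   # Qwen via Alibaba Cloud DashScope
#   OPENAI_API_KEY=...      # GPT-5
#   ANTHROPIC_API_KEY=...   # Claude Sonnet 4.5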

MODEL_CONFIGS = {
    "Qwen3 Max Thinking": {"icon": "πŸ€–"},
    "GPT-5": {"icon": "🧠"},
    "Claude Sonnet 4.5": {"icon": "🎯"}
}

QWEN_CLIENT = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
)
GPT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
CLAUDE_CLIENT = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))


def error_response(error_msg: str) -> Dict:
    """Return standardized error response."""
    return {
        "content": None, "reasoning_content": None, "response_time": 0,
        "tokens_used": {"input": 0, "output": 0, "total": 0}, "error": error_msg
    }


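# Each branch below normalizes a provider-specific response into the same dict
# shape: content, reasoning_content, response_time, tokens_used, error.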
def call_model(model_name: str, messages: List[Dict], **kwargs) -> Dict:
    """Call any model with unified interface."""
    try:
        start_time = time.time()

        if model_name == "Qwen3 Max Thinking":
            enable_thinking = kwargs.get("enable_thinking", True)
            thinking_budget = kwargs.get("thinking_budget")
            extra_body = {"enable_thinking": enable_thinking}
            if thinking_budget:
                extra_body["thinking_budget"] = thinking_budget

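            # DashScope's OpenAI-compatible endpoint takes its thinking
            # controls through extra_body rather than top-level parameters.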
            completion = QWEN_CLIENT.chat.completions.create(
                model="qwen3-max-preview", messages=messages, extra_body=extra_body
            )
            reasoning_content = getattr(completion.choices[0].message, 'reasoning_content', None)

            return {
                "content": completion.choices[0].message.content,
                "reasoning_content": reasoning_content,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": completion.usage.prompt_tokens,
                    "output": completion.usage.completion_tokens,
                    "total": completion.usage.total_tokens
                },
                "error": None
            }

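        # GPT-5 goes through the Responses API; reasoning summaries arrive as
        # separate "reasoning" items alongside the "message" output items.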
    elif model_name == "GPT-5":
    reasoning_effort = kwargs.get("reasoning_effort", "medium")
    input_messages = [{"role": m["role"], "content": m["content"]} for m in messages]

    response = GPT_CLIENT.responses.create(
    model="gpt-5", input=input_messages,
    reasoning={"effort": reasoning_effort, "summary": "auto"}
    )

    reasoning_text, content_text = None, ""
    for item in response.output:
    if item.type == "reasoning" and hasattr(item, "summary"):
    summaries = [s.text for s in item.summary if hasattr(s, "text")]
    reasoning_text = "\n\n".join(summaries) if summaries else None
    elif item.type == "message" and hasattr(item, "content"):
    content_text += "".join(c.text for c in item.content if hasattr(c, "text"))

    return {
    "content": content_text,
    "reasoning_content": reasoning_text,
    "response_time": time.time() - start_time,
    "tokens_used": {
    "input": getattr(response.usage, "input_tokens", 0),
    "output": getattr(response.usage, "output_tokens", 0),
    "total": getattr(response.usage, "total_tokens", 0)
    },
    "error": None
    }

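        # Claude's extended thinking returns interleaved "thinking" and "text"
        # content blocks; max_tokens must exceed the thinking budget_tokens.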
        else:  # Claude Sonnet 4.5
            enable_thinking = kwargs.get("enable_thinking", True)
            params = {"model": "claude-sonnet-4-5", "max_tokens": 10000, "messages": messages}
            if enable_thinking:
                params["thinking"] = {"type": "enabled", "budget_tokens": 5000}

            message = CLAUDE_CLIENT.messages.create(**params)
            content_text, thinking_text = "", None

            for block in message.content:
                if block.type == "thinking":
                    thinking_text = block.thinking
                elif block.type == "text":
                    content_text += block.text

            return {
                "content": content_text,
                "reasoning_content": thinking_text,
                "response_time": time.time() - start_time,
                "tokens_used": {
                    "input": message.usage.input_tokens,
                    "output": message.usage.output_tokens,
                    "total": message.usage.input_tokens + message.usage.output_tokens
                },
                "error": None
            }

    except Exception as e:
        return error_response(f"{model_name} Error: {str(e)}")


def render_response_card(model_name: str, response_data: Dict):
    """Render model response with thinking process."""
    st.markdown(f"### {MODEL_CONFIGS[model_name]['icon']} {model_name}")
    if response_data["error"]:
        st.error(response_data["error"])
        return
    if response_data["reasoning_content"]:
        with st.expander("🧠 Thinking Process", expanded=False):
            st.markdown(response_data["reasoning_content"])
    st.markdown(response_data["content"])


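# Fan the request out to all providers at once; as_completed yields results in
# whatever order they finish, keyed back to the model name.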
def call_models_parallel(messages: List[Dict], selected_models: List[str],
                         qwen_thinking: bool, gpt5_reasoning: str, claude_thinking: bool) -> Dict[str, Dict]:
    """Call multiple models in parallel."""
    model_kwargs = {
        "Qwen3 Max Thinking": {"enable_thinking": qwen_thinking},
        "GPT-5": {"reasoning_effort": gpt5_reasoning},
        "Claude Sonnet 4.5": {"enable_thinking": claude_thinking}
    }

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            executor.submit(call_model, m, messages, **model_kwargs[m]): m
            for m in selected_models if m in model_kwargs
        }
        return {futures[f]: f.result() for f in as_completed(futures)}


def main():
    """Main Streamlit application."""
    st.set_page_config(
        page_title="Multi-Model Comparison Chat", page_icon="πŸ€–",
        layout="wide", initial_sidebar_state="expanded"
    )

    st.session_state.setdefault("messages", [])
    st.session_state.setdefault("model_responses", [])

    st.title("πŸ€– Multi-Model Comparison Chat")
    st.markdown("""
    Compare **Qwen3 Max Thinking**, **GPT-5**, and **Claude Sonnet 4.5** side-by-side.
    All three models support reasoning/thinking modes; see how they approach problems differently.
    """)

    # Always use all three models
    selected_models = ["Qwen3 Max Thinking", "GPT-5", "Claude Sonnet 4.5"]

    with st.sidebar:
        st.header("βš™οΈ Settings")

        st.subheader("Thinking Mode")
        qwen_thinking = st.checkbox("Enable Qwen3 Thinking", value=True)
        gpt5_reasoning = st.selectbox(
            "GPT-5 Reasoning Effort", options=["minimal", "low", "medium", "high"],
            index=2, help="Higher effort = better quality but slower and more expensive"
        )
        claude_thinking = st.checkbox("Enable Claude Thinking", value=True)

        st.divider()

        st.subheader("API Status")
        st.markdown(f"""
        - Qwen3: {"βœ…" if os.getenv("DASHSCOPE_API_KEY") else "❌"}
        - GPT-5: {"βœ…" if os.getenv("OPENAI_API_KEY") else "❌"}
        - Claude: {"βœ…" if os.getenv("ANTHROPIC_API_KEY") else "❌"}
        """)

        st.divider()

        if st.button("πŸ—‘οΈ Clear Conversation", use_container_width=True):
            st.session_state.messages = []
            st.session_state.model_responses = []
            st.rerun()

    if st.session_state.messages:
        for i, msg in enumerate(st.session_state.messages):
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])

            if msg["role"] == "user" and i < len(st.session_state.model_responses):
                responses = st.session_state.model_responses[i]
                if responses:
                    cols = st.columns(len(responses))
                    for idx, (model_name, response_data) in enumerate(responses.items()):
                        with cols[idx]:
                            render_response_card(model_name, response_data)

    if prompt := st.chat_input("Ask a question to all models..."):
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("user"):
            st.markdown(prompt)

        api_messages = [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]

        with st.spinner("πŸ€” Models are thinking..."):
            responses = call_models_parallel(
                api_messages, selected_models, qwen_thinking, gpt5_reasoning, claude_thinking
            )

        st.session_state.model_responses.append(responses)

        cols = st.columns(len(responses))
        for idx, (model_name, response_data) in enumerate(responses.items()):
            with cols[idx]:
                render_response_card(model_name, response_data)


if __name__ == "__main__":
    main()
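
# To try this locally (setup inferred from the imports above, not part of the gist):
#   pip install streamlit openai anthropic python-dotenv
#   streamlit run model-comparison-chat-app.py
# with the three API keys listed at the top placed in a .env file beside the script.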