Created
November 10, 2025 11:34
-
-
Save BexTuychiev/18846ffc6075b678fe5ee10aeb8bfad5 to your computer and use it in GitHub Desktop.
Revisions
-
BexTuychiev created this gist
Nov 10, 2025 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,234 @@ """Multi-Model Comparison Chat: Qwen3, GPT-5, and Claude Sonnet 4.5 side-by-side""" import os import time from typing import Dict, List, Optional from concurrent.futures import ThreadPoolExecutor, as_completed from dotenv import load_dotenv import streamlit as st from openai import OpenAI from anthropic import Anthropic load_dotenv() MODEL_CONFIGS = { "Qwen3 Max Thinking": {"icon": "π€"}, "GPT-5": {"icon": "π§ "}, "Claude Sonnet 4.5": {"icon": "π―"} } QWEN_CLIENT = OpenAI( api_key=os.getenv("DASHSCOPE_API_KEY"), base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1" ) GPT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) CLAUDE_CLIENT = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) def error_response(error_msg: str) -> Dict: """Return standardized error response.""" return { "content": None, "reasoning_content": None, "response_time": 0, "tokens_used": {"input": 0, "output": 0, "total": 0}, "error": error_msg } def call_model(model_name: str, messages: List[Dict], **kwargs) -> Dict: """Call any model with unified interface.""" try: start_time = time.time() if model_name == "Qwen3 Max Thinking": enable_thinking = kwargs.get("enable_thinking", True) thinking_budget = kwargs.get("thinking_budget") extra_body = {"enable_thinking": enable_thinking} if thinking_budget: extra_body["thinking_budget"] = thinking_budget completion = QWEN_CLIENT.chat.completions.create( model="qwen3-max-preview", messages=messages, extra_body=extra_body ) reasoning_content = getattr(completion.choices[0].message, 'reasoning_content', None) return { "content": completion.choices[0].message.content, "reasoning_content": reasoning_content, "response_time": time.time() - start_time, "tokens_used": { "input": completion.usage.prompt_tokens, "output": completion.usage.completion_tokens, "total": completion.usage.total_tokens }, "error": None } elif model_name == "GPT-5": reasoning_effort = kwargs.get("reasoning_effort", "medium") input_messages = [{"role": m["role"], "content": m["content"]} for m in messages] response = GPT_CLIENT.responses.create( model="gpt-5", input=input_messages, reasoning={"effort": reasoning_effort, "summary": "auto"} ) reasoning_text, content_text = None, "" for item in response.output: if item.type == "reasoning" and hasattr(item, "summary"): summaries = [s.text for s in item.summary if hasattr(s, "text")] reasoning_text = "\n\n".join(summaries) if summaries else None elif item.type == "message" and hasattr(item, "content"): content_text += "".join(c.text for c in item.content if hasattr(c, "text")) return { "content": content_text, "reasoning_content": reasoning_text, "response_time": time.time() - start_time, "tokens_used": { "input": getattr(response.usage, "input_tokens", 0), "output": getattr(response.usage, "output_tokens", 0), "total": getattr(response.usage, "total_tokens", 0) }, "error": None } else: # Claude Sonnet 4.5 enable_thinking = kwargs.get("enable_thinking", True) params = {"model": "claude-sonnet-4-5", "max_tokens": 10000, "messages": messages} if enable_thinking: params["thinking"] = {"type": "enabled", "budget_tokens": 5000} message = CLAUDE_CLIENT.messages.create(**params) content_text, thinking_text = "", None for block in message.content: if block.type == "thinking": thinking_text = block.thinking elif block.type == "text": content_text += block.text return { "content": content_text, "reasoning_content": thinking_text, "response_time": time.time() - start_time, "tokens_used": { "input": message.usage.input_tokens, "output": message.usage.output_tokens, "total": message.usage.input_tokens + message.usage.output_tokens }, "error": None } except Exception as e: return error_response(f"{model_name} Error: {str(e)}") def render_response_card(model_name: str, response_data: Dict): """Render model response with thinking process.""" st.markdown(f"### {MODEL_CONFIGS[model_name]['icon']} {model_name}") if response_data["error"]: return st.error(response_data["error"]) if response_data["reasoning_content"]: with st.expander("π§ Thinking Process", expanded=False): st.markdown(response_data["reasoning_content"]) st.markdown(response_data["content"]) def call_models_parallel(messages: List[Dict], selected_models: List[str], qwen_thinking: bool, gpt5_reasoning: str, claude_thinking: bool) -> Dict[str, Dict]: """Call multiple models in parallel.""" model_kwargs = { "Qwen3 Max Thinking": {"enable_thinking": qwen_thinking}, "GPT-5": {"reasoning_effort": gpt5_reasoning}, "Claude Sonnet 4.5": {"enable_thinking": claude_thinking} } with ThreadPoolExecutor(max_workers=3) as executor: futures = { executor.submit(call_model, m, messages, **model_kwargs[m]): m for m in selected_models if m in model_kwargs } return {futures[f]: f.result() for f in as_completed(futures)} def main(): """Main Streamlit application.""" st.set_page_config( page_title="Multi-Model Comparison Chat", page_icon="π€", layout="wide", initial_sidebar_state="expanded" ) st.session_state.setdefault("messages", []) st.session_state.setdefault("model_responses", []) st.title("π€ Multi-Model Comparison Chat") st.markdown(""" Compare **Qwen3 Max Thinking**, **GPT-5**, and **Claude Sonnet 4.5** side-by-side. All three models support reasoning/thinking modes - see how they approach problems differently. """) # Always use all three models selected_models = ["Qwen3 Max Thinking", "GPT-5", "Claude Sonnet 4.5"] with st.sidebar: st.header("βοΈ Settings") st.subheader("Thinking Mode") qwen_thinking = st.checkbox("Enable Qwen3 Thinking", value=True) gpt5_reasoning = st.selectbox( "GPT-5 Reasoning Effort", options=["minimal", "low", "medium", "high"], index=2, help="Higher effort = better quality but slower and more expensive" ) claude_thinking = st.checkbox("Enable Claude Thinking", value=True) st.divider() st.subheader("API Status") st.markdown(f""" - Qwen3: {"β " if os.getenv("DASHSCOPE_API_KEY") else "β"} - GPT-5: {"β " if os.getenv("OPENAI_API_KEY") else "β"} - Claude: {"β " if os.getenv("ANTHROPIC_API_KEY") else "β"} """) st.divider() if st.button("ποΈ Clear Conversation", use_container_width=True): st.session_state.messages = [] st.session_state.model_responses = [] st.rerun() if st.session_state.messages: for i, msg in enumerate(st.session_state.messages): with st.chat_message(msg["role"]): st.markdown(msg["content"]) if msg["role"] == "user" and i < len(st.session_state.model_responses): responses = st.session_state.model_responses[i] if responses: cols = st.columns(len(responses)) for idx, (model_name, response_data) in enumerate(responses.items()): with cols[idx]: render_response_card(model_name, response_data) if prompt := st.chat_input("Ask a question to all models..."): st.session_state.messages.append({"role": "user", "content": prompt}) with st.chat_message("user"): st.markdown(prompt) api_messages = [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages] with st.spinner("π€ Models are thinking..."): responses = call_models_parallel( api_messages, selected_models, qwen_thinking, gpt5_reasoning, claude_thinking ) st.session_state.model_responses.append(responses) cols = st.columns(len(responses)) for idx, (model_name, response_data) in enumerate(responses.items()): with cols[idx]: render_response_card(model_name, response_data) if __name__ == "__main__": main()