francismurray committed on
Commit aac64a6 · 1 Parent(s): 83b15cf

Initial commit: LLM Comparison App

Files changed (4)
  1. .gitignore +2 -0
  2. README.md +62 -1
  3. app.py +178 -0
  4. environment.yml +79 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .DS_Store
+ .env
README.md CHANGED
@@ -11,4 +11,65 @@ license: mit
  short_description: Compare outputs from text-generation models side by side
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <!-- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -->
+
+ # LLM Comparison Tool
+
+ A Gradio web application that lets you compare outputs from different Hugging Face models side by side.
+
+ ## Features
+
+ - Compare outputs from two different LLMs simultaneously
+ - Simple and clean interface
+ - Support for multiple Hugging Face models
+ - Text generation using Hugging Face's Inference API
+ - Error handling and user feedback
+
+ ## Setup
+
+ 1. Clone this repository
+
+ 2. Create and activate the conda environment:
+ ```bash
+ conda env create -f environment.yml
+ conda activate llm_compare
+ ```
+
+ 3. Create a `.env` file in the root directory and add your Hugging Face API token:
+ ```
+ HF_TOKEN=your_hugging_face_token_here
+ ```
+ You can get your token from your [Hugging Face profile settings](https://huggingface.co/settings/tokens).
+
+ ## Running the App
+
+ 1. Make sure you have activated the conda environment:
+ ```bash
+ conda activate llm_compare
+ ```
+
+ 2. Run the application:
+ ```bash
+ python app.py
+ ```
+
+ 3. Open your browser and navigate to the URL shown in the terminal (typically `http://localhost:7860`)
+
+ ## Usage
+
+ 1. Enter your prompt in the text box
+ 2. Select two different models from the dropdown menus
+ 3. Click "Generate Responses" to see the outputs
+ 4. The responses will appear in the chatbot interfaces below each model selection
+
+ ## Models Available
+
+ - HuggingFaceH4/zephyr-7b-beta
+ - meta-llama/Llama-3.1-8B-Instruct
+ - microsoft/Phi-3.5-mini-instruct
+ - Qwen/QwQ-32B
+
+ ## Notes
+
+ - Make sure you have a valid Hugging Face API token with appropriate permissions
+ - Response times may vary depending on the model size and server load
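+
+ You can sanity-check your token before launching the app. A minimal sketch, assuming the `llm_compare` environment is active and `.env` is in the current directory:
+ ```python
+ import os
+ from dotenv import load_dotenv
+ from huggingface_hub import whoami
+
+ load_dotenv()
+ print(whoami(token=os.getenv("HF_TOKEN")))  # prints your account info if the token is valid
+ ```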
app.py ADDED
@@ -0,0 +1,178 @@
+ import os
+ import gradio as gr
+ from dotenv import load_dotenv
+ from huggingface_hub import InferenceClient
+
+ # Load environment variables
+ load_dotenv()
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ if not HF_TOKEN:
+     raise ValueError("Please set the HF_TOKEN environment variable")
+
+ # Available models
+ AVAILABLE_MODELS = [
+     "HuggingFaceH4/zephyr-7b-beta",
+     "meta-llama/Llama-3.1-8B-Instruct",
+     "microsoft/Phi-3.5-mini-instruct",
+     "Qwen/QwQ-32B",
+ ]
+
+ # Initialize inference client
+ inference_client = InferenceClient(token=HF_TOKEN)
+
+ def get_model_response(prompt, model_name, temperature_value, do_sample):
+     """Get response from a Hugging Face model."""
+     try:
+         # Build kwargs dynamically
+         generation_args = {
+             "prompt": prompt,
+             "model": model_name,
+             "max_new_tokens": 100,
+             "do_sample": do_sample,
+             "return_full_text": False
+         }
+
+         # Only include temperature if sampling is enabled
+         if do_sample and temperature_value > 0:
+             generation_args["temperature"] = temperature_value
+
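+         # text_generation returns the generated text as a plain string here;
+         # richer metadata is only available when details=True is requested.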
+         response = inference_client.text_generation(**generation_args)
+         return response
+
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ def compare_models(prompt, model1, model2, temp1, temp2, do_sample1, do_sample2):
+     """Compare outputs from two selected models."""
+     if not prompt.strip():
+         return (
+             [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Please enter a prompt"}],
+             [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Please enter a prompt"}],
+             gr.update(interactive=True)
+         )
+
+     response1 = get_model_response(prompt, model1, temp1, do_sample1)
+     response2 = get_model_response(prompt, model2, temp2, do_sample2)
+
+     # Format responses for chatbot display
+     chat1 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}]
+     chat2 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}]
+
+     return chat1, chat2, gr.update(interactive=True)
+
+
+ # Update temperature slider interactivity based on sampling checkbox.
+ # A single merged update lets each checkbox target its slider exactly once.
+ def update_slider_state(enabled):
+     if enabled:
+         return gr.update(interactive=True, elem_classes=[])
+     # When sampling is off, grey the slider out and pin it to 0 (greedy decoding)
+     return gr.update(interactive=False, elem_classes=["disabled-slider"], value=0)
+
+ # Create the Gradio interface
+ with gr.Blocks(css="""
+ .disabled-slider { opacity: 0.5; pointer-events: none; }
+ """) as demo:
+     gr.Markdown("# LLM Comparison Tool")
+     gr.Markdown("Compare outputs from different Hugging Face models side by side.")
+
+     with gr.Row():
+         prompt = gr.Textbox(
+             label="Enter your prompt",
+             placeholder="Type your prompt here...",
+             lines=3
+         )
+
+     with gr.Row():
+         submit_btn = gr.Button("Generate Responses")
+
+     with gr.Row():
+         with gr.Column():
+             model1_dropdown = gr.Dropdown(
+                 choices=AVAILABLE_MODELS,
+                 value=AVAILABLE_MODELS[0],
+                 label="Select Model 1"
+             )
+             do_sample1 = gr.Checkbox(
+                 label="Enable sampling (random outputs)",
+                 value=False
+             )
+             temp1 = gr.Slider(
+                 label="Temperature (Higher = more creative, lower = more predictable)",
+                 minimum=0,
+                 maximum=1,
+                 step=0.1,
+                 value=0.0,
+                 interactive=False,
+                 elem_classes=["disabled-slider"]
+             )
+             chatbot1 = gr.Chatbot(
+                 label="Model 1 Output",
+                 show_label=True,
+                 height=300,
+                 type="messages"
+             )
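+             # type="messages" renders OpenAI-style {"role": ..., "content": ...}
+             # dicts, the same format compare_models builds.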
+
+         with gr.Column():
+             model2_dropdown = gr.Dropdown(
+                 choices=AVAILABLE_MODELS,
+                 value=AVAILABLE_MODELS[1],
+                 label="Select Model 2"
+             )
+             do_sample2 = gr.Checkbox(
+                 label="Enable sampling (random outputs)",
+                 value=False
+             )
+             temp2 = gr.Slider(
+                 label="Temperature (Higher = more creative, lower = more predictable)",
+                 minimum=0,
+                 maximum=1,
+                 step=0.1,
+                 value=0.0,
+                 interactive=False,
+                 elem_classes=["disabled-slider"]
+             )
+             chatbot2 = gr.Chatbot(
+                 label="Model 2 Output",
+                 show_label=True,
+                 height=300,
+                 type="messages"
+             )
+
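+     # queue=False makes the button disable instantly on click; compare_models
+     # then re-enables it through its third return value once both calls finish.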
+     def start_loading():
+         return gr.update(interactive=False)
+
+     # Handle form submission
+     submit_btn.click(
+         fn=start_loading,
+         inputs=None,
+         outputs=submit_btn,
+         queue=False
+     ).then(
+         fn=compare_models,
+         inputs=[prompt, model1_dropdown, model2_dropdown, temp1, temp2, do_sample1, do_sample2],
+         outputs=[chatbot1, chatbot2, submit_btn]
+     )
+
+     do_sample1.change(
+         fn=update_slider_state,
+         inputs=[do_sample1],
+         outputs=[temp1]
+     )
+
+     do_sample2.change(
+         fn=update_slider_state,
+         inputs=[do_sample2],
+         outputs=[temp2]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
+     # demo.launch(share=True)
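+     # share=True would additionally serve the app through a temporary
+     # public *.gradio.live link.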
environment.yml ADDED
@@ -0,0 +1,79 @@
+ name: llm_compare
+ channels:
+   - conda-forge
+ dependencies:
+   - bzip2=1.0.8=hfdf4475_7
+   - ca-certificates=2025.4.26=hbd8a1cb_0
+   - libexpat=2.7.0=h240833e_0
+   - libffi=3.4.6=h281671d_1
+   - liblzma=5.8.1=hd471939_1
+   - liblzma-devel=5.8.1=hd471939_1
+   - libsqlite=3.49.2=hdb6dae5_0
+   - libzlib=1.3.1=hd23fc13_2
+   - ncurses=6.5=h0622a9a_3
+   - openssl=3.5.0=hc426f3f_1
+   - pip=25.1.1=pyh8b19718_0
+   - python=3.10.17=h93e8a92_0_cpython
+   - python-dotenv=1.1.0=pyh29332c3_1
+   - readline=8.2=h7cca4af_2
+   - setuptools=80.8.0=pyhff2d567_0
+   - tk=8.6.13=h1abcd95_1
+   - wheel=0.45.1=pyhd8ed1ab_1
+   - xz=5.8.1=h357f2ed_1
+   - xz-gpl-tools=5.8.1=h357f2ed_1
+   - xz-tools=5.8.1=hd471939_1
+   - pip:
+       - aiofiles==24.1.0
+       - annotated-types==0.7.0
+       - anyio==4.9.0
+       - certifi==2025.4.26
+       - charset-normalizer==3.4.2
+       - click==8.1.8
+       - exceptiongroup==1.3.0
+       - fastapi==0.115.12
+       - ffmpy==0.5.0
+       - filelock==3.18.0
+       - fsspec==2025.5.0
+       - gradio==5.29.0
+       - gradio-client==1.10.0
+       - groovy==0.1.2
+       - h11==0.16.0
+       - httpcore==1.0.9
+       - httpx==0.28.1
+       - huggingface-hub==0.31.2
+       - idna==3.10
+       - jinja2==3.1.6
+       - markdown-it-py==3.0.0
+       - markupsafe==3.0.2
+       - mdurl==0.1.2
+       - numpy==2.2.6
+       - orjson==3.10.18
+       - packaging==25.0
+       - pandas==2.2.3
+       - pillow==11.2.1
+       - pydantic==2.11.4
+       - pydantic-core==2.33.2
+       - pydub==0.25.1
+       - pygments==2.19.1
+       - python-dateutil==2.9.0.post0
+       - python-multipart==0.0.20
+       - pytz==2025.2
+       - pyyaml==6.0.2
+       - requests==2.32.3
+       - rich==14.0.0
+       - ruff==0.11.10
+       - safehttpx==0.1.6
+       - semantic-version==2.10.0
+       - shellingham==1.5.4
+       - six==1.17.0
+       - sniffio==1.3.1
+       - starlette==0.46.2
+       - tomlkit==0.13.2
+       - tqdm==4.67.1
+       - typer==0.15.4
+       - typing-extensions==4.13.2
+       - typing-inspection==0.4.1
+       - tzdata==2025.2
+       - urllib3==2.4.0
+       - uvicorn==0.34.2
+       - websockets==15.0.1