Commit aac64a6 · Parent: 83b15cf
Initial commit: LLM Comparison App

Files changed:
- .gitignore        +2   -0
- README.md         +62  -1
- app.py            +178 -0
- environment.yml   +79  -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+.DS_Store
+.env
README.md
CHANGED
@@ -11,4 +11,65 @@ license: mit
 short_description: Compare outputs from text-generation models side by side
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+<!-- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -->
+
+# LLM Comparison Tool
+
+A Gradio web application that lets you compare outputs from different Hugging Face models side by side.
+
+## Features
+
+- Compare outputs from two different LLMs simultaneously
+- Simple and clean interface
+- Support for multiple Hugging Face models
+- Text generation using Hugging Face's Inference API
+- Error handling and user feedback
+
+## Setup
+
+1. Clone this repository
+
+2. Create and activate the conda environment:
+```bash
+conda env create -f environment.yml
+conda activate llm_compare
+```
+
+3. Create a `.env` file in the root directory and add your Hugging Face API token:
+```
+HF_TOKEN=your_hugging_face_token_here
+```
+You can get your token from your [Hugging Face profile settings](https://huggingface.co/settings/tokens).
+
+## Running the App
+
+1. Make sure you have activated the conda environment:
+```bash
+conda activate llm_compare
+```
+
+2. Run the application:
+```bash
+python app.py
+```
+
+3. Open your browser and navigate to the URL shown in the terminal (typically `http://localhost:7860`)
+
+## Usage
+
+1. Enter your prompt in the text box
+2. Select two different models from the dropdown menus
+3. Click "Generate Responses" to see the outputs
+4. The responses appear in the chatbot interfaces below each model selection
+
+## Models Available
+
+- HuggingFaceH4/zephyr-7b-beta
+- meta-llama/Llama-3.1-8B-Instruct
+- microsoft/Phi-3.5-mini-instruct
+- Qwen/QwQ-32B
+
+## Notes
+
+- Make sure you have a valid Hugging Face API token with appropriate permissions
+- Response times may vary depending on the model size and server load
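For step 3 of Setup, it can help to confirm that the token in `.env` actually authenticates before launching the app. A minimal sketch (the filename `token_check.py` is illustrative, not part of this commit), using the `whoami` helper from `huggingface_hub`:

```python
# token_check.py: quick sanity check for the HF_TOKEN loaded from .env (illustrative)
import os

from dotenv import load_dotenv
from huggingface_hub import whoami

load_dotenv()
token = os.getenv("HF_TOKEN")
if not token:
    raise SystemExit("HF_TOKEN is not set; see the Setup section of the README.")

# whoami raises an error for an invalid token, otherwise returns account info
info = whoami(token=token)
print(f"Token OK, authenticated as: {info['name']}")
```

If this prints your account name, the startup check in app.py will pass as well.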
app.py
ADDED
@@ -0,0 +1,178 @@
+import os
+import gradio as gr
+from dotenv import load_dotenv
+from huggingface_hub import InferenceClient
+
+# Load environment variables
+load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+if not HF_TOKEN:
+    raise ValueError("Please set the HF_TOKEN environment variable")
+
+# Available models
+AVAILABLE_MODELS = [
+    "HuggingFaceH4/zephyr-7b-beta",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "microsoft/Phi-3.5-mini-instruct",
+    "Qwen/QwQ-32B",
+]
+
+# Initialize inference client
+inference_client = InferenceClient(token=HF_TOKEN)
+
+
+def get_model_response(prompt, model_name, temperature_value, do_sample):
+    """Get a response from a Hugging Face model via the Inference API."""
+    try:
+        # Build kwargs dynamically
+        generation_args = {
+            "prompt": prompt,
+            "model": model_name,
+            "max_new_tokens": 100,
+            "do_sample": do_sample,
+            "return_full_text": False,
+        }
+
+        # Only include temperature if sampling is enabled
+        if do_sample and temperature_value > 0:
+            generation_args["temperature"] = temperature_value
+
+        return inference_client.text_generation(**generation_args)
+
+    except Exception as e:
+        return f"Error: {e}"
+
+
+def compare_models(prompt, model1, model2, temp1, temp2, do_sample1, do_sample2):
+    """Compare outputs from the two selected models."""
+    if not prompt.strip():
+        warning = [
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": "Please enter a prompt"},
+        ]
+        return warning, warning, gr.update(interactive=True)
+
+    response1 = get_model_response(prompt, model1, temp1, do_sample1)
+    response2 = get_model_response(prompt, model2, temp2, do_sample2)
+
+    # Format responses for chatbot display (type="messages" expects role/content dicts)
+    chat1 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response1}]
+    chat2 = [{"role": "user", "content": prompt}, {"role": "assistant", "content": response2}]
+
+    return chat1, chat2, gr.update(interactive=True)
+
+
+# Toggle the temperature slider when the sampling checkbox changes.
+# A single gr.update per event avoids listing the same slider twice in outputs.
+def update_slider_state(enabled):
+    if enabled:
+        return gr.update(interactive=True, elem_classes=[])
+    # Grey out and reset to 0 while sampling is disabled
+    return gr.update(interactive=False, elem_classes=["disabled-slider"], value=0)
+
+
+# Create the Gradio interface
+with gr.Blocks(css="""
+    .disabled-slider { opacity: 0.5; pointer-events: none; }
+""") as demo:
+    gr.Markdown("# LLM Comparison Tool")
+    gr.Markdown("Compare outputs from different Hugging Face models side by side.")
+
+    with gr.Row():
+        prompt = gr.Textbox(
+            label="Enter your prompt",
+            placeholder="Type your prompt here...",
+            lines=3,
+        )
+
+    with gr.Row():
+        submit_btn = gr.Button("Generate Responses")
+
+    with gr.Row():
+        with gr.Column():
+            model1_dropdown = gr.Dropdown(
+                choices=AVAILABLE_MODELS,
+                value=AVAILABLE_MODELS[0],
+                label="Select Model 1",
+            )
+            do_sample1 = gr.Checkbox(
+                label="Enable sampling (random outputs)",
+                value=False,
+            )
+            temp1 = gr.Slider(
+                label="Temperature (higher = more creative, lower = more predictable)",
+                minimum=0,
+                maximum=1,
+                step=0.1,
+                value=0.0,
+                interactive=False,
+                elem_classes=["disabled-slider"],
+            )
+            chatbot1 = gr.Chatbot(
+                label="Model 1 Output",
+                show_label=True,
+                height=300,
+                type="messages",
+            )
+
+        with gr.Column():
+            model2_dropdown = gr.Dropdown(
+                choices=AVAILABLE_MODELS,
+                value=AVAILABLE_MODELS[1],
+                label="Select Model 2",
+            )
+            do_sample2 = gr.Checkbox(
+                label="Enable sampling (random outputs)",
+                value=False,
+            )
+            temp2 = gr.Slider(
+                label="Temperature (higher = more creative, lower = more predictable)",
+                minimum=0,
+                maximum=1,
+                step=0.1,
+                value=0.0,
+                interactive=False,
+                elem_classes=["disabled-slider"],
+            )
+            chatbot2 = gr.Chatbot(
+                label="Model 2 Output",
+                show_label=True,
+                height=300,
+                type="messages",
+            )
+
+    def start_loading():
+        # Disable the button while generation is in flight
+        return gr.update(interactive=False)
+
+    # Handle form submission: disable the button, then run the comparison
+    submit_btn.click(
+        fn=start_loading,
+        inputs=None,
+        outputs=submit_btn,
+        queue=False,
+    ).then(
+        fn=compare_models,
+        inputs=[prompt, model1_dropdown, model2_dropdown, temp1, temp2, do_sample1, do_sample2],
+        outputs=[chatbot1, chatbot2, submit_btn],
+    )
+
+    do_sample1.change(
+        fn=update_slider_state,
+        inputs=[do_sample1],
+        outputs=[temp1],
+    )
+
+    do_sample2.change(
+        fn=update_slider_state,
+        inputs=[do_sample2],
+        outputs=[temp2],
+    )
+
+if __name__ == "__main__":
+    demo.launch()
+    # demo.launch(share=True)
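To exercise the same Inference API call that `get_model_response` builds, a minimal sketch for trying one model from a plain Python session; the prompt string is an arbitrary illustration, and `do_sample=False` mirrors the app's default deterministic decoding:

```python
# try_model.py: one-off call mirroring get_model_response's defaults (illustrative)
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()
client = InferenceClient(token=os.getenv("HF_TOKEN"))

# Greedy decoding: do_sample=False, so no temperature is passed
output = client.text_generation(
    prompt="Explain the difference between a list and a tuple in Python.",
    model="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=100,
    do_sample=False,
    return_full_text=False,
)
print(output)
```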
environment.yml
ADDED
@@ -0,0 +1,79 @@
+name: llm_compare
+channels:
+  - conda-forge
+dependencies:
+  - bzip2=1.0.8=hfdf4475_7
+  - ca-certificates=2025.4.26=hbd8a1cb_0
+  - libexpat=2.7.0=h240833e_0
+  - libffi=3.4.6=h281671d_1
+  - liblzma=5.8.1=hd471939_1
+  - liblzma-devel=5.8.1=hd471939_1
+  - libsqlite=3.49.2=hdb6dae5_0
+  - libzlib=1.3.1=hd23fc13_2
+  - ncurses=6.5=h0622a9a_3
+  - openssl=3.5.0=hc426f3f_1
+  - pip=25.1.1=pyh8b19718_0
+  - python=3.10.17=h93e8a92_0_cpython
+  - python-dotenv=1.1.0=pyh29332c3_1
+  - readline=8.2=h7cca4af_2
+  - setuptools=80.8.0=pyhff2d567_0
+  - tk=8.6.13=h1abcd95_1
+  - wheel=0.45.1=pyhd8ed1ab_1
+  - xz=5.8.1=h357f2ed_1
+  - xz-gpl-tools=5.8.1=h357f2ed_1
+  - xz-tools=5.8.1=hd471939_1
+  - pip:
+      - aiofiles==24.1.0
+      - annotated-types==0.7.0
+      - anyio==4.9.0
+      - certifi==2025.4.26
+      - charset-normalizer==3.4.2
+      - click==8.1.8
+      - exceptiongroup==1.3.0
+      - fastapi==0.115.12
+      - ffmpy==0.5.0
+      - filelock==3.18.0
+      - fsspec==2025.5.0
+      - gradio==5.29.0
+      - gradio-client==1.10.0
+      - groovy==0.1.2
+      - h11==0.16.0
+      - httpcore==1.0.9
+      - httpx==0.28.1
+      - huggingface-hub==0.31.2
+      - idna==3.10
+      - jinja2==3.1.6
+      - markdown-it-py==3.0.0
+      - markupsafe==3.0.2
+      - mdurl==0.1.2
+      - numpy==2.2.6
+      - orjson==3.10.18
+      - packaging==25.0
+      - pandas==2.2.3
+      - pillow==11.2.1
+      - pydantic==2.11.4
+      - pydantic-core==2.33.2
+      - pydub==0.25.1
+      - pygments==2.19.1
+      - python-dateutil==2.9.0.post0
+      - python-multipart==0.0.20
+      - pytz==2025.2
+      - pyyaml==6.0.2
+      - requests==2.32.3
+      - rich==14.0.0
+      - ruff==0.11.10
+      - safehttpx==0.1.6
+      - semantic-version==2.10.0
+      - shellingham==1.5.4
+      - six==1.17.0
+      - sniffio==1.3.1
+      - starlette==0.46.2
+      - tomlkit==0.13.2
+      - tqdm==4.67.1
+      - typer==0.15.4
+      - typing-extensions==4.13.2
+      - typing-inspection==0.4.1
+      - tzdata==2025.2
+      - urllib3==2.4.0
+      - uvicorn==0.34.2
+      - websockets==15.0.1
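Because the pip pins above determine runtime behavior (for example, `gr.Chatbot(type="messages")` in app.py assumes a recent Gradio), a small check can confirm the environment resolved as pinned. A sketch, assuming the `llm_compare` environment is active; the subset of packages checked is an arbitrary illustration:

```python
# check_env.py: verify a few key pins from environment.yml (illustrative subset)
import importlib.metadata as md

PINS = {
    "gradio": "5.29.0",
    "huggingface-hub": "0.31.2",
    "python-dotenv": "1.1.0",
}

for package, expected in PINS.items():
    installed = md.version(package)
    status = "ok" if installed == expected else "MISMATCH"
    print(f"{package}: installed {installed}, pinned {expected} [{status}]")
```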