# protobench / app.py
# vtrv.vls
# SBS theme
# 00b9214
# raw
# history blame
# 11.7 kB
import gradio
import argparse
import os
import boto3
import pandas as pd
from copy import copy
from random import choice
import queue
from constants import css, js_code, js_light
from utils import model_response, clear_chat
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
# Model instances keyed by model name; model_manager stores None for a name
# once that model has been evicted.
INIT_MODELS = {}

# S3 client; assigned in the __main__ section, None until then.
S3_SESSION = None

# Markdown text read from test.md in the __main__ section, None until then.
TEST_MD = None

# Names of the models that model_manager currently tracks (LIFO order).
CURRENT_MODELS = queue.LifoQueue()

# Model name -> zero-argument factory that builds the model.
MODEL_LIB = {
    "TINYLLAMA": get_tinyllama,
    "QWEN2INS1B": get_qwen2ins1b,
    "RUBASE": GigaChat.get_giga,
}

# Model name -> function that produces a response with that model.
GEN_LIB = {
    "TINYLLAMA": response_tinyllama,
    "QWEN2INS1B": response_qwen2ins1b,
    "RUBASE": response_gigachat,
}

# Names offered in the UI, in the same order as the registry above.
MODEL_LIST = list(MODEL_LIB)
async def model_gen(
    content,
    chat_history,
    model_name: str,
    top_p,
    temp,
    max_tokens,
    no_context=False
):
    """Answer the prompt ``content`` with the model called ``model_name``.

    Args:
        content: Prompt text taken from the textbox.
        chat_history: Conversation shown so far in the chatbot.
        model_name: Key into MODEL_LIB / GEN_LIB.
        top_p: Sampling parameter, forwarded as ``top_p``.
        temp: Sampling parameter, forwarded as ``temperature``.
        max_tokens: Sampling parameter, forwarded as ``max_tokens``.
        no_context: When true the model receives an empty history instead
            of ``chat_history``.

    Returns:
        ``('', [])`` for a missing or empty prompt, otherwise whatever
        ``model_response`` returns (the event wiring in this file maps it
        onto the textbox and the chatbot).
    """
    # Validate first: an empty submission must not trigger a model load.
    if not content:
        return '', []

    model_manager(model_name, MODEL_LIB, 3)

    history = [] if no_context else chat_history
    sampling = {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
    return await model_response(
        content,
        history,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        sampling,
    )
async def model_regen(
    content,
    chat_history,
    model_name: str,
    top_p,
    temp,
    max_tokens,
    no_context=False
):
    """Re-ask the last prompt of ``chat_history`` to the model ``model_name``.

    Args:
        content: Ignored; the prompt is taken from the last history entry.
            Kept so the signature matches model_gen.
        chat_history: Conversation shown so far; each entry is indexed as
            ``[user_message, ...]``.
        model_name: Key into MODEL_LIB / GEN_LIB.
        top_p: Sampling parameter, forwarded as ``top_p``.
        temp: Sampling parameter, forwarded as ``temperature``.
        max_tokens: Sampling parameter, forwarded as ``max_tokens``.
        no_context: When true the model receives no earlier turns at all.

    Returns:
        ``('', [])`` when there is nothing to regenerate (``None`` or empty
        history), otherwise whatever ``model_response`` returns.
    """
    # Covers both None and an empty list; the latter used to raise
    # IndexError on chat_history[-1].
    if not chat_history:
        return '', []

    model_manager(model_name, MODEL_LIB, 3)

    # The user message of the last exchange is the prompt to repeat.
    content = copy(chat_history[-1][0])

    # The last exchange itself is always dropped (it is being regenerated).
    # Without context nothing precedes it; previously this branch replaced
    # the history by a single pair, so the prompt lookup read the wrong item.
    history = [] if no_context else chat_history[:-1]

    sampling = {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}
    return await model_response(
        content,
        history,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        sampling,
    )
def model_manager(
    add_model,
    model_lib,
    max_models=3
):
    """Make sure ``add_model`` is loaded, evicting other models if needed.

    Args:
        add_model: Name of the model to make available.
        model_lib: Mapping from model name to a zero-argument factory.
        max_models: Maximum number of models tracked at once; values below
            1 are treated as 1.

    Raises:
        KeyError: If ``add_model`` is not a key of ``model_lib``.

    Side effects:
        Mutates the module-level INIT_MODELS and CURRENT_MODELS.
    """
    # Already loaded: do not rebuild the model or enqueue its name again.
    # Re-enqueueing on every request filled the queue with duplicates, which
    # in turn evicted (set to None) models that were still being used.
    if INIT_MODELS.get(add_model) is not None:
        return

    # A capacity below 1 would make the loop below wait on an empty queue.
    capacity = max(max_models, 1)

    # NOTE(review): CURRENT_MODELS is a LifoQueue, so this drops the most
    # recently added model, not the oldest one - confirm that is intended.
    while CURRENT_MODELS.qsize() >= capacity:
        evicted = CURRENT_MODELS.get()
        INIT_MODELS[evicted] = None

    # Build first, then track: a failing factory leaves no stale queue entry.
    INIT_MODELS[add_model] = model_lib[add_model]()
    CURRENT_MODELS.put(add_model)
def tab_online_arena():
    """Build the side-by-side model arena inside the current Gradio context.

    Creates two model dropdowns (each preselected at random from MODEL_LIST)
    with a chatbot under each, a shared prompt box, four voting buttons,
    sampling controls and clear/regenerate buttons. Submitting the prompt
    runs model_gen once per side; the regenerate buttons run model_regen for
    their side; changing a dropdown runs clear_chat for that side.
    """
    with gradio.Row():
        with gradio.Column():
            model_left = gradio.Dropdown(
                MODEL_LIST,
                value=choice(MODEL_LIST),
                interactive=True,
                multiselect=False,
                label="Left model",
            )
            chatbot_left = gradio.Chatbot()
        with gradio.Column():
            model_right = gradio.Dropdown(
                MODEL_LIST,
                value=choice(MODEL_LIST),
                interactive=True,
                multiselect=False,
                label="Right model",
            )
            chatbot_right = gradio.Chatbot()

    with gradio.Row():
        msg = gradio.Textbox(label='Prompt', placeholder='Put your prompt here')

    with gradio.Row():
        # Voting buttons; no click handlers are attached to them here.
        gradio.Button('Both Good')
        gradio.Button('Left Better')
        gradio.Button('Right Better')
        gradio.Button('Both Bad')

    with gradio.Row():
        with gradio.Accordion("Parameters", open=False):
            no_context = gradio.Checkbox(label="No context", value=False)
            top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
            temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
            max_tokens = gradio.Slider(label='Max output tokens', minimum=1, maximum=2048, value=256, step=1, interactive=True)

    with gradio.Row():
        gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
        regen_left = gradio.Button(value='Regenerate left answer')
        regen_right = gradio.Button(value='Regenerate right answer')

    # Inputs every generation handler takes after (prompt, chatbot, model),
    # in the positional order of model_gen / model_regen.
    gen_params = [top_p, temp, max_tokens, no_context]
    left_inputs = [msg, chatbot_left, model_left, *gen_params]
    right_inputs = [msg, chatbot_right, model_right, *gen_params]

    regen_left.click(model_regen, left_inputs, [msg, chatbot_left])
    regen_right.click(model_regen, right_inputs, [msg, chatbot_right])

    with gradio.Blocks():
        model_left.change(clear_chat, [], [msg, chatbot_left])
        model_right.change(clear_chat, [], [msg, chatbot_right])
        # One submit listener per side, so a single prompt queries both models.
        msg.submit(model_gen, left_inputs, [msg, chatbot_left])
        msg.submit(model_gen, right_inputs, [msg, chatbot_right])
# with gradio.Column():
# gradio.ChatInterface(
# fn=giga_gen,
# examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}],
# title="Giga",
# multimodal=True,
# )
# with gradio.Column():
# gradio.ChatInterface(
# fn=tiny_gen,
# examples=[{"text": "hello"}, {"text": "hola"}, {"text": "merhaba"}],
# title="Tiny",
# multimodal=True,
# )
# with gradio.Column():
# gradio.Interface(fn=giga_gen, inputs="text", outputs="text", allow_flagging=False, title='Giga') # arena =
# with gradio.Column():
# gradio.Interface(fn=tiny_gen, inputs="text", outputs="text", allow_flagging=False, title='TinyLlama') # arena =
# arena.launch()
def tab_leaderboard():
    """Render four leaderboard tabs, each showing the same placeholder table."""
    scores = pd.DataFrame({
        "Model": ['A', 'B', 'C'],
        "Test 1": [0, 1, 0],
        "Test 2": [1, 0, 1],
    })

    def highlight_cols(x):
        """Return a copy of ``x`` whose 'Model' column holds a green-text rule."""
        styled = x.copy()
        # styled.loc[:, :] = 'color: purple'
        styled[['Model']] = 'color: green'
        return styled

    # The styling helper above is not applied yet:
    # s = df.style.apply(highlight_cols, axis = None)

    tab_titles = (
        "Autogen Metrics",
        "Autometrics",
        "SBS metrics",
        "Arena ELO rating",
    )
    # Tab ids follow the position of the title: 0, 1, 2, 3.
    for tab_id, title in enumerate(tab_titles):
        with gradio.TabItem(title, id=tab_id):
            with gradio.Blocks():
                gradio.DataFrame(scores)
def tab_offline_arena():
    """Build the offline side-by-side tab inside the current Gradio context.

    Lays out three filter panels (models, task types, per-task criteria),
    each with a "Clear" button that unchecks every box of that panel, a
    sampling button, and a "History" panel with a sample conversation and
    two model-output text boxes. Apart from the "Clear" buttons, nothing is
    wired to a handler here.
    """
    task_names = ['Task 1', "Task 2", "Task 3"]
    criteria_names = ['Criterion 1', "Criterion 2", "Criterion 3"]

    def unchecked():
        """Return a Gradio update that unchecks a CheckboxGroup."""
        return gradio.update(value=[])

    with gradio.Row():
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose models to sample from", open=False):
                model_options = MODEL_LIST
                selected_models = gradio.CheckboxGroup(model_options, info="", value=model_options, show_label=False, interactive=True)
                clear_button = gradio.Button("Clear", scale=1)
                # Uncheck every model.
                clear_button.click(unchecked, inputs=[], outputs=[selected_models])
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose task types to sample from", open=False):
                select_tasks = gradio.CheckboxGroup(list(task_names), info="", value=list(task_names), show_label=False, interactive=True)
                clear_task_button = gradio.Button("Clear", scale=1)
                # Uncheck every task type.
                clear_task_button.click(unchecked, inputs=[], outputs=[select_tasks])
        # Not connected to a handler yet.
        gradio.Button("Click to get sample and models' outputs")
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose criteria to sample", open=False):
                criteria_groups = []
                with gradio.Row():
                    # One tab per task, each with its own criteria checklist.
                    for task in task_names:
                        with gradio.Tab(task):
                            criteria_groups.append(
                                gradio.CheckboxGroup(list(criteria_names), info="", value=list(criteria_names), show_label=False, interactive=True)
                            )
                clear_criteria_button = gradio.Button("Clear", scale=1)
                # Uncheck the criteria of all tasks at once: one update per
                # output component, in the order of ``outputs``.
                clear_criteria_button.click(
                    lambda: [unchecked() for _ in criteria_groups],
                    inputs=[],
                    outputs=criteria_groups,
                )

    with gradio.Accordion("History", open=True):
        with gradio.Row():
            # Empty side columns centre the chatbot. Gradio expects integer
            # scales, so the former 1 : 0.8 : 1 ratio is written as 5 : 4 : 5.
            with gradio.Column(scale=5):
                pass
            with gradio.Column(scale=4):
                gradio.Chatbot([['hey', 'Hey!'], ["are we testing something?", None]], container=True)
            with gradio.Column(scale=5):
                pass
        with gradio.Row():
            gradio.Text("Indeed we are.", label='Model A')
            gradio.Text("Are we?", label='Model B')
def build_demo():
    """Assemble the whole application and return the top-level Blocks.

    The app has four tabs: the leaderboard (TEST_MD followed by
    tab_leaderboard), the offline side-by-side view (tab_offline_arena), the
    live model arena (tab_online_arena) and an "About" page (TEST_MD).

    Returns:
        The ``gradio.Blocks`` instance, ready for ``launch()``.
    """
    soft_theme = gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg)
    with gradio.Blocks(theme=soft_theme) as demo:  # , css=css, js=js_light
        with gradio.Tabs():
            with gradio.TabItem("🐼 MERA leaderboard", id=0):
                gradio.Markdown(TEST_MD)
                tab_leaderboard()

            # The three titles below had mis-decoded emoji; they are
            # restored from the UTF-8 bytes of the garbled text.
            with gradio.TabItem("🆚 SBS by categories and criteria", id=1):
                tab_offline_arena()

            with gradio.TabItem("🥊 Model arena", id=2):
                tab_online_arena()

            # _tab_explore()

            with gradio.TabItem("💪 About MERA", id=3):
                gradio.Markdown(TEST_MD)

    return demo
if __name__ == "__main__":
    # Command line: --share is forwarded to launch() below.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    # parser.add_argument("--bench_table", help="Path to MERA table", default="data_dir/MERA_jun2024.jsonl")
    args = parser.parse_args()

    # data_load(args.result_file)
    # TYPES = ["number", "markdown", "number"]

    # Explicit encoding so the markdown is read the same way on every
    # platform instead of depending on the locale default.
    with open("test.md", "r", encoding="utf-8") as f:
        TEST_MD = f.read()

    # Best effort: the app still starts without S3, S3_SESSION stays None.
    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
            service_name='s3',
            endpoint_url=os.getenv('S3_ENDPOINT'),
            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
        )
    except Exception as exc:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        print(f'Failed to start s3 session: {exc}')

    app = build_demo()
    app.launch(share=args.share, height=3000, width="110%")  # share=args.share

    # demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
    # demo.launch()