Spaces:
Runtime error
Runtime error
File size: 2,606 Bytes
953bf3d debfcf8 2f5c740 d7f1630 2f5c740 d7f1630 ce419f1 b1d6e77 d7f1630 730ef21 d552000 b463202 2f5c740 d7f1630 dfc8b82 debfcf8 d7f1630 d7bea84 debfcf8 80ae653 debfcf8 2f5c740 9226230 80ae653 dfc8b82 b1d6e77 debfcf8 80ae653 debfcf8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import gradio as gr
import os
import pandas as pd
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Inference device for the reward model; change to 'cuda' to use a GPU.
device = 'cpu' # if you have a GPU
# SteamSHP reward model: given a post and two candidate responses, it
# generates "A" or "B" to indicate the preferred response (see process()).
tokenizer = T5Tokenizer.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large')
model = T5ForConditionalGeneration.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large').to(device)
# Instruction-tuned models whose pre-generated outputs are compared in the app.
model_list = [
'google/flan-t5-xxl',
'bigscience/bloomz-7b1',
'facebook/opt-iml-max-30b',
'allenai/tk-instruct-11b-def-pos']
# Token read from the Space's environment; presumably required because the
# outputs dataset is private/gated — TODO confirm.
HF_TOKEN = os.getenv("HF_TOKEN")
OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"
# NOTE(review): `use_auth_token` is deprecated in recent `datasets` releases
# in favor of `token` — confirm against the pinned datasets version.
ds = load_dataset(OUTPUTS_DATASET, split="train", use_auth_token=HF_TOKEN)
def process(model_A, model_B):
    """Sample one random prompt and ask the reward model which response wins.

    Args:
        model_A: model id whose output is shown as "RESPONSE A".
        model_B: model id whose output is shown as "RESPONSE B".

    Returns:
        Tuple of (prompt text, DataFrame restricted to the two selected
        models' outputs, preferred option — "A"/"B" decoded from the reward
        model, or an "N/A" message when an output is missing).
    """
    sample = ds.shuffle().select(range(1))[0]
    prompt = sample["prompt"]
    df = pd.DataFrame.from_records(sample["filtered_outputs"])
    responses = df[df['model'].isin([model_A, model_B])]
    response_A_series = df[df['model'] == model_A]["output"]
    response_B_series = df[df['model'] == model_B]["output"]
    # Guard: a sampled record may lack an output for one of the selected
    # models; indexing `.values[0]` on an empty selection raised IndexError
    # (the runtime error seen on the Space) before this check.
    if response_A_series.empty or response_B_series.empty:
        return prompt, responses, "N/A (sample is missing an output for a selected model)"
    response_A = response_A_series.values[0]
    response_B = response_B_series.values[0]
    # SteamSHP prompt format: POST + both responses; the model then emits a
    # single token, "A" or "B" (hence max_new_tokens=1).
    input_text = (
        "POST: " + prompt
        + "\n\n RESPONSE A: " + response_A
        + "\n\n RESPONSE B: " + response_B
        + "\n\n Which response is better? RESPONSE"
    )
    x = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
    y = model.generate(x, max_new_tokens=1)
    preferred = tokenizer.batch_decode(y, skip_special_tokens=True)[0]
    return prompt, responses, preferred
title = "Compare Instruction Models to see which one is more helpful"
# The dataset link must be an f-string so {OUTPUTS_DATASET} is interpolated
# (the original plain string rendered the literal placeholder text), and a
# markdown link must have no space between "]" and "(" to render as a link.
description = (
    "This app compares the outputs of various open-source, instruction-trained models from a "
    f"[dataset](https://huggingface.co/datasets/{OUTPUTS_DATASET}) of human demonstrations "
    "using a reward model trained on the "
    "[Stanford Human Preferences Dataset (SHP)](https://huggingface.co/datasets/stanfordnlp/SHP)"
)
# Gradio UI: choose two models; the app shows the sampled prompt, both
# models' responses, and which response the SteamSHP reward model prefers.
# (Also removes the stray trailing "|" artifact that broke the launch line.)
interface = gr.Interface(
    fn=process,
    inputs=[
        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
    ],
    outputs=[
        gr.Textbox(label="Prompt"),
        gr.DataFrame(label="Model Responses"),
        gr.Textbox(label="Preferred Option"),
    ],
    title=title,
    description=description,
)
interface.launch(debug=True)  # debug=True keeps the process attached and streams logs