File size: 4,551 Bytes
c8c8070
0773644
1d82bda
 
 
 
 
 
 
 
 
 
 
 
40eb9ab
 
 
 
 
 
 
 
 
 
 
1d82bda
 
 
c8c8070
1d82bda
0251c9e
1d82bda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0773644
 
1d82bda
34ab564
0773644
 
 
 
40eb9ab
c8c8070
40eb9ab
56d3094
b8e2db5
0773644
963e057
 
 
 
0773644
 
963e057
 
0773644
b8e2db5
0773644
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import json
import re
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint identifier handed to both `from_pretrained` calls, and the
# truncation limit later passed to the tokenizer in `model_inference`.
model_name = 'yuntian-deng/ak-paper-selection-deberta'
max_length = 512

tokenizer = AutoTokenizer.from_pretrained(model_name)
# `eval()` returns the module itself, so it can be chained onto the load.
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
if torch.cuda.is_available():
    # Move the weights to the GPU when one is present.
    model = model.cuda()

# Load the validation tables. The context manager closes the file handle as
# soon as parsing is done instead of leaving it to the garbage collector.
with open('validation_results.json', encoding='utf-8') as results_file:
    validation_results = json.load(results_file)

# Three sequences that `get_threshold_precision` walks in lockstep.
scores = validation_results['scores']
thresholds = validation_results['thresholds']
precisions = validation_results['precisions']

def get_threshold_precision(score_, tables=None):
    """Look up the threshold and precision that apply to a predicted score.

    The tables are walked in lockstep and the last row whose score does not
    exceed ``score_`` is selected; the walk stops at the first row whose score
    is greater than ``score_``.  (This presumably relies on the scores being
    sorted in ascending order -- confirm against how the JSON is produced.)

    Args:
        score_: The predicted score to look up.
        tables: Optional ``(scores, thresholds, precisions)`` triple of
            equally indexed sequences.  When omitted, the module-level
            ``scores``, ``thresholds`` and ``precisions`` are used.

    Returns:
        A ``(threshold, precision)`` tuple taken from the selected row.  When
        the row's threshold equals the row's score, ``score_`` itself is
        returned as the threshold.

    Raises:
        ValueError: If the tables contain no rows.
    """
    if tables is None:
        # Read the module-level tables; the original referenced an undefined
        # name (`all_scores`) here and failed with NameError on every call.
        tables = (scores, thresholds, precisions)

    rows = list(zip(*tables))
    if not rows:
        raise ValueError("validation tables are empty")

    # Start from the first row so that a score below every validation score
    # still resolves instead of leaving the result unbound.
    best_score, best_threshold, best_precision = rows[0]
    for score, threshold, precision in rows:
        if score_ < score:
            break
        best_score, best_threshold, best_precision = score, threshold, precision

    if best_threshold == best_score:
        best_threshold = score_
    return best_threshold, best_precision

def normalize_spaces(text):
    """Collapse every run of whitespace in ``text`` to one space and trim the ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()

def fill_template(title, authors, abstract):
    """Render a paper's fields into the three-line text fed to the tokenizer.

    The title and abstract have newlines replaced and whitespace collapsed via
    ``normalize_spaces``.  The author string is split on commas, each piece is
    stripped, and the pieces are re-joined with ``", "``.

    Returns:
        A string of the form ``Title: ...``, ``Authors: ...``, ``Abstract: ...``
        on three lines separated by newlines.
    """
    clean_title = normalize_spaces(title.replace('\n', ' '))
    author_names = (name.strip() for name in authors.split(','))
    clean_authors = ', '.join(author_names)
    clean_abstract = normalize_spaces(abstract.replace('\n', ' '))
    return '\n'.join((
        f"Title: {clean_title}",
        f"Authors: {clean_authors}",
        f"Abstract: {clean_abstract}",
    ))

@torch.no_grad()
def model_inference(title, authors, abstract):
    """Run the classifier on one paper and return its index-1 probability.

    The fields are rendered with ``fill_template``, tokenized as a batch of
    one (truncated to ``max_length``), moved to the GPU when CUDA is
    available, and passed through ``model``.

    Returns:
        The softmax probability at index 1 of the flattened logits, as a
        Python float.  (Index 1 is presumably the "selected" class -- confirm
        against the checkpoint's label mapping.)
    """
    prompt = fill_template(title, authors, abstract)
    print(prompt)
    encoded = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=max_length)
    if torch.cuda.is_available():
        encoded = {name: tensor.cuda() for name, tensor in encoded.items()}
    class_probs = torch.softmax(model(**encoded).logits, dim=-1).view(-1)
    return class_probs[1].item()

def predict(title, authors, abstract):
    """Score a paper and describe the score against the validation tables.

    Returns:
        A ``(score, message)`` tuple: the raw score from ``model_inference``
        and a two-line message reporting that score together with the
        threshold and precision returned by ``get_threshold_precision``.
    """
    paper_score = model_inference(title, authors, abstract)
    cutoff, hit_rate = get_threshold_precision(paper_score)

    message = (
        f"Your score: {paper_score:.2f}.\n"
        f"For papers with score >= {cutoff:.2f}, {hit_rate * 100:.2f}% are selected by AK."
    )
    return paper_score, message

# Example paper (title, comma-separated authors, abstract) used as the initial
# values of the three input widgets of the Gradio interface.
example_title = "WildChat: 1M ChatGPT Interaction Logs in the Wild"
example_authors = "Wenting Zhao, Xiang Ren, Jack Hessel, Claire Cardie, Yejin Choi, Yuntian Deng"
example_abstract = "Chatbots such as GPT-4 and ChatGPT are now serving millions of users. Despite their widespread use, there remains a lack of public datasets showcasing how these tools are used by a population of users in practice. To bridge this gap, we offered free access to ChatGPT for online users in exchange for their affirmative, consensual opt-in to anonymously collect their chat transcripts and request headers. From this, we compiled WildChat, a corpus of 1 million user-ChatGPT conversations, which consists of over 2.5 million interaction turns. We compare WildChat with other popular user-chatbot interaction datasets, and find that our dataset offers the most diverse user prompts, contains the largest number of languages, and presents the richest variety of potentially toxic use-cases for researchers to study. In addition to timestamped chat transcripts, we enrich the dataset with demographic data, including state, country, and hashed IP addresses, alongside request headers. This augmentation allows for more detailed analysis of user behaviors across different geographical regions and temporal dimensions. Finally, because it captures a broad range of use cases, we demonstrate the dataset’s potential utility in fine-tuning instruction-following models. WildChat is released at https://wildchat.allen.ai under AI2 ImpACT Licenses."
    
# Build the UI: three text inputs pre-filled with the example paper, two text
# outputs matching the (score, message) pair returned by `predict`.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(placeholder="Enter paper title", value=example_title),
        gr.Textbox(placeholder="Enter authors (separated by comma)", value=example_authors),
        gr.TextArea(placeholder="Enter abstract", value=example_abstract),
    ],
    outputs=[
        gr.Textbox(label="Predicted Score"),
        gr.Textbox(label="Predicted Selection Probability"),
    ],
    title="Will your paper be selected by @_akhaliq?",
    description="Enter the title, authors, and abstract of the paper to predict whether @_akhaliq will select your paper!",
    live=True,
)

# Start serving the interface.
iface.launch()