jedick committed on
Commit
9d59e2b
·
1 Parent(s): 13753a4

Save user feedback to dataset in HF Hub

Browse files
Files changed (2) hide show
  1. app.py +155 -43
  2. requirements.txt +1 -0
app.py CHANGED
@@ -4,6 +4,18 @@ from transformers import pipeline
4
  import nltk
5
  from retrieval import retrieve_from_pdf
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  if gr.NO_RELOAD:
9
  # Resource punkt_tab not found during application startup on HF spaces
@@ -17,6 +29,23 @@ if gr.NO_RELOAD:
17
  model=MODEL_NAME,
18
  )
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def prediction_to_df(prediction=None):
22
  """
@@ -75,27 +104,31 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
75
  gr.Markdown("# AI4citations")
76
  gr.Markdown("## *AI-powered scientific citation verification*")
77
  claim = gr.Textbox(
78
- label="1. Claim",
79
  info="aka hypothesis",
80
  placeholder="Input claim",
81
  )
82
  with gr.Row():
83
- with gr.Accordion("Get Evidence from PDF"):
84
- pdf_file = gr.File(label="Upload PDF", type="filepath", height=120)
85
- get_evidence = gr.Button(value="Get Evidence")
86
- top_k = gr.Slider(
87
- 1,
88
- 10,
89
- value=5,
90
- step=1,
91
- interactive=True,
92
- label="Top k sentences",
 
 
 
 
 
 
 
 
 
93
  )
94
- evidence = gr.TextArea(
95
- label="2. Evidence",
96
- info="aka premise",
97
- placeholder="Input evidence or use Get Evidence from PDF",
98
- )
99
  submit = gr.Button("3. Submit", visible=False)
100
 
101
  with gr.Column(scale=2):
@@ -113,20 +146,14 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
113
  visible=False,
114
  )
115
  label = gr.Label(label="Results")
116
- with gr.Accordion("Settings"):
117
- # Create dropdown menu to select the model
118
- dropdown = gr.Dropdown(
119
- choices=[
120
- # TODO: For bert-base-uncased, how can we set num_labels = 2 in HF pipeline?
121
- # (num_labels is available in AutoModelForSequenceClassification.from_pretrained)
122
- # "bert-base-uncased",
123
- "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
124
- "jedick/DeBERTa-v3-base-mnli-fever-anli-scifact-citint",
125
- ],
126
- value=MODEL_NAME,
127
- label="Model",
128
- )
129
- radio = gr.Radio(["label", "barplot"], value="label", label="Results")
130
  with gr.Accordion("Examples"):
131
  gr.Markdown("*Examples are run when clicked*"),
132
  with gr.Row():
@@ -163,8 +190,6 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
163
  ].tolist(),
164
  )
165
 
166
- # Sources and acknowledgments
167
-
168
  with gr.Row():
169
  with gr.Column(scale=3):
170
  with gr.Row():
@@ -173,9 +198,9 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
173
  """
174
  ### Usage:
175
 
176
- 1. Input a **Claim**
177
- 2. Input **Evidence** statements
178
- - *Optional:* Upload a PDF and click Get Evidence
179
  """
180
  )
181
  with gr.Column(scale=2):
@@ -183,14 +208,29 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
183
  """
184
  ### To make predictions:
185
 
186
- - Hit 'Enter' in the **Claim** text box,
187
- - Hit 'Shift-Enter' in the **Evidence** text box, or
188
- - Click Get Evidence
 
189
  """
190
  )
191
 
192
- with gr.Column(scale=2, elem_classes=["center-content"]):
193
- with gr.Accordion("Sources", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  gr.Markdown(
195
  """
196
  #### *Capstone project*
@@ -286,6 +326,61 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
286
  pdf_file = f"examples/retrieval/{pdf_file}"
287
  return pdf_file, claim
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  # Event listeners
290
 
291
  # Click the submit button or press Enter to submit
@@ -380,16 +475,16 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
380
 
381
  # Clear the previous predictions when the model is changed
382
  gr.on(
383
- triggers=[dropdown.select],
384
  fn=lambda: "Model changed! Waiting for updated predictions...",
385
  outputs=[prediction],
386
  api_name=False,
387
  )
388
 
389
  # Change the model to update the predictions
390
- dropdown.change(
391
  fn=select_model,
392
- inputs=dropdown,
393
  ).then(
394
  fn=query_model,
395
  inputs=[claim, evidence],
@@ -397,6 +492,23 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
397
  api_name=False,
398
  )
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  if __name__ == "__main__":
402
  # allowed_paths is needed to upload PDFs from specific example directory
 
4
  import nltk
5
  from retrieval import retrieve_from_pdf
6
  import os
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from uuid import uuid4
11
+
12
+
13
+ def is_running_in_hf_spaces():
14
+ """
15
+ Detects if app is running in Hugging Face Spaces
16
+ """
17
+ return "SPACE_ID" in os.environ
18
+
19
 
20
  if gr.NO_RELOAD:
21
  # Resource punkt_tab not found during application startup on HF spaces
 
29
  model=MODEL_NAME,
30
  )
31
 
32
+ # Setup user feedback file for uploading to HF dataset
33
+ # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
34
+ # https://huggingface.co/docs/huggingface_hub/v0.16.3/en/guides/upload#scheduled-uploads
35
+ USER_FEEDBACK_DIR = Path("user_feedback")
36
+ USER_FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
37
+ USER_FEEDBACK_PATH = USER_FEEDBACK_DIR / f"train-{uuid4()}.json"
38
+
39
+ if is_running_in_hf_spaces():
40
+ from huggingface_hub import CommitScheduler
41
+
42
+ scheduler = CommitScheduler(
43
+ repo_id="AI4citations-feedback",
44
+ repo_type="dataset",
45
+ folder_path=USER_FEEDBACK_DIR,
46
+ path_in_repo="data",
47
+ )
48
+
49
 
50
  def prediction_to_df(prediction=None):
51
  """
 
104
  gr.Markdown("# AI4citations")
105
  gr.Markdown("## *AI-powered scientific citation verification*")
106
  claim = gr.Textbox(
107
+ label="Claim",
108
  info="aka hypothesis",
109
  placeholder="Input claim",
110
  )
111
  with gr.Row():
112
+ with gr.Column(scale=2):
113
+ with gr.Accordion("Get Evidence from PDF"):
114
+ pdf_file = gr.File(
115
+ label="Upload PDF", type="filepath", height=120
116
+ )
117
+ get_evidence = gr.Button(value="Get Evidence")
118
+ top_k = gr.Slider(
119
+ 1,
120
+ 10,
121
+ value=5,
122
+ step=1,
123
+ interactive=True,
124
+ label="Top k sentences",
125
+ )
126
+ with gr.Column(scale=3):
127
+ evidence = gr.TextArea(
128
+ label="Evidence",
129
+ info="aka premise",
130
+ placeholder="Input evidence or use Get Evidence from PDF",
131
  )
 
 
 
 
 
132
  submit = gr.Button("3. Submit", visible=False)
133
 
134
  with gr.Column(scale=2):
 
146
  visible=False,
147
  )
148
  label = gr.Label(label="Results")
149
+ with gr.Accordion("Feedback"):
150
+ gr.Markdown(
151
+ "*Click on the correct label to help improve this app*<br>**NOTE:** The claim and evidence will also be saved"
152
+ ),
153
+ with gr.Row():
154
+ flag_support = gr.Button("Support")
155
+ flag_nei = gr.Button("NEI")
156
+ flag_refute = gr.Button("Refute")
 
 
 
 
 
 
157
  with gr.Accordion("Examples"):
158
  gr.Markdown("*Examples are run when clicked*"),
159
  with gr.Row():
 
190
  ].tolist(),
191
  )
192
 
 
 
193
  with gr.Row():
194
  with gr.Column(scale=3):
195
  with gr.Row():
 
198
  """
199
  ### Usage:
200
 
201
+ - Input a **Claim**, then:
202
+ - Upload a PDF and click **Get Evidence** OR
203
+ - Input **Evidence** statements yourself
204
  """
205
  )
206
  with gr.Column(scale=2):
 
208
  """
209
  ### To make predictions:
210
 
211
+ - Hit 'Enter' in the **Claim** text box OR
212
+ - Hit 'Shift-Enter' in the **Evidence** text box
213
+
214
+ _Predictions are also made after clicking **Get Evidence**_
215
  """
216
  )
217
 
218
+ with gr.Column(scale=2):
219
+ with gr.Accordion("Settings", open=False):
220
+ # Create dropdown menu to select the model
221
+ model = gr.Dropdown(
222
+ choices=[
223
+ # TODO: For bert-base-uncased, how can we set num_labels = 2 in HF pipeline?
224
+ # (num_labels is available in AutoModelForSequenceClassification.from_pretrained)
225
+ # "bert-base-uncased",
226
+ "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
227
+ "jedick/DeBERTa-v3-base-mnli-fever-anli-scifact-citint",
228
+ ],
229
+ value=MODEL_NAME,
230
+ label="Model",
231
+ )
232
+ radio = gr.Radio(["label", "barplot"], value="label", label="Results")
233
+ with gr.Accordion("Sources", open=False, elem_classes=["center_content"]):
234
  gr.Markdown(
235
  """
236
  #### *Capstone project*
 
326
  pdf_file = f"examples/retrieval/{pdf_file}"
327
  return pdf_file, claim
328
 
329
+ def append_feedback(
330
+ claim: str, evidence: str, model: str, label: str, user_label: str
331
+ ) -> None:
332
+ """
333
+ Append input/outputs and user feedback to a JSON Lines file.
334
+ """
335
+ with USER_FEEDBACK_PATH.open("a") as f:
336
+ f.write(
337
+ json.dumps(
338
+ {
339
+ "claim": claim,
340
+ "evidence": evidence,
341
+ "model": model,
342
+ "prediction": label,
343
+ "user_label": user_label,
344
+ "datetime": datetime.now().isoformat(),
345
+ }
346
+ )
347
+ )
348
+ f.write("\n")
349
+ gr.Success(f"Saved your feedback: {user_label}", duration=2, title="Thank you!")
350
+
351
+ def save_feedback_support(*args) -> None:
352
+ """
353
+ Save user feedback: Support
354
+ """
355
+ if is_running_in_hf_spaces():
356
+ # Use a thread lock to avoid concurrent writes from different users.
357
+ with scheduler.lock:
358
+ append_feedback(*args, user_label="Support")
359
+ else:
360
+ append_feedback(*args, user_label="Support")
361
+
362
+ def save_feedback_nei(*args) -> None:
363
+ """
364
+ Save user feedback: NEI
365
+ """
366
+ if is_running_in_hf_spaces():
367
+ # Use a thread lock to avoid concurrent writes from different users.
368
+ with scheduler.lock:
369
+ append_feedback(*args, user_label="NEI")
370
+ else:
371
+ append_feedback(*args, user_label="NEI")
372
+
373
+ def save_feedback_refute(*args) -> None:
374
+ """
375
+ Save user feedback: Refute
376
+ """
377
+ if is_running_in_hf_spaces():
378
+ # Use a thread lock to avoid concurrent writes from different users.
379
+ with scheduler.lock:
380
+ append_feedback(*args, user_label="Refute")
381
+ else:
382
+ append_feedback(*args, user_label="Refute")
383
+
384
  # Event listeners
385
 
386
  # Click the submit button or press Enter to submit
 
475
 
476
  # Clear the previous predictions when the model is changed
477
  gr.on(
478
+ triggers=[model.select],
479
  fn=lambda: "Model changed! Waiting for updated predictions...",
480
  outputs=[prediction],
481
  api_name=False,
482
  )
483
 
484
  # Change the model to update the predictions
485
+ model.change(
486
  fn=select_model,
487
+ inputs=model,
488
  ).then(
489
  fn=query_model,
490
  inputs=[claim, evidence],
 
492
  api_name=False,
493
  )
494
 
495
+ # Log user feedback when button is clicked
496
+ flag_support.click(
497
+ fn=save_feedback_support,
498
+ inputs=[claim, evidence, model, label],
499
+ outputs=None,
500
+ )
501
+ flag_nei.click(
502
+ fn=save_feedback_nei,
503
+ inputs=[claim, evidence, model, label],
504
+ outputs=None,
505
+ )
506
+ flag_refute.click(
507
+ fn=save_feedback_refute,
508
+ inputs=[claim, evidence, model, label],
509
+ outputs=None,
510
+ )
511
+
512
 
513
  if __name__ == "__main__":
514
  # allowed_paths is needed to upload PDFs from specific example directory
requirements.txt CHANGED
@@ -6,3 +6,4 @@ pymupdf
6
  unidecode
7
  nltk
8
  bm25s
 
 
6
  unidecode
7
  nltk
8
  bm25s
9
+ git+https://github.com/huggingface/huggingface_hub