jedick committed on
Commit
9d59e2b
·
1 Parent(s): 13753a4

Save user feedback to dataset in HF Hub

Browse files
Files changed (2) hide show
  1. app.py +155 -43
  2. requirements.txt +1 -0
app.py CHANGED
@@ -4,6 +4,18 @@ from transformers import pipeline
4
  import nltk
5
  from retrieval import retrieve_from_pdf
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  if gr.NO_RELOAD:
9
  # Resource punkt_tab not found during application startup on HF spaces
@@ -17,6 +29,23 @@ if gr.NO_RELOAD:
17
  model=MODEL_NAME,
18
  )
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def prediction_to_df(prediction=None):
22
  """
@@ -75,27 +104,31 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
75
  gr.Markdown("# AI4citations")
76
  gr.Markdown("## *AI-powered scientific citation verification*")
77
  claim = gr.Textbox(
78
- label="1. Claim",
79
  info="aka hypothesis",
80
  placeholder="Input claim",
81
  )
82
  with gr.Row():
83
- with gr.Accordion("Get Evidence from PDF"):
84
- pdf_file = gr.File(label="Upload PDF", type="filepath", height=120)
85
- get_evidence = gr.Button(value="Get Evidence")
86
- top_k = gr.Slider(
87
- 1,
88
- 10,
89
- value=5,
90
- step=1,
91
- interactive=True,
92
- label="Top k sentences",
 
 
 
 
 
 
 
 
 
93
  )
94
- evidence = gr.TextArea(
95
- label="2. Evidence",
96
- info="aka premise",
97
- placeholder="Input evidence or use Get Evidence from PDF",
98
- )
99
  submit = gr.Button("3. Submit", visible=False)
100
 
101
  with gr.Column(scale=2):
@@ -113,20 +146,14 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
113
  visible=False,
114
  )
115
  label = gr.Label(label="Results")
116
- with gr.Accordion("Settings"):
117
- # Create dropdown menu to select the model
118
- dropdown = gr.Dropdown(
119
- choices=[
120
- # TODO: For bert-base-uncased, how can we set num_labels = 2 in HF pipeline?
121
- # (num_labels is available in AutoModelForSequenceClassification.from_pretrained)
122
- # "bert-base-uncased",
123
- "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
124
- "jedick/DeBERTa-v3-base-mnli-fever-anli-scifact-citint",
125
- ],
126
- value=MODEL_NAME,
127
- label="Model",
128
- )
129
- radio = gr.Radio(["label", "barplot"], value="label", label="Results")
130
  with gr.Accordion("Examples"):
131
  gr.Markdown("*Examples are run when clicked*"),
132
  with gr.Row():
@@ -163,8 +190,6 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
163
  ].tolist(),
164
  )
165
 
166
- # Sources and acknowledgments
167
-
168
  with gr.Row():
169
  with gr.Column(scale=3):
170
  with gr.Row():
@@ -173,9 +198,9 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
173
  """
174
  ### Usage:
175
 
176
- 1. Input a **Claim**
177
- 2. Input **Evidence** statements
178
- - *Optional:* Upload a PDF and click Get Evidence
179
  """
180
  )
181
  with gr.Column(scale=2):
@@ -183,14 +208,29 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
183
  """
184
  ### To make predictions:
185
 
186
- - Hit 'Enter' in the **Claim** text box,
187
- - Hit 'Shift-Enter' in the **Evidence** text box, or
188
- - Click Get Evidence
 
189
  """
190
  )
191
 
192
- with gr.Column(scale=2, elem_classes=["center-content"]):
193
- with gr.Accordion("Sources", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  gr.Markdown(
195
  """
196
  #### *Capstone project*
@@ -286,6 +326,61 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
286
  pdf_file = f"examples/retrieval/{pdf_file}"
287
  return pdf_file, claim
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  # Event listeners
290
 
291
  # Click the submit button or press Enter to submit
@@ -380,16 +475,16 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
380
 
381
  # Clear the previous predictions when the model is changed
382
  gr.on(
383
- triggers=[dropdown.select],
384
  fn=lambda: "Model changed! Waiting for updated predictions...",
385
  outputs=[prediction],
386
  api_name=False,
387
  )
388
 
389
  # Change the model to update the predictions
390
- dropdown.change(
391
  fn=select_model,
392
- inputs=dropdown,
393
  ).then(
394
  fn=query_model,
395
  inputs=[claim, evidence],
@@ -397,6 +492,23 @@ with gr.Blocks(theme=my_theme, css=custom_css, head=font_awesome_html) as demo:
397
  api_name=False,
398
  )
399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  if __name__ == "__main__":
402
  # allowed_paths is needed to upload PDFs from specific example directory
 
4
  import nltk
5
  from retrieval import retrieve_from_pdf
6
  import os
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from uuid import uuid4
11
+
12
+
13
+ def is_running_in_hf_spaces():
14
+ """
15
+ Detects if app is running in Hugging Face Spaces
16
+ """
17
+ return "SPACE_ID" in os.environ
18
+
19
 
20
  if gr.NO_RELOAD:
21
  # Resource punkt_tab not found during application startup on HF spaces
 
29
  model=MODEL_NAME,
30
  )
31
 
32
+ # Setup user feedback file for uploading to HF dataset
33
+ # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
34
+ # https://huggingface.co/docs/huggingface_hub/v0.16.3/en/guides/upload#scheduled-uploads
35
+ USER_FEEDBACK_DIR = Path("user_feedback")
36
+ USER_FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
37
+ USER_FEEDBACK_PATH = USER_FEEDBACK_DIR / f"train-{uuid4()}.json"
38
+
39
+ if is_running_in_hf_spaces():
40
+ from huggingface_hub import CommitScheduler
41
+
42
+ scheduler = CommitScheduler(
43
+ repo_id="AI4citations-feedback",
44
+ repo_type="dataset",
45
+ folder_path=USER_FEEDBACK_DIR,
46
+ path_in_repo="data",
47
+ )
48
+
49
 
50
  def prediction_to_df(prediction=None):
51
  """
 
104
  gr.Markdown("# AI4citations")
105
  gr.Markdown("## *AI-powered scientific citation verification*")
106
  claim = gr.Textbox(
107
+ label="Claim",
108
  info="aka hypothesis",
109
  placeholder="Input claim",
110
  )
111
  with gr.Row():
112
+ with gr.Column(scale=2):
113
+ with gr.Accordion("Get Evidence from PDF"):
114
+ pdf_file = gr.File(
115
+ label="Upload PDF", type="filepath", height=120
116
+ )
117
+ get_evidence = gr.Button(value="Get Evidence")
118
+ top_k = gr.Slider(
119
+ 1,
120
+ 10,
121
+ value=5,
122
+ step=1,
123
+ interactive=True,
124
+ label="Top k sentences",
125
+ )
126
+ with gr.Column(scale=3):
127
+ evidence = gr.TextArea(
128
+ label="Evidence",
129
+ info="aka premise",
130
+ placeholder="Input evidence or use Get Evidence from PDF",
131
  )
 
 
 
 
 
132
  submit = gr.Button("3. Submit", visible=False)
133
 
134
  with gr.Column(scale=2):
 
146
  visible=False,
147
  )
148
  label = gr.Label(label="Results")
149
+ with gr.Accordion("Feedback"):
150
+ gr.Markdown(
151
+ "*Click on the correct label to help improve this app*<br>**NOTE:** The claim and evidence will also be saved"
152
+ ),
153
+ with gr.Row():
154
+ flag_support = gr.Button("Support")
155
+ flag_nei = gr.Button("NEI")
156
+ flag_refute = gr.Button("Refute")
 
 
 
 
 
 
157
  with gr.Accordion("Examples"):
158
  gr.Markdown("*Examples are run when clicked*"),
159
  with gr.Row():
 
190
  ].tolist(),
191
  )
192
 
 
 
193
  with gr.Row():
194
  with gr.Column(scale=3):
195
  with gr.Row():
 
198
  """
199
  ### Usage:
200
 
201
+ - Input a **Claim**, then:
202
+ - Upload a PDF and click **Get Evidence** OR
203
+ - Input **Evidence** statements yourself
204
  """
205
  )
206
  with gr.Column(scale=2):
 
208
  """
209
  ### To make predictions:
210
 
211
+ - Hit 'Enter' in the **Claim** text box OR
212
+ - Hit 'Shift-Enter' in the **Evidence** text box
213
+
214
+ _Predictions are also made after clicking **Get Evidence**_
215
  """
216
  )
217
 
218
+ with gr.Column(scale=2):
219
+ with gr.Accordion("Settings", open=False):
220
+ # Create dropdown menu to select the model
221
+ model = gr.Dropdown(
222
+ choices=[
223
+ # TODO: For bert-base-uncased, how can we set num_labels = 2 in HF pipeline?
224
+ # (num_labels is available in AutoModelForSequenceClassification.from_pretrained)
225
+ # "bert-base-uncased",
226
+ "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
227
+ "jedick/DeBERTa-v3-base-mnli-fever-anli-scifact-citint",
228
+ ],
229
+ value=MODEL_NAME,
230
+ label="Model",
231
+ )
232
+ radio = gr.Radio(["label", "barplot"], value="label", label="Results")
233
+ with gr.Accordion("Sources", open=False, elem_classes=["center_content"]):
234
  gr.Markdown(
235
  """
236
  #### *Capstone project*
 
326
  pdf_file = f"examples/retrieval/{pdf_file}"
327
  return pdf_file, claim
328
 
329
+ def append_feedback(
330
+ claim: str, evidence: str, model: str, label: str, user_label: str
331
+ ) -> None:
332
+ """
333
+ Append input/outputs and user feedback to a JSON Lines file.
334
+ """
335
+ with USER_FEEDBACK_PATH.open("a") as f:
336
+ f.write(
337
+ json.dumps(
338
+ {
339
+ "claim": claim,
340
+ "evidence": evidence,
341
+ "model": model,
342
+ "prediction": label,
343
+ "user_label": user_label,
344
+ "datetime": datetime.now().isoformat(),
345
+ }
346
+ )
347
+ )
348
+ f.write("\n")
349
+ gr.Success(f"Saved your feedback: {user_label}", duration=2, title="Thank you!")
350
+
351
+ def save_feedback_support(*args) -> None:
352
+ """
353
+ Save user feedback: Support
354
+ """
355
+ if is_running_in_hf_spaces():
356
+ # Use a thread lock to avoid concurrent writes from different users.
357
+ with scheduler.lock:
358
+ append_feedback(*args, user_label="Support")
359
+ else:
360
+ append_feedback(*args, user_label="Support")
361
+
362
+ def save_feedback_nei(*args) -> None:
363
+ """
364
+ Save user feedback: NEI
365
+ """
366
+ if is_running_in_hf_spaces():
367
+ # Use a thread lock to avoid concurrent writes from different users.
368
+ with scheduler.lock:
369
+ append_feedback(*args, user_label="NEI")
370
+ else:
371
+ append_feedback(*args, user_label="NEI")
372
+
373
+ def save_feedback_refute(*args) -> None:
374
+ """
375
+ Save user feedback: Refute
376
+ """
377
+ if is_running_in_hf_spaces():
378
+ # Use a thread lock to avoid concurrent writes from different users.
379
+ with scheduler.lock:
380
+ append_feedback(*args, user_label="Refute")
381
+ else:
382
+ append_feedback(*args, user_label="Refute")
383
+
384
  # Event listeners
385
 
386
  # Click the submit button or press Enter to submit
 
475
 
476
  # Clear the previous predictions when the model is changed
477
  gr.on(
478
+ triggers=[model.select],
479
  fn=lambda: "Model changed! Waiting for updated predictions...",
480
  outputs=[prediction],
481
  api_name=False,
482
  )
483
 
484
  # Change the model to update the predictions
485
+ model.change(
486
  fn=select_model,
487
+ inputs=model,
488
  ).then(
489
  fn=query_model,
490
  inputs=[claim, evidence],
 
492
  api_name=False,
493
  )
494
 
495
+ # Log user feedback when button is clicked
496
+ flag_support.click(
497
+ fn=save_feedback_support,
498
+ inputs=[claim, evidence, model, label],
499
+ outputs=None,
500
+ )
501
+ flag_nei.click(
502
+ fn=save_feedback_nei,
503
+ inputs=[claim, evidence, model, label],
504
+ outputs=None,
505
+ )
506
+ flag_refute.click(
507
+ fn=save_feedback_refute,
508
+ inputs=[claim, evidence, model, label],
509
+ outputs=None,
510
+ )
511
+
512
 
513
  if __name__ == "__main__":
514
  # allowed_paths is needed to upload PDFs from specific example directory
requirements.txt CHANGED
@@ -6,3 +6,4 @@ pymupdf
6
  unidecode
7
  nltk
8
  bm25s
 
 
6
  unidecode
7
  nltk
8
  bm25s
9
+ git+https://github.com/huggingface/huggingface_hub