Spaces:

ai4data
/

datause-detector

Paused

rafmacalaba committed on May 25

Commit

593f17e

1 Parent(s): 28e7655

change markdown

Files changed (2) hide show

app.py CHANGED Viewed

@@ -170,10 +170,30 @@ def _cached_predictions(state):
     return json.dumps(state, indent=2)
 with gr.Blocks() as demo:
-    gr.Markdown("## Data Use Detector\n"
-                "Adjust the sliders below to set thresholds, then:\n"
-                "- **Submit** to highlight entities.\n"
-                "- **Get Model Predictions** to see the raw JSON output.")
     txt_in = gr.Textbox(
         label="Input Text",

     return json.dumps(state, indent=2)
 with gr.Blocks() as demo:
+    gr.Markdown("""# Data Use Detector
+    This Space demonstrates our fine-tuned GLiNER model’s ability to spot **dataset mentions** and **relations** in any input text. It identifies dataset names via NER, then extracts relations such as **publisher**, **acronym**, **publication year**, **data geography**, and more.
+    ---
+    **How it works**
+    1. **NER**: Recognizes dataset names in your text.
+    2. **RE**: Links each dataset to its attributes (e.g., publisher, year, acronym).
+    3. **Visualization**: Highlights entities and relation spans inline.
+    **Instructions**
+    1. Paste or edit your text in the box below.
+    2. Tweak the **NER** & **RE** confidence sliders.
+    3. Click **Submit** to see highlights.
+    4. Click **Get Model Predictions** to view the raw JSON output.
+    **Resources**
+    - **Model:** `rafmacalaba/gliner_re_finetuned-v3`
+    - **Paper:** _Large Language Models and Synthetic Data for Monitoring Dataset Mentions in Research Papers_ – ArXiv: [2502.10263](https://arxiv.org/pdf/2502.10263)
+    - [GLiNER GitHub Repo](https://github.com/urchade/GLiNER)
+    - [Project Docs](https://worldbank.github.io/ai4data-use/docs/introduction.html)
+    """)
     txt_in = gr.Textbox(
         label="Input Text",

relation_extraction.py CHANGED Viewed

@@ -39,6 +39,7 @@ class CustomGLiNERRelationExtractor(GLiNERBasePipeline):
         """
         # Use the provided prompt or default to the class-level prompt
         prompt = prompt if prompt is not None else self.prompt
         super().__init__(model_id=model_id, model=model, prompt=prompt, device=device)
     def prepare_texts(self, texts: List[str], **kwargs):

         """
         # Use the provided prompt or default to the class-level prompt
         prompt = prompt if prompt is not None else self.prompt
+        self.return_index = return_index
         super().__init__(model_id=model_id, model=model, prompt=prompt, device=device)
     def prepare_texts(self, texts: List[str], **kwargs):