rafmacalaba committed on
Commit
593f17e
·
1 Parent(s): 28e7655

change markdown

Browse files
Files changed (2) hide show
  1. app.py +24 -4
  2. relation_extraction.py +1 -0
app.py CHANGED
@@ -170,10 +170,30 @@ def _cached_predictions(state):
170
  return json.dumps(state, indent=2)
171
 
172
  with gr.Blocks() as demo:
173
- gr.Markdown("## Data Use Detector\n"
174
- "Adjust the sliders below to set thresholds, then:\n"
175
- "- **Submit** to highlight entities.\n"
176
- "- **Get Model Predictions** to see the raw JSON output.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  txt_in = gr.Textbox(
179
  label="Input Text",
 
170
  return json.dumps(state, indent=2)
171
 
172
  with gr.Blocks() as demo:
173
+ gr.Markdown("""# Data Use Detector
174
+
175
+ This Space demonstrates our fine-tuned GLiNER model’s ability to spot **dataset mentions** and **relations** in any input text. It identifies dataset names via NER, then extracts relations such as **publisher**, **acronym**, **publication year**, **data geography**, and more.
176
+
177
+ ---
178
+
179
+ **How it works**
180
+ 1. **NER**: Recognizes dataset names in your text.
181
+ 2. **RE**: Links each dataset to its attributes (e.g., publisher, year, acronym).
182
+ 3. **Visualization**: Highlights entities and relation spans inline.
183
+
184
+ **Instructions**
185
+ 1. Paste or edit your text in the box below.
186
+ 2. Tweak the **NER** & **RE** confidence sliders.
187
+ 3. Click **Submit** to see highlights.
188
+ 4. Click **Get Model Predictions** to view the raw JSON output.
189
+
190
+ **Resources**
191
+ - **Model:** `rafmacalaba/gliner_re_finetuned-v3`
192
+ - **Paper:** _Large Language Models and Synthetic Data for Monitoring Dataset Mentions in Research Papers_ – ArXiv: [2502.10263](https://arxiv.org/pdf/2502.10263)
193
+ - [GLiNER GitHub Repo](https://github.com/urchade/GLiNER)
194
+ - [Project Docs](https://worldbank.github.io/ai4data-use/docs/introduction.html)
195
+ """)
196
+
197
 
198
  txt_in = gr.Textbox(
199
  label="Input Text",
relation_extraction.py CHANGED
@@ -39,6 +39,7 @@ class CustomGLiNERRelationExtractor(GLiNERBasePipeline):
39
  """
40
  # Use the provided prompt or default to the class-level prompt
41
  prompt = prompt if prompt is not None else self.prompt
 
42
  super().__init__(model_id=model_id, model=model, prompt=prompt, device=device)
43
 
44
  def prepare_texts(self, texts: List[str], **kwargs):
 
39
  """
40
  # Use the provided prompt or default to the class-level prompt
41
  prompt = prompt if prompt is not None else self.prompt
42
+ self.return_index = return_index
43
  super().__init__(model_id=model_id, model=model, prompt=prompt, device=device)
44
 
45
  def prepare_texts(self, texts: List[str], **kwargs):