RyanTietjen's picture
Update app.py
dbaf100 verified
raw
history blame
8.16 kB
"""
Ryan Tietjen
Sep 2024
Demo application for paper abstract fragmentation demonstration
"""
import gradio as gr
import tensorflow as tf
from tensorflow import keras
from keras import layers
from timeit import default_timer as timer
from process_input import split_abstract
from process_input import split_abstract_original
from process_input import split_sentences_by_characters
import pandas as pd
import tensorflow_hub as hub
from model import EmbeddingLayer
from process_input import encode_labels
# Example abstract offered in the UI; one sentence per line, which is the
# input layout the app asks users to follow.
example1 = """The aim of this study was to verify in bruxism patients the possible efficacy of auricular stimulation in reducing the hypertonicity of some masticatory muscles.
Forty-three bruxism patients were randomly allocated to 3 groups : acupuncture , needle contact for 10 seconds , no treatment ( control ).
Helkimo 's clinical dysfunction index ( CDI ) and anamnestic dysfunction index ( ADI ) were used to assess the functional state of the masticatory system.
The resting electrical activity of the anterior temporalis ( AT ) , masseter ( MM ) , digastric ( DA ) and sternocleidomastoid ( SCM ) muscles was measured , according to Jankelson , with surface electrodes at baseline , after stimulation and continually for 30 minutes ( 120 measurements in total ).
The electromyographical variations in the 3 groups were studied with t test for independent samples.
Acupuncture and needle contact were superior to control in reducing the muscle hypertonicity of all muscles except SCM.
In the comparison between acupuncture and needle contact the former showed better results only for the right TA and left DA ( p = 0.000 ).
In this study it was possible to measure the efficacy of the stimulation of only one point or area , which is an ideal model for research in acupuncture.
The auricular area we chose for stimulation was never used before for the purpose of relaxing masticatory muscles.
Acupuncture and needle contact for 10 seconds showed similar effects."""

# Examples handed to the Gradio interface.
sample_list = [example1]
def format_non_empty_lists(objective, background, methods, results, conclusion):
    """
    Format the classified sentences as a sectioned, bulleted text block.

    Each non-empty list becomes a section: a header line with the category
    name followed by one " - " bullet line per sentence. Sections are
    separated by a blank line and appear in the fixed order Objective,
    Background, Methods, Results, Conclusion. Empty (or None) lists are skipped.

    Parameters:
    - objective (list): List containing sentences classified as 'Objective'.
    - background (list): List containing sentences classified as 'Background'.
    - methods (list): List containing sentences classified as 'Methods'.
    - results (list): List containing sentences classified as 'Results'.
    - conclusion (list): List containing sentences classified as 'Conclusion'.

    Returns:
    - str: The formatted sections with surrounding whitespace stripped, or an
      empty string when every list is empty.
    """
    sections = (
        ('Objective', objective),
        ('Background', background),
        ('Methods', methods),
        ('Results', results),
        ('Conclusion', conclusion),
    )

    # Collect the pieces and join once instead of growing a string with +=.
    blocks = []
    for name, content in sections:
        if not content:  # Skip categories with no sentences
            continue
        bullet_lines = "".join(f" - {item}\n" for item in content)
        # Trailing newline leaves a blank line between consecutive sections.
        blocks.append(f"{name}:\n{bullet_lines}\n")

    return "".join(blocks).strip()
# Saved Keras model used for sentence classification.
_MODEL_PATH = "200k_10_epochs.keras"
# Process-wide cache so the model is read from disk once, not on every request.
_cached_model = None


def _get_model():
    """Return the sentence classifier, loading it from disk on first use only."""
    global _cached_model
    if _cached_model is None:
        _cached_model = tf.keras.models.load_model(
            _MODEL_PATH, custom_objects={'EmbeddingLayer': EmbeddingLayer()}
        )
    return _cached_model


def fragment_single_abstract(abstract):
    """
    Classify every sentence of an abstract and return them grouped by category.

    The process involves several steps:
    1. Splitting the abstract into sentences, once with `split_abstract_original`
       (text used for display) and once with `split_abstract` (model input).
    2. Building the model inputs: one-hot line number (depth 15), one-hot total
       line count (depth 20), the sentences, and their character-level form.
    3. Predicting a class index for each sentence with the saved Keras model
       '200k_10_epochs.keras' (loaded once and reused across calls).
    4. Grouping the display sentences by predicted class and formatting them
       with `format_non_empty_lists`.

    Parameters:
    abstract (str): The abstract text that needs to be processed and categorized.

    Returns:
    tuple: A tuple containing two elements:
    - str: The formatted text listing each non-empty category ('Objective',
      'Background', 'Methods', 'Results', 'Conclusion') and its sentences.
      Empty string when the abstract is empty or only whitespace.
    - float: The time taken to process the abstract (in seconds).

    Example:
    ```python
    abstract_text = "This study aims to evaluate the effectiveness of..."
    fragmented_text, processing_time = fragment_single_abstract(abstract_text)
    print("Fragmented Abstract:", fragmented_text)
    print("Processing Time:", processing_time)
    ```

    Note:
    - This function assumes that the Keras model file '200k_10_epochs.keras' and
      the custom layer 'EmbeddingLayer' are available and loadable.
    - Predictions outside the class indices 0-4 are ignored.
    """
    start_time = timer()

    # Nothing to classify: return an empty result instead of failing on the
    # column lookups below.
    if not abstract or not abstract.strip():
        return "", timer() - start_time

    # Sentences as they will be shown to the user.
    original_abstract = split_abstract_original(abstract)
    sentences_original = pd.DataFrame(original_abstract)["text"].tolist()

    # Sentences plus positional features as fed to the model.
    df = pd.DataFrame(split_abstract(abstract))
    sentences = df["text"].tolist()
    labels = encode_labels(df["target"])

    model = _get_model()

    data_by_character = split_sentences_by_characters(sentences)
    line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
    total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)

    sentences_dataset = tf.data.Dataset.from_tensor_slices(
        (line_numbers, total_line_numbers, sentences, data_by_character)
    )
    # NOTE(review): labels are zipped in exactly as before; presumably this mirrors
    # the training pipeline's (inputs, targets) layout -- confirm.
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    dataset = (
        tf.data.Dataset.zip((sentences_dataset, labels_dataset))
        .batch(32)
        .prefetch(tf.data.AUTOTUNE)
    )

    # One class index per sentence, pulled out as plain integers for the loop.
    predictions = tf.argmax(model.predict(dataset), axis=1).numpy()

    objective, methods, results, conclusion, background = [], [], [], [], []
    # Class index -> bucket, in the order the original if/elif chain used:
    # 0 objective, 1 methods, 2 results, 3 conclusion, 4 background.
    # NOTE(review): presumably matches the label encoding used in training -- confirm.
    buckets = (objective, methods, results, conclusion, background)
    for i, prediction in enumerate(predictions):
        class_index = int(prediction)
        if 0 <= class_index < len(buckets):
            buckets[class_index].append(sentences_original[i])

    end_time = timer()
    formatted = format_non_empty_lists(objective, background, methods, results, conclusion)
    return formatted, end_time - start_time
# Heading shown at the top of the Gradio page.
title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen"

# Markdown shown under the title. The PubMed 200k RCT link target is written
# without surrounding quotes so that it is a valid Markdown link destination.
description = """
This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion.
The dataset used can be found in the [PubMed 200k RCT](https://arxiv.org/pdf/1710.06071) and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture
was based off of ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251)
This model achieved a testing accuracy of 88.2% and a F1 score of 88%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation).
How to use:
-Paste the given abstract into the box below.
-Make sure to separate each sentence by a new line (this helps avoid ambiguity).
-Click submit, and allow the model to run!
"""
# Input: a multi-line box where the user pastes the abstract.
_abstract_input = gr.Textbox(lines=10, placeholder="Enter abstract here...")

# Outputs, in the order fragment_single_abstract returns them:
# the formatted text, then the elapsed time in seconds.
_result_outputs = [
    gr.Textbox(label="Fragmented Abstract"),
    gr.Number(label="Time to process (s)"),
]

# Wire the classifier function to the UI and start serving it locally
# (no public share link).
demo = gr.Interface(
    fn=fragment_single_abstract,
    inputs=_abstract_input,
    outputs=_result_outputs,
    examples=sample_list,
    title=title,
    description=description,
)
demo.launch(share=False)