RyanTietjen committed
Commit f343cdb · verified · 1 Parent(s): ea34069

Update app.py

Files changed (1)
  1. app.py +182 -182
app.py CHANGED
@@ -1,183 +1,183 @@
- """
- Ryan Tietjen
- Sep 2024
- Demo application for paper abstract fragmentation demonstration
- """
- import gradio as gr
- import tensorflow as tf
- from tensorflow import keras
- from keras import layers
- from timeit import default_timer as timer
- from process_input import split_abstract
- from process_input import split_abstract_original
- from process_input import split_sentences_by_characters
- import pandas as pd
- import tensorflow_hub as hub
- from model import EmbeddingLayer
- from process_input import encode_labels
-
-
- sample_list = []
- example1 = f"""The aim of this study was to describe the electrocardiographic ( ECG ) evolutionary changes after an acute myocardial infarction ( AMI ) and to evaluate their correlation with left ventricular function and remodeling.
- The QRS complex changes after AMI have been correlated with infarct size and left ventricular function.
- By contrast , the significance of T wave changes is controversial.
- We studied 536 patients enrolled in the GISSI-3-Echo substudy who underwent ECG and echocardiographic studies at 24 to 48 h ( S1 ) , at hospital discharge ( S2 ) , at six weeks ( S3 ) and six months ( S4 ) after AMI.
- The number of Qwaves ( nQ ) and QRS quantitative score ( QRSs ) did not change over time.
- From S2 to S4 , the number of negative T waves ( nT NEG ) decreased ( p < 0.0001 ) , wall motion abnormalities ( % WMA ) improved ( p < 0.001 ) , ventricular volumes increased ( p < 0.0001 ) while ejection fraction remained stable.
- According to the T wave changes after hospital discharge , patients were divided into four groups : stable positive T waves ( group 1 , n = 35 ) , patients who showed a decrease > or = 1 in nT NEG ( group 2 , n = 361 ) , patients with no change in nT NEG ( group 3 , n = 64 ) and those with an increase > or = 1 in nT NEG ( group 4 , n = 76 ).
- The QRSs and nQ remained stable in all groups.
- Groups 3 and 4 showed less recovery in % WMA , more pronounced ventricular enlargement and progressive decline in ejection fraction than groups 1 and 2 ( interaction time x groups p < 0.0001 ).
- The analysis of serial ECG can predict postinfarct left ventricular remodeling.
- Normalization of negative T waves during the follow-up appears more strictly related to recovery of regional dysfunction than QRS changes.
- Lack of resolution and late appearance of new negative T predict unfavorable remodeling with progressive deterioration of ventricular function."""
- sample_list.append(example1)
-
- def format_non_empty_lists(objective, background, methods, results, conclusion):
-     """
-     This function checks each provided list and formats a string with the list name and its contents
-     only if the list is not empty.
-
-     Parameters:
-     - objective (list): List containing sentences classified as 'Objective'.
-     - background (list): List containing sentences classified as 'Background'.
-     - methods (list): List containing sentences classified as 'Methods'.
-     - results (list): List containing sentences classified as 'Results'.
-     - conclusion (list): List containing sentences classified as 'Conclusion'.
-
-     Returns:
-     - str: A formatted string that contains the non-empty list names and their contents.
-     """
-
-     output = ""
-     lists = {
-         'Objective': objective,
-         'Background': background,
-         'Methods': methods,
-         'Results': results,
-         'Conclusion': conclusion
-     }
-
-     for name, content in lists.items():
-         if content:  # Check if the list is not empty
-             output += f"{name}:\n"  # Append the category name followed by a newline
-             for item in content:
-                 output += f" - {item}\n"  # Append each item in the list, formatted as a list
-
-             output += "\n"  # Append a newline for better separation between categories
-
-     return output.strip()
-
- def fragment_single_abstract(abstract):
-     """
-     Processes a single abstract by fragmenting it into structured sections based on predefined categories
-     such as Objective, Methods, Results, Conclusions, and Background. The function utilizes a pre-trained Keras model
-     to predict the category of each sentence in the abstract.
-
-     The process involves several steps:
-     1. Splitting the abstract into sentences.
-     2. Encoding these sentences using a custom embedding layer.
-     3. Classifying each sentence into one of the predefined categories.
-     4. Grouping the sentences by their predicted categories.
-
-     Parameters:
-     abstract (str): The abstract text that needs to be processed and categorized.
-
-     Returns:
-     tuple: A tuple containing two elements:
-     - A formatted string grouping the sentences under the non-empty category names ('Objective', 'Background', 'Methods', 'Results', 'Conclusion').
-     - The time taken to process the abstract (in seconds).
-
-     Example:
-     ```python
-     abstract_text = "This study aims to evaluate the effectiveness of..."
-     categorized_abstract, processing_time = fragment_single_abstract(abstract_text)
-     print("Categorized Abstract:", categorized_abstract)
-     print("Processing Time:", processing_time)
-     ```
-
-     Note:
-     - This function assumes that a Keras model '20k_5_epochs.keras' and a custom embedding layer 'EmbeddingLayer'
-     are available and correctly configured to be loaded.
-     - The function uses pandas for data manipulation, TensorFlow for machine learning operations,
-     and TensorFlow's data API for batching and prefetching data for model predictions.
-     """
-     start_time = timer()
-
-     original_abstract = split_abstract_original(abstract)
-     df_original = pd.DataFrame(original_abstract)
-     sentences_original = df_original["text"].tolist()
-
-     abstract_split = split_abstract(abstract)
-     df = pd.DataFrame(abstract_split)
-     sentences = df["text"].tolist()
-     labels = encode_labels(df["target"])
-
-     objective = []
-     background = []
-     methods = []
-     results = []
-     conclusion = []
-
-     embed_layer = EmbeddingLayer()
-     model = tf.keras.models.load_model("20k_5_epochs.keras", custom_objects={'EmbeddingLayer': embed_layer})
-
-     data_by_character = split_sentences_by_characters(sentences)
-     line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
-     total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)
-
-     sentences_dataset = tf.data.Dataset.from_tensor_slices((line_numbers, total_line_numbers, sentences, data_by_character))
-     labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
-     dataset = tf.data.Dataset.zip((sentences_dataset, labels_dataset)).batch(32).prefetch(tf.data.AUTOTUNE)
-
-     predictions = tf.argmax(model.predict(dataset), axis=1)
-
-     for i, prediction in enumerate(predictions):
-         if prediction == 0:
-             objective.append(sentences_original[i])
-         elif prediction == 1:
-             methods.append(sentences_original[i])
-         elif prediction == 2:
-             results.append(sentences_original[i])
-         elif prediction == 3:
-             conclusion.append(sentences_original[i])
-         elif prediction == 4:
-             background.append(sentences_original[i])
-
-     end_time = timer()
-
-     return format_non_empty_lists(objective, background, methods, results, conclusion), end_time - start_time
-
-
-
- title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen"
- description = f"""
- This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion.
- The dataset used can be found in the [PubMed 200k RCT](https://arxiv.org/abs/1710.06071) and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture
- was based on ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251)
-
- This project achieved a testing accuracy of 88.12% and an F1 score of 87.92%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation).
-
- How to use:
-
- -Paste the given abstract into the box below.
-
- -Make sure to separate each sentence by a new line (this helps avoid ambiguity).
-
- -Click submit, and allow the model to run!
- """
-
- demo = gr.Interface(
-     fn=fragment_single_abstract,
-     inputs=gr.Textbox(lines=10, placeholder="Enter abstract here..."),
-     outputs=[
-         gr.Textbox(label="Fragmented Abstract"),
-         gr.Number(label="Time to process (s)"),
-     ],
-     examples=sample_list,
-     title=title,
-     description=description,
- )
-
-
+ """
+ Ryan Tietjen
+ Sep 2024
+ Demo application for paper abstract fragmentation demonstration
+ """
+ import gradio as gr
+ import tensorflow as tf
+ from tensorflow import keras
+ from keras import layers
+ from timeit import default_timer as timer
+ from process_input import split_abstract
+ from process_input import split_abstract_original
+ from process_input import split_sentences_by_characters
+ import pandas as pd
+ import tensorflow_hub as hub
+ from model import EmbeddingLayer
+ from process_input import encode_labels
+
+
+ sample_list = []
+ example1 = f"""The aim of this study was to describe the electrocardiographic ( ECG ) evolutionary changes after an acute myocardial infarction ( AMI ) and to evaluate their correlation with left ventricular function and remodeling.
+ The QRS complex changes after AMI have been correlated with infarct size and left ventricular function.
+ By contrast , the significance of T wave changes is controversial.
+ We studied 536 patients enrolled in the GISSI-3-Echo substudy who underwent ECG and echocardiographic studies at 24 to 48 h ( S1 ) , at hospital discharge ( S2 ) , at six weeks ( S3 ) and six months ( S4 ) after AMI.
+ The number of Qwaves ( nQ ) and QRS quantitative score ( QRSs ) did not change over time.
+ From S2 to S4 , the number of negative T waves ( nT NEG ) decreased ( p < 0.0001 ) , wall motion abnormalities ( % WMA ) improved ( p < 0.001 ) , ventricular volumes increased ( p < 0.0001 ) while ejection fraction remained stable.
+ According to the T wave changes after hospital discharge , patients were divided into four groups : stable positive T waves ( group 1 , n = 35 ) , patients who showed a decrease > or = 1 in nT NEG ( group 2 , n = 361 ) , patients with no change in nT NEG ( group 3 , n = 64 ) and those with an increase > or = 1 in nT NEG ( group 4 , n = 76 ).
+ The QRSs and nQ remained stable in all groups.
+ Groups 3 and 4 showed less recovery in % WMA , more pronounced ventricular enlargement and progressive decline in ejection fraction than groups 1 and 2 ( interaction time x groups p < 0.0001 ).
+ The analysis of serial ECG can predict postinfarct left ventricular remodeling.
+ Normalization of negative T waves during the follow-up appears more strictly related to recovery of regional dysfunction than QRS changes.
+ Lack of resolution and late appearance of new negative T predict unfavorable remodeling with progressive deterioration of ventricular function."""
+ sample_list.append(example1)
+
+ def format_non_empty_lists(objective, background, methods, results, conclusion):
+     """
+     This function checks each provided list and formats a string with the list name and its contents
+     only if the list is not empty.
+
+     Parameters:
+     - objective (list): List containing sentences classified as 'Objective'.
+     - background (list): List containing sentences classified as 'Background'.
+     - methods (list): List containing sentences classified as 'Methods'.
+     - results (list): List containing sentences classified as 'Results'.
+     - conclusion (list): List containing sentences classified as 'Conclusion'.
+
+     Returns:
+     - str: A formatted string that contains the non-empty list names and their contents.
+     """
+
+     output = ""
+     lists = {
+         'Objective': objective,
+         'Background': background,
+         'Methods': methods,
+         'Results': results,
+         'Conclusion': conclusion
+     }
+
+     for name, content in lists.items():
+         if content:  # Check if the list is not empty
+             output += f"{name}:\n"  # Append the category name followed by a newline
+             for item in content:
+                 output += f" - {item}\n"  # Append each item in the list, formatted as a list
+
+             output += "\n"  # Append a newline for better separation between categories
+
+     return output.strip()
+
+ def fragment_single_abstract(abstract):
+     """
+     Processes a single abstract by fragmenting it into structured sections based on predefined categories
+     such as Objective, Methods, Results, Conclusions, and Background. The function utilizes a pre-trained Keras model
+     to predict the category of each sentence in the abstract.
+
+     The process involves several steps:
+     1. Splitting the abstract into sentences.
+     2. Encoding these sentences using a custom embedding layer.
+     3. Classifying each sentence into one of the predefined categories.
+     4. Grouping the sentences by their predicted categories.
+
+     Parameters:
+     abstract (str): The abstract text that needs to be processed and categorized.
+
+     Returns:
+     tuple: A tuple containing two elements:
+     - A formatted string grouping the sentences under the non-empty category names ('Objective', 'Background', 'Methods', 'Results', 'Conclusion').
+     - The time taken to process the abstract (in seconds).
+
+     Example:
+     ```python
+     abstract_text = "This study aims to evaluate the effectiveness of..."
+     categorized_abstract, processing_time = fragment_single_abstract(abstract_text)
+     print("Categorized Abstract:", categorized_abstract)
+     print("Processing Time:", processing_time)
+     ```
+
+     Note:
+     - This function assumes that a Keras model '20k_5_epochs.keras' and a custom embedding layer 'EmbeddingLayer'
+     are available and correctly configured to be loaded.
+     - The function uses pandas for data manipulation, TensorFlow for machine learning operations,
+     and TensorFlow's data API for batching and prefetching data for model predictions.
+     """
+     start_time = timer()
+
+     original_abstract = split_abstract_original(abstract)
+     df_original = pd.DataFrame(original_abstract)
+     sentences_original = df_original["text"].tolist()
+
+     abstract_split = split_abstract(abstract)
+     df = pd.DataFrame(abstract_split)
+     sentences = df["text"].tolist()
+     labels = encode_labels(df["target"])
+
+     objective = []
+     background = []
+     methods = []
+     results = []
+     conclusion = []
+
+     embed_layer = EmbeddingLayer()
+     model = tf.keras.models.load_model("20k_5_epochs.keras", custom_objects={'EmbeddingLayer': embed_layer})
+
+     data_by_character = split_sentences_by_characters(sentences)
+     line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
+     total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)
+
+     sentences_dataset = tf.data.Dataset.from_tensor_slices((line_numbers, total_line_numbers, sentences, data_by_character))
+     labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
+     dataset = tf.data.Dataset.zip((sentences_dataset, labels_dataset)).batch(32).prefetch(tf.data.AUTOTUNE)
+
+     predictions = tf.argmax(model.predict(dataset), axis=1)
+
+     for i, prediction in enumerate(predictions):
+         if prediction == 0:
+             objective.append(sentences_original[i])
+         elif prediction == 1:
+             methods.append(sentences_original[i])
+         elif prediction == 2:
+             results.append(sentences_original[i])
+         elif prediction == 3:
+             conclusion.append(sentences_original[i])
+         elif prediction == 4:
+             background.append(sentences_original[i])
+
+     end_time = timer()
+
+     return format_non_empty_lists(objective, background, methods, results, conclusion), end_time - start_time
+
+
+
+ title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen"
+ description = f"""
+ This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion.
+ The dataset used can be found in the [PubMed 200k RCT](https://arxiv.org/abs/1710.06071) and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture
+ was based on ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251)
+
+ This model achieved a testing accuracy of 88.12% and an F1 score of 87.92%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation).
+
+ How to use:
+
+ -Paste the given abstract into the box below.
+
+ -Make sure to separate each sentence by a new line (this helps avoid ambiguity).
+
+ -Click submit, and allow the model to run!
+ """
+
+ demo = gr.Interface(
+     fn=fragment_single_abstract,
+     inputs=gr.Textbox(lines=10, placeholder="Enter abstract here..."),
+     outputs=[
+         gr.Textbox(label="Fragmented Abstract"),
+         gr.Number(label="Time to process (s)"),
+     ],
+     examples=sample_list,
+     title=title,
+     description=description,
+ )
+
+
  demo.launch(share=False)
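
For anyone exercising the updated app from Python rather than the web UI, here is a minimal client-side sketch. It is illustrative only: it assumes the Space is running, that the `gradio_client` package is installed, and that the Space id shown is a stand-in rather than something stated in this commit.

```python
# Illustrative sketch -- the Space id below is a placeholder; point Client at
# the actual Space (or local URL) where this app.py is running.
from gradio_client import Client

client = Client("RyanTietjen/Paper-Fragmentation")  # hypothetical Space id

abstract = (
    "The aim of this study was to evaluate X.\n"
    "We enrolled 100 patients and measured Y.\n"
    "Y improved significantly in the treatment group.\n"
    "These results suggest Z."
)

# gr.Interface exposes a single "/predict" endpoint by default and returns the
# outputs in order: the fragmented abstract and the processing time in seconds.
fragmented, seconds = client.predict(abstract, api_name="/predict")
print(fragmented)
print(f"Processed in {seconds:.2f} s")
```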