Spaces:

alpata
/

ReactionPrediction

Running

App Files Files Community

alpata committed on Jul 31

Commit

4afe335

verified ·

1 Parent(s): 89588fc

Upload 3 files

Browse files

Files changed (3) hide show

README.md +8 -9
app.py +121 -101
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -4,26 +4,25 @@ emoji: 🧪
 colorFrom: blue
 colorTo: green
 sdk: streamlit
-sdk_version: 1.25.0
 app_file: app.py
 pinned: false
 ---
-# Chemical Reaction Predictor
 This application predicts the products of chemical reactions using a state-of-the-art T5-based model.
 ## How to Use the App
-1.  **Input Molecules**: You can either:
-    *   Use the **Chemical Drawing Tool** to draw the reactant and reagent molecules.
-    *   Go to the **SMILES Text Input** tab and paste the SMILES strings directly.
-2.  **Set Parameters**: In the sidebar, you can select the number of predictions you want to generate.
-3.  **Predict**: Click the "Predict Product" button to see the results.
-4.  **Load Examples**: Use the dropdown in the sidebar to load pre-defined example reactions to see how the app works.
 ## About the Model
-This application uses the `sagawa/ReactionT5v2-forward-USPTO_MIT` model, which has been fine-tuned for forward reaction prediction. It achieves a high accuracy of over 97% on the USPTO_MIT dataset.
 For more details about the model, please visit its page on the [Hugging Face Hub](https://huggingface.co/sagawa/ReactionT5v2-forward-USPTO_MIT).

 colorFrom: blue
 colorTo: green
 sdk: streamlit
 app_file: app.py
 pinned: false
 ---
+# 🧪 Chemical Reaction Predictor
 This application predicts the products of chemical reactions using a state-of-the-art T5-based model.
 ## How to Use the App
+1.  **Input Molecules**: You have two options:
+    *   Use the **✍️ Chemical Drawing Tool** to draw the reactant and reagent molecules.
+    *   Switch to the **⌨️ SMILES Text Input** tab and paste the SMILES strings directly.
+2.  **Load Examples (Optional)**: Use the dropdown in the sidebar to load pre-defined example reactions to see how the app works.
+3.  **Set Parameters**: In the sidebar, you can select the number of predictions you want to generate.
+4.  **Predict**: Click the "Predict Product" button to see the results.
 ## About the Model
+This application uses the `sagawa/ReactionT5v2-forward-USPTO_MIT` model, which has been fine-tuned for forward reaction prediction.
 For more details about the model, please visit its page on the [Hugging Face Hub](https://huggingface.co/sagawa/ReactionT5v2-forward-USPTO_MIT).

app.py CHANGED Viewed

@@ -1,155 +1,175 @@
 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-import torch
 from rdkit import Chem
 from rdkit.Chem import Draw
 from streamlit_ketcher import st_ketcher
-# Set page configuration
-st.set_page_config(page_title="Chemical Reaction Predictor", layout="wide")
-# Function to load the model and tokenizer
 @st.cache_resource
 def load_model():
     """Loads the T5 model and tokenizer from Hugging Face."""
     model_name = "sagawa/ReactionT5v2-forward-USPTO_MIT"
-    model = T5ForConditionalGeneration.from_pretrained(model_name)
-    tokenizer = T5Tokenizer.from_pretrained(model_name)
-    return model, tokenizer
-# Function to predict the product
 def predict_product(reactants, reagents, model, tokenizer, num_predictions):
     """Predicts the reaction product using the T5 model."""
     input_text = f"reactants>{reactants}.reagents>{reagents}>products>"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
     # Generate predictions
     outputs = model.generate(
         input_ids,
         max_length=512,
-        num_beams=5,
         num_return_sequences=num_predictions,
-        early_stopping=True
     )
-    # Decode the predictions
     predictions = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
     return predictions
-# Function to display molecules
 def display_molecule(smiles_string, legend):
-    """Displays a molecule from a SMILES string."""
     mol = Chem.MolFromSmiles(smiles_string)
     if mol:
-        img = Draw.MolToImage(mol, size=(300, 300), legend=legend)
-        st.image(img, use_column_width='auto')
     else:
-        st.warning(f"Could not generate molecule for SMILES: {smiles_string}")
-# --- UI Layout ---
-# Header
-st.title("Chemical Reaction Predictor")
-st.markdown("Predict the products of chemical reactions using the `sagawa/ReactionT5v2-forward-USPTO_MIT` model.")
-# Load Model
-with st.spinner("Loading the prediction model..."):
-    model, tokenizer = load_model()
-# Sidebar
 with st.sidebar:
     st.header("Controls and Information")
     # Example Reactions
-    st.subheader("Example Reactions")
     example_reactions = {
         "Esterification": ("CCO.O=C(O)C", "C(C)(=O)O"),
         "Amide Formation": ("CCN.O=C(Cl)C", ""),
         "Suzuki Coupling": ("[B-](C1=CC=CC=C1)(F)(F)F.[K+].CC1=CC=C(Br)C=C1", "c1ccc(B(O)O)cc1"),
     }
-    selected_example = st.selectbox("Choose an example:", list(example_reactions.keys()))
-    if st.button("Load Example"):
-        reactants_smiles_example, reagents_smiles_example = example_reactions[selected_example]
-        st.session_state.reactants_smiles = reactants_smiles_example
-        st.session_state.reagents_smiles = reagents_smiles_example
-        st.session_state.ketcher_reactants = reactants_smiles_example
-        st.session_state.ketcher_reagents = reagents_smiles_example
     # Prediction Parameters
     st.subheader("Prediction Parameters")
-    num_predictions = st.slider("Number of Predictions to Generate", 1, 5, 1)
     # About Section
     st.subheader("About")
     st.info(
-        "This app uses the `sagawa/ReactionT5v2-forward-USPTO_MIT` model to predict chemical reaction products. "
-        "Draw or input the SMILES strings for reactants and reagents, then click 'Predict Product'."
     )
-    st.markdown("[Model on Hugging Face](https://huggingface.co/sagawa/ReactionT5v2-forward-USPTO_MIT)")
-# Main Content
-st.header("Input Reactants and Reagents")
-# Initialize session state for SMILES
-if 'reactants_smiles' not in st.session_state:
-    st.session_state.reactants_smiles = ""
-if 'reagents_smiles' not in st.session_state:
-    st.session_state.reagents_smiles = ""
-# Input Tabs
-input_tab1, input_tab2 = st.tabs(["Chemical Drawing Tool", "SMILES Text Input"])
-with input_tab1:
-    st.subheader("Draw Molecules")
-    col1, col2 = st.columns(2)
-    with col1:
-        st.write("Reactants")
-        if 'ketcher_reactants' in st.session_state:
-            reactant_smiles_from_drawing = st_ketcher(st.session_state.ketcher_reactants, key="ketcher_reactants")
-        else:
-            reactant_smiles_from_drawing = st_ketcher("", key="ketcher_reactants")
-    with col2:
-        st.write("Reagents")
-        if 'ketcher_reagents' in st.session_state:
-             reagent_smiles_from_drawing = st_ketcher(st.session_state.ketcher_reagents, key="ketcher_reagents")
         else:
-             reagent_smiles_from_drawing = st_ketcher("", key="ketcher_reagents")
-    if reactant_smiles_from_drawing != st.session_state.get('ketcher_reactants_val'):
-        st.session_state.reactants_smiles = reactant_smiles_from_drawing
-        st.session_state.ketcher_reactants_val = reactant_smiles_from_drawing
-    if reagent_smiles_from_drawing != st.session_state.get('ketcher_reagents_val'):
-        st.session_state.reagents_smiles = reagent_smiles_from_drawing
-        st.session_state.ketcher_reagents_val = reagent_smiles_from_drawing
-with input_tab2:
-    st.subheader("Enter SMILES Strings")
-    reactants_smiles = st.text_input("Reactants SMILES", st.session_state.reactants_smiles, key="reactants_text_input")
-    reagents_smiles = st.text_input("Reagents SMILES", st.session_state.reagents_smiles, key="reagents_text_input")
-    st.session_state.reactants_smiles = reactants_smiles
-    st.session_state.reagents_smiles = reagents_smiles
-# Prediction Button
-if st.button("Predict Product", type="primary"):
-    reactants_to_use = st.session_state.reactants_smiles
-    reagents_to_use = st.session_state.reagents_smiles
-    if not reactants_to_use:
-        st.error("Please provide reactants.")
-    else:
-        with st.spinner("Predicting reaction..."):
-            predictions = predict_product(reactants_to_use, reagents_to_use, model, tokenizer, num_predictions)
-            st.header("Predicted Products")
-            for i, product_smiles in enumerate(predictions):
-                st.subheader(f"Prediction #{i+1}")
-                st.code(product_smiles, language="smiles")
-                display_molecule(product_smiles, f"Predicted Product {i+1}")

 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
 from rdkit import Chem
 from rdkit.Chem import Draw
 from streamlit_ketcher import st_ketcher
+import torch
+# --- Page Configuration ---
+st.set_page_config(
+    page_title="Chemical Reaction Predictor",
+    page_icon="🧪",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# --- Model Loading ---
 @st.cache_resource
 def load_model():
     """Loads the T5 model and tokenizer from Hugging Face."""
     model_name = "sagawa/ReactionT5v2-forward-USPTO_MIT"
+    try:
+        model = T5ForConditionalGeneration.from_pretrained(model_name)
+        tokenizer = T5Tokenizer.from_pretrained(model_name)
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None
+# --- Core Functions ---
 def predict_product(reactants, reagents, model, tokenizer, num_predictions):
     """Predicts the reaction product using the T5 model."""
+    # Format the input string as required by the model
     input_text = f"reactants>{reactants}.reagents>{reagents}>products>"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
     # Generate predictions
     outputs = model.generate(
         input_ids,
         max_length=512,
+        num_beams=num_predictions * 2,  # Generate more beams for better results
         num_return_sequences=num_predictions,
+        early_stopping=True,
     )
+    # Decode predictions
     predictions = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
     return predictions
 def display_molecule(smiles_string, legend):
+    """Generates and displays a molecule image from a SMILES string."""
     mol = Chem.MolFromSmiles(smiles_string)
     if mol:
+        try:
+            img = Draw.MolToImage(mol, size=(350, 350), legend=legend)
+            st.image(img, use_column_width='auto')
+        except Exception as e:
+            st.warning(f"Could not generate image for SMILES: {smiles_string}. Error: {e}")
     else:
+        st.warning(f"Invalid SMILES string provided: {smiles_string}")
+# --- Initialize Session State ---
+if 'reactants' not in st.session_state:
+    st.session_state.reactants = ""
+if 'reagents' not in st.session_state:
+    st.session_state.reagents = ""
+# --- Sidebar UI ---
 with st.sidebar:
+    st.title("🧪 Reaction Predictor")
+    st.markdown("---")
     st.header("Controls and Information")
     # Example Reactions
     example_reactions = {
+        "Select an example...": ("", ""),
         "Esterification": ("CCO.O=C(O)C", "C(C)(=O)O"),
         "Amide Formation": ("CCN.O=C(Cl)C", ""),
         "Suzuki Coupling": ("[B-](C1=CC=CC=C1)(F)(F)F.[K+].CC1=CC=C(Br)C=C1", "c1ccc(B(O)O)cc1"),
     }
+    def on_example_change():
+        example_key = st.session_state.example_select
+        reactants, reagents = example_reactions[example_key]
+        st.session_state.reactants = reactants
+        st.session_state.reagents = reagents
+    st.selectbox(
+        "Load an Example Reaction",
+        options=list(example_reactions.keys()),
+        key="example_select",
+        on_change=on_example_change
+    )
     # Prediction Parameters
+    st.markdown("---")
     st.subheader("Prediction Parameters")
+    num_predictions = st.slider("Number of Predictions", 1, 5, 1)
+    st.markdown("---")
     # About Section
     st.subheader("About")
     st.info(
+        "This app uses the sagawa/ReactionT5v2-forward-USPTO_MIT model to predict chemical reaction products. "
+        "Draw molecules or input SMILES strings, then click 'Predict Product'."
     )
+    st.markdown("[View Model on Hugging Face](https://huggingface.co/sagawa/ReactionT5v2-forward-USPTO_MIT)")
+# --- Main Application UI ---
+st.title("Chemical Reaction Predictor")
+# Load Model
+model, tokenizer = load_model()
+if model and tokenizer:
+    st.success("Model loaded successfully!")
+    # Input Section
+    st.header("1. Input Reactants and Reagents")
+    input_tab1, input_tab2 = st.tabs(["✍️ Chemical Drawing Tool", "⌨️ SMILES Text Input"])
+    # Callback functions to update session state from text inputs
+    def on_reactant_text_change():
+        st.session_state.reactants = st.session_state.reactant_text
+    def on_reagent_text_change():
+        st.session_state.reagents = st.session_state.reagent_text
+    with input_tab1:
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("Reactants")
+            # The ketcher component's value is controlled by session state
+            reactant_smiles_drawing = st_ketcher(value=st.session_state.reactants, key="ketcher_reactants")
+            # If the drawing changes, update the session state
+            if reactant_smiles_drawing != st.session_state.reactants:
+                st.session_state.reactants = reactant_smiles_drawing
+                st.experimental_rerun()
+        with col2:
+            st.subheader("Reagents")
+            reagent_smiles_drawing = st_ketcher(value=st.session_state.reagents, key="ketcher_reagents")
+            if reagent_smiles_drawing != st.session_state.reagents:
+                st.session_state.reagents = reagent_smiles_drawing
+                st.experimental_rerun()
+    with input_tab2:
+        st.subheader("Enter SMILES Strings")
+        st.text_input("Reactants SMILES", key="reactant_text", on_change=on_reactant_text_change, value=st.session_state.reactants)
+        st.text_input("Reagents SMILES (optional)", key="reagent_text", on_change=on_reagent_text_change, value=st.session_state.reagents)
+    st.info(f"**Current Reactants:** `{st.session_state.reactants}`")
+    st.info(f"**Current Reagents:** `{st.session_state.reagents}`")
+    st.header("2. Generate Prediction")
+    if st.button("Predict Product", type="primary", use_container_width=True):
+        if not st.session_state.reactants:
+            st.error("Error: Reactants cannot be empty. Please draw a molecule or provide a SMILES string.")
         else:
+            with st.spinner("Running prediction... This may take a moment."):
+                predictions = predict_product(
+                    st.session_state.reactants,
+                    st.session_state.reagents,
+                    model,
+                    tokenizer,
+                    num_predictions
+                )
+                st.header("3. Predicted Products")
+                for i, product_smiles in enumerate(predictions):
+                    st.subheader(f"Top Prediction #{i+1}")
+                    st.code(product_smiles, language="smiles")
+                    display_molecule(product_smiles, f"Predicted Product #{i+1}")
+else:
+    st.error("Application could not start. Please check the logs on Hugging Face Spaces.")

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ streamlit
 transformers
 torch
 rdkit
-streamlit-ketcher

 transformers
 torch
 rdkit
+streamlit-ketcher
+sentencepiece