Spaces:

SiddharthAK
/

TextLSRDemo

Running

App Files Files Community

SiddharthAK committed 17 days ago

Commit

3bcd060

verified ·

1 Parent(s): 372cab2

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -45

app.py CHANGED Viewed

@@ -244,7 +244,8 @@ def predict_representation_explorer(model_choice, text):
     else:
         return "Please select a model."
-# --- NEW: Core Representation Functions (Return RAW TENSORS - for Dot Product Tab) ---
 def get_splade_cocondenser_vector(text):
     if tokenizer_splade is None or model_splade is None:
         return None
@@ -307,7 +308,8 @@ def get_splade_doc_vector(text):
     return None
-# --- NEW: Function to get formatted representation from a raw vector and tokenizer ---
 def format_sparse_vector_output(splade_vector, tokenizer, is_binary=False):
     if splade_vector is None:
         return "Failed to generate vector."
@@ -353,48 +355,42 @@ def format_sparse_vector_output(splade_vector, tokenizer, is_binary=False):
     return formatted_output
-# --- NEW: Dot Product Calculation Function for the new tab ---
-def calculate_dot_product_and_representations(model_choice, query_text, doc_text):
-    query_vector = None
-    doc_vector = None
-    query_rep_str = ""
-    doc_rep_str = ""
-    selected_tokenizer = None
-    if model_choice == "SPLADE-cocondenser-distil (weighting and expansion)":
-        query_vector = get_splade_cocondenser_vector(query_text)
-        doc_vector = get_splade_cocondenser_vector(doc_text)
-        selected_tokenizer = tokenizer_splade
-        query_rep_str = "Query SPLADE-cocondenser-distil Representation (Weighting and Expansion):\n"
-        doc_rep_str = "Document SPLADE-cocondenser-distil Representation (Weighting and Expansion):\n"
-        is_binary = False
-    elif model_choice == "SPLADE-v3-Lexical (weighting)":
-        query_vector = get_splade_lexical_vector(query_text)
-        doc_vector = get_splade_lexical_vector(doc_text)
-        selected_tokenizer = tokenizer_splade_lexical
-        query_rep_str = "Query SPLADE-v3-Lexical Representation (Weighting):\n"
-        doc_rep_str = "Document SPLADE-v3-Lexical Representation (Weighting):\n"
-        is_binary = False
-    elif model_choice == "SPLADE-v3-Doc (binary)":
-        query_vector = get_splade_doc_vector(query_text)
-        doc_vector = get_splade_doc_vector(doc_text)
-        selected_tokenizer = tokenizer_splade_doc
-        query_rep_str = "Query SPLADE-v3-Doc Representation (Binary):\n"
-        doc_rep_str = "Document SPLADE-v3-Doc Representation (Binary):\n"
-        is_binary = True
     else:
-        return "Please select a model.", "", ""
     if query_vector is None or doc_vector is None:
-        return "Failed to generate one or both vectors. Please check model loading.", "", ""
     # Calculate dot product
     dot_product = float(torch.dot(query_vector.cpu(), doc_vector.cpu()).item())
     # Format representations
-    query_rep_str += format_sparse_vector_output(query_vector, selected_tokenizer, is_binary)
-    doc_rep_str += format_sparse_vector_output(doc_vector, selected_tokenizer, is_binary)
     # Combine output
     full_output = f"### Dot Product Score: {dot_product:.6f}\n\n"
@@ -437,18 +433,27 @@ with gr.Blocks(title="SPLADE Demos") as demo:
         with gr.TabItem("Query-Document Dot Product Calculator"): # NEW TAB
             gr.Markdown("### Calculate Dot Product Similarity between Query and Document")
-            gr.Markdown("Select a SPLADE model to encode both your query and document, then see their sparse representations and their similarity score.")
             gr.Interface(
-                fn=calculate_dot_product_and_representations,
                 inputs=[
                     gr.Radio(
-                        [
-                            "SPLADE-cocondenser-distil (weighting and expansion)",
-                            "SPLADE-v3-Lexical (weighting)",
-                            "SPLADE-v3-Doc (binary)"
-                        ],
-                        label="Choose Encoding Model",
-                        value="SPLADE-cocondenser-distil (weighting and expansion)"
                     ),
                     gr.Textbox(
                         lines=3,

     else:
         return "Please select a model."
+# --- Core Representation Functions (Return RAW TENSORS - for Dot Product Tab) ---
+# These functions remain unchanged from the previous iteration, as they return the raw tensors.
 def get_splade_cocondenser_vector(text):
     if tokenizer_splade is None or model_splade is None:
         return None
     return None
+# --- Function to get formatted representation from a raw vector and tokenizer ---
+# This function remains unchanged as it's a generic formatter for any sparse vector.
 def format_sparse_vector_output(splade_vector, tokenizer, is_binary=False):
     if splade_vector is None:
         return "Failed to generate vector."
     return formatted_output
+# --- NEW/MODIFIED: Helper to get the correct vector function, tokenizer, and binary flag ---
+def get_model_assets(model_choice_str):  # Map a model-choice label to (vector_fn, tokenizer, is_binary, display_name).
+    if model_choice_str == "SPLADE-cocondenser-distil (weighting and expansion)":  # label is compared by exact string equality
+        return get_splade_cocondenser_vector, tokenizer_splade, False, "SPLADE-cocondenser-distil (Weighting and Expansion)"
+    elif model_choice_str == "SPLADE-v3-Lexical (weighting)":
+        return get_splade_lexical_vector, tokenizer_splade_lexical, False, "SPLADE-v3-Lexical (Weighting)"
+    elif model_choice_str == "SPLADE-v3-Doc (binary)":  # the only choice that returns is_binary=True
+        return get_splade_doc_vector, tokenizer_splade_doc, True, "SPLADE-v3-Doc (Binary)"
     else:  # unrecognized label
+        return None, None, False, "Unknown Model"  # vector_fn is None; the dot-product caller checks for this before encoding
+# --- MODIFIED: Dot Product Calculation Function for the new tab ---
+def calculate_dot_product_and_representations_independent(query_model_choice, doc_model_choice, query_text, doc_text):
+    query_vector_fn, query_tokenizer, query_is_binary, query_model_name_display = get_model_assets(query_model_choice)
+    doc_vector_fn, doc_tokenizer, doc_is_binary, doc_model_name_display = get_model_assets(doc_model_choice)
+    if query_vector_fn is None or doc_vector_fn is None:
+        return "Please select valid models for both query and document encoding.", "", ""
+    query_vector = query_vector_fn(query_text)
+    doc_vector = doc_vector_fn(doc_text)
     if query_vector is None or doc_vector is None:
+        return "Failed to generate one or both vectors. Please check model loading and input text.", "", ""
     # Calculate dot product
+    # Move both vectors to the CPU before the dot product so torch.dot never receives
+    # tensors on different devices; .item() then extracts the scalar as a Python float.
     dot_product = float(torch.dot(query_vector.cpu(), doc_vector.cpu()).item())
     # Format representations
+    query_rep_str = f"Query Representation ({query_model_name_display}):\n"
+    query_rep_str += format_sparse_vector_output(query_vector, query_tokenizer, query_is_binary)
+    doc_rep_str = f"Document Representation ({doc_model_name_display}):\n"
+    doc_rep_str += format_sparse_vector_output(doc_vector, doc_tokenizer, doc_is_binary)
     # Combine output
     full_output = f"### Dot Product Score: {dot_product:.6f}\n\n"
         with gr.TabItem("Query-Document Dot Product Calculator"): # NEW TAB
             gr.Markdown("### Calculate Dot Product Similarity between Query and Document")
+            gr.Markdown("Select **independent** SPLADE models to encode your query and document, then see their sparse representations and their similarity score.")
+            # Define the common model choices for cleaner code
+            model_choices = [
+                "SPLADE-cocondenser-distil (weighting and expansion)",
+                "SPLADE-v3-Lexical (weighting)",
+                "SPLADE-v3-Doc (binary)"
+            ]
             gr.Interface(
+                fn=calculate_dot_product_and_representations_independent, # MODIFIED FUNCTION NAME
                 inputs=[
                     gr.Radio(
+                        model_choices,
+                        label="Choose Query Encoding Model",
+                        value="SPLADE-cocondenser-distil (weighting and expansion)" # Default value
+                    ),
+                    gr.Radio(
+                        model_choices,
+                        label="Choose Document Encoding Model",
+                        value="SPLADE-cocondenser-distil (weighting and expansion)" # Default value
                     ),
                     gr.Textbox(
                         lines=3,