output all nonzero terms (instead of top 20)
app.py CHANGED
@@ -46,14 +46,17 @@ def get_splade_representation(text):
         output = model_splade(**inputs)
 
     if hasattr(output, 'logits'):
-        splade_vector = torch.max(
+        splade_vector = torch.max(
+            torch.log(1 + torch.relu(output.logits)) * inputs['attention_mask'].unsqueeze(-1),
+            dim=1
+        )[0].squeeze()
     else:
         return "Model output structure not as expected for SPLADE. 'logits' not found."
 
     indices = torch.nonzero(splade_vector).squeeze().cpu().tolist()
     if not isinstance(indices, list):
         indices = [indices]
-
+
     values = splade_vector[indices].cpu().tolist()
     token_weights = dict(zip(indices, values))
 
@@ -65,15 +68,13 @@ def get_splade_representation(text):
 
     sorted_representation = sorted(meaningful_tokens.items(), key=lambda item: item[1], reverse=True)
 
-    formatted_output = "SPLADE Representation (
+    formatted_output = "SPLADE Representation (All Non-Zero Terms):\n"
     if not sorted_representation:
         formatted_output += "No significant terms found for this input.\n"
    else:
-        for
-            if i >= 20:
-                break
+        for term, weight in sorted_representation:
             formatted_output += f"- **{term}**: {weight:.4f}\n"
-
+
     formatted_output += "\n--- Raw SPLADE Vector Info ---\n"
     formatted_output += f"Total non-zero terms in vector: {len(indices)}\n"
     formatted_output += f"Sparsity: {1 - (len(indices) / tokenizer_splade.vocab_size):.2%}\n"
@@ -81,6 +82,8 @@ def get_splade_representation(text):
     return formatted_output
 
 
+
+
 def get_unicoil_binary_representation(text):
     if tokenizer_unicoil is None or model_unicoil is None:
         return "UNICOIL model is not loaded. Please check the console for loading errors."