"""
LLM Service for Tibetan Text Metrics

This module provides a unified interface for analyzing text similarity metrics
using both LLM-based and rule-based approaches.
"""

import os
import json
import logging
import requests
import pandas as pd
import re

# Set up logging
# Module-level logger, named after this module, used by every method below.
logger = logging.getLogger(__name__)

# Try to load environment variables
# ENV_LOADED is True only when python-dotenv imported and load_dotenv() ran.
ENV_LOADED = False
try:
    from dotenv import load_dotenv
    load_dotenv()
    ENV_LOADED = True
except ImportError:
    # python-dotenv is optional: without it, os.getenv() only sees variables
    # that are already present in the process environment.
    logger.warning("python-dotenv not installed. Using system environment variables.")

# Constants
# Defaults for the chat-completion request built in LLMService.
DEFAULT_MAX_TOKENS = 4000  # sent as "max_tokens" when the caller gives none
DEFAULT_MODEL = "mistralai/mistral-7b-instruct"  # sent as "model"
DEFAULT_TEMPERATURE = 0.3  # initial value of LLMService.temperature
DEFAULT_TOP_P = 0.9  # initial value of LLMService.top_p

class LLMService:
    """
    Service for analyzing text similarity metrics using LLMs and rule-based methods.
    """
    
    def __init__(self, api_key: str = None):
        """
        Initialize the LLM service.
        
        Args:
            api_key: Optional API key for OpenRouter. If not provided, will try to load from environment.
        """
        self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
        self.model = DEFAULT_MODEL
        self.temperature = DEFAULT_TEMPERATURE
        self.top_p = DEFAULT_TOP_P
    
    def analyze_similarity(
        self, 
        results_df: pd.DataFrame, 
        use_llm: bool = True,
    ) -> str:
        """
        Analyze similarity metrics using either LLM or rule-based approach.
        
        Args:
            results_df: DataFrame containing similarity metrics
            use_llm: Whether to use LLM for analysis (falls back to rule-based if False or on error)
            
        Returns:
            str: Analysis of the metrics in markdown format with appropriate fallback messages
        """
        # If LLM is disabled, use rule-based analysis
        if not use_llm:
            logger.info("LLM analysis disabled. Using rule-based analysis.")
            return self._analyze_with_rules(results_df)
            
        # Try LLM analysis if enabled
        try:
            if not self.api_key:
                raise ValueError("No OpenRouter API key provided. Please set the OPENROUTER_API_KEY environment variable.")
                
            logger.info("Attempting LLM-based analysis...")
            return self._analyze_with_llm(results_df, max_tokens=DEFAULT_MAX_TOKENS)
            
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Error in LLM analysis: {error_msg}")
            
            # Create a user-friendly error message
            if "payment" in error_msg.lower() or "402" in error_msg:
                error_note = "OpenRouter API payment required. Falling back to rule-based analysis."
            elif "invalid" in error_msg.lower() or "401" in error_msg:
                error_note = "Invalid OpenRouter API key. Falling back to rule-based analysis."
            elif "rate limit" in error_msg.lower() or "429" in error_msg:
                error_note = "API rate limit exceeded. Falling back to rule-based analysis."
            else:
                error_note = f"LLM analysis failed: {error_msg[:200]}. Falling back to rule-based analysis."
            
            # Get rule-based analysis
            rule_based_analysis = self._analyze_with_rules(results_df)
            
            # Combine the error message with the rule-based analysis
            return f"## Analysis of Tibetan Text Similarity Metrics\n\n*Note: {error_note}*\n\n{rule_based_analysis}"
    
    def _prepare_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prepare the DataFrame for analysis.
        
        Args:
            df: Input DataFrame with similarity metrics
            
        Returns:
            pd.DataFrame: Cleaned and prepared DataFrame
        """
        # Make a copy to avoid modifying the original
        df = df.copy()
        
        # Clean text columns
        text_cols = ['Text A', 'Text B']
        for col in text_cols:
            if col in df.columns:
                df[col] = df[col].fillna('Unknown').astype(str)
                df[col] = df[col].str.replace('.txt$', '', regex=True)
        
        # Filter out perfect matches (likely empty cells)
        metrics_cols = ['Jaccard Similarity (%)', 'Normalized LCS', 'TF-IDF Cosine Sim']
        if all(col in df.columns for col in metrics_cols):
            mask = ~((df['Jaccard Similarity (%)'] == 100.0) & 
                    (df['Normalized LCS'] == 1.0) & 
                    (df['TF-IDF Cosine Sim'] == 1.0))
            df = df[mask].copy()
        
        return df
    
    def _analyze_with_llm(self, df: pd.DataFrame, max_tokens: int) -> str:
        """
        Run the LLM analysis: build the prompt, query OpenRouter, validate the reply.

        Args:
            df: DataFrame with metrics; serialized into the prompt and later used
                to validate the reply
            max_tokens: Maximum tokens for the response

        Returns:
            str: LLM analysis in markdown format

        Raises:
            Exception: Whatever the API call or the response validation raises;
                it is logged here and re-raised unchanged.
        """
        # Prompt construction happens outside the guarded section, so its
        # failures propagate without the extra log entry below.
        user_prompt = self._create_llm_prompt(df)

        try:
            raw_answer = self._call_openrouter_api(
                prompt=user_prompt,
                system_message=self._get_system_prompt(),
                max_tokens=max_tokens,
                temperature=self.temperature,
                top_p=self.top_p
            )
            return self._format_llm_response(raw_answer, df)
        except Exception as exc:
            logger.error(f"Error in LLM analysis: {str(exc)}")
            raise
    
    def _analyze_with_rules(self, df: pd.DataFrame) -> str:
        """
        Analyze metrics using rule-based approach.
        
        Args:
            df: Prepared DataFrame with metrics
            
        Returns:
            str: Rule-based analysis in markdown format
        """
        analysis = ["## Tibetan Text Similarity Analysis (Rule-Based)"]
        
        # Basic stats
        text_a_col = 'Text A' if 'Text A' in df.columns else None
        text_b_col = 'Text B' if 'Text B' in df.columns else None
        
        if text_a_col and text_b_col:
            unique_texts = set(df[text_a_col].unique()) | set(df[text_b_col].unique())
            analysis.append(f"- **Texts analyzed:** {', '.join(sorted(unique_texts))}")
        
        # Analyze each metric
        metric_analyses = []
        
        if 'Jaccard Similarity (%)' in df.columns:
            jaccard_analysis = self._analyze_jaccard(df)
            metric_analyses.append(jaccard_analysis)
            
        if 'Normalized LCS' in df.columns:
            lcs_analysis = self._analyze_lcs(df)
            metric_analyses.append(lcs_analysis)
            
        if 'TF-IDF Cosine Sim' in df.columns:
            tfidf_analysis = self._analyze_tfidf(df)
            metric_analyses.append(tfidf_analysis)
        
        # Add all metric analyses
        if metric_analyses:
            analysis.extend(metric_analyses)
        
        # Add overall interpretation
        analysis.append("\n## Overall Interpretation")
        analysis.append(self._generate_overall_interpretation(df))
        
        return "\n\n".join(analysis)
    
    def _analyze_jaccard(self, df: pd.DataFrame) -> str:
        """Analyze Jaccard similarity scores."""
        jaccard = df['Jaccard Similarity (%)'].dropna()
        if jaccard.empty:
            return ""
            
        mean_jaccard = jaccard.mean()
        max_jaccard = jaccard.max()
        min_jaccard = jaccard.min()
        
        analysis = [
            "### Jaccard Similarity Analysis",
            f"- **Range:** {min_jaccard:.1f}% to {max_jaccard:.1f}% (mean: {mean_jaccard:.1f}%)"
        ]
        
        # Interpret the scores
        if mean_jaccard > 60:
            analysis.append("- **High vocabulary overlap** suggests texts share significant content or are from the same tradition.")
        elif mean_jaccard > 30:
            analysis.append("- **Moderate vocabulary overlap** indicates some shared content or themes.")
        else:
            analysis.append("- **Low vocabulary overlap** suggests texts are on different topics or from different traditions.")
        
        # Add top pairs
        top_pairs = df.nlargest(3, 'Jaccard Similarity (%)')
        if not top_pairs.empty:
            analysis.append("\n**Most similar pairs:**")
            for _, row in top_pairs.iterrows():
                text_a = row.get('Text A', 'Text 1')
                text_b = row.get('Text B', 'Text 2')
                score = row['Jaccard Similarity (%)']
                analysis.append(f"- {text_a} ↔ {text_b}: {score:.1f}%")
        
        return "\n".join(analysis)
    
    def _analyze_lcs(self, df: pd.DataFrame) -> str:
        """Analyze Longest Common Subsequence scores."""
        lcs = df['Normalized LCS'].dropna()
        if lcs.empty:
            return ""
            
        mean_lcs = lcs.mean()
        max_lcs = lcs.max()
        min_lcs = lcs.min()
        
        analysis = [
            "### Structural Similarity (LCS) Analysis",
            f"- **Range:** {min_lcs:.2f} to {max_lcs:.2f} (mean: {mean_lcs:.2f})"
        ]
        
        # Interpret the scores
        if mean_lcs > 0.7:
            analysis.append("- **High structural similarity** suggests texts follow similar organizational patterns.")
        elif mean_lcs > 0.4:
            analysis.append("- **Moderate structural similarity** indicates some shared organizational elements.")
        else:
            analysis.append("- **Low structural similarity** suggests different organizational approaches.")
        
        # Add top pairs
        top_pairs = df.nlargest(3, 'Normalized LCS')
        if not top_pairs.empty:
            analysis.append("\n**Most structurally similar pairs:**")
            for _, row in top_pairs.iterrows():
                text_a = row.get('Text A', 'Text 1')
                text_b = row.get('Text B', 'Text 2')
                score = row['Normalized LCS']
                analysis.append(f"- {text_a} ↔ {text_b}: {score:.2f}")
        
        return "\n".join(analysis)
    
    def _analyze_tfidf(self, df: pd.DataFrame) -> str:
        """Analyze TF-IDF cosine similarity scores."""
        tfidf = df['TF-IDF Cosine Sim'].dropna()
        if tfidf.empty:
            return ""
            
        mean_tfidf = tfidf.mean()
        max_tfidf = tfidf.max()
        min_tfidf = tfidf.min()
        
        analysis = [
            "### Thematic Similarity (TF-IDF) Analysis",
            f"- **Range:** {min_tfidf:.2f} to {max_tfidf:.2f} (mean: {mean_tfidf:.2f})"
        ]
        
        # Interpret the scores
        if mean_tfidf > 0.8:
            analysis.append("- **High thematic similarity** suggests texts share distinctive terms and concepts.")
        elif mean_tfidf > 0.5:
            analysis.append("- **Moderate thematic similarity** indicates some shared distinctive terms.")
        else:
            analysis.append("- **Low thematic similarity** suggests different conceptual focuses.")
        
        # Add top pairs
        top_pairs = df.nlargest(3, 'TF-IDF Cosine Sim')
        if not top_pairs.empty:
            analysis.append("\n**Most thematically similar pairs:**")
            for _, row in top_pairs.iterrows():
                text_a = row.get('Text A', 'Text 1')
                text_b = row.get('Text B', 'Text 2')
                score = row['TF-IDF Cosine Sim']
                analysis.append(f"- {text_a} ↔ {text_b}: {score:.2f}")
        
        return "\n".join(analysis)
    
    def _generate_overall_interpretation(self, df: pd.DataFrame) -> str:
        """Generate an overall interpretation of the metrics."""
        interpretations = []
        
        # Get metrics if they exist
        has_jaccard = 'Jaccard Similarity (%)' in df.columns
        has_lcs = 'Normalized LCS' in df.columns
        has_tfidf = 'TF-IDF Cosine Sim' in df.columns
        
        # Calculate means for available metrics
        metrics = {}
        if has_jaccard:
            metrics['jaccard'] = df['Jaccard Similarity (%)'].mean()
        if has_lcs:
            metrics['lcs'] = df['Normalized LCS'].mean()
        if has_tfidf:
            metrics['tfidf'] = df['TF-IDF Cosine Sim'].mean()
        
        # Generate interpretation based on metrics
        if metrics:
            interpretations.append("Based on the analysis of similarity metrics:")
            
            if has_jaccard and metrics['jaccard'] > 60:
                interpretations.append("- The high Jaccard similarity indicates significant vocabulary overlap between texts, "
                                     "suggesting they may share common sources or be part of the same textual tradition.")
            
            if has_lcs and metrics['lcs'] > 0.7:
                interpretations.append("- The high LCS score indicates strong structural similarity, "
                                     "suggesting the texts may follow similar organizational patterns or share common structural elements.")
            
            if has_tfidf and metrics['tfidf'] > 0.8:
                interpretations.append("- The high TF-IDF similarity suggests the texts share distinctive terms and concepts, "
                                     "indicating they may cover similar topics or themes.")
            
            # Add cross-metric interpretations
            if has_jaccard and has_lcs and metrics['jaccard'] > 60 and metrics['lcs'] > 0.7:
                interpretations.append("\nThe combination of high Jaccard and LCS similarities strongly suggests "
                                     "that these texts are closely related, possibly being different versions or "
                                     "transmissions of the same work or sharing a common source.")
            
            if has_tfidf and has_jaccard and metrics['tfidf'] < 0.5 and metrics['jaccard'] > 60:
                interpretations.append("\nThe high Jaccard but lower TF-IDF similarity suggests that while the texts "
                                     "share many common words, they may use them in different contexts or with different "
                                     "meanings, possibly indicating different interpretations of similar material.")
        
        # Add general guidance if no specific patterns found
        if not interpretations:
            interpretations.append("The analysis did not reveal strong patterns in the similarity metrics. "
                                 "This could indicate that the texts are either very similar or very different "
                                 "across all measured dimensions.")
        
        return "\n\n".join(interpretations)
    
    def _create_llm_prompt(self, df: pd.DataFrame) -> str:
        """
        Create a prompt for the LLM based on the DataFrame.
        
        Args:
            df: Prepared DataFrame with metrics
            
        Returns:
            str: Formatted prompt for the LLM
        """
        # Format the CSV data for the prompt
        csv_data = df.to_csv(index=False)
        
        # Create the prompt using the user's template
        prompt = """You are a specialized text analysis interpreter with expertise in Tibetan textual studies. Your task is to analyze text similarity data from a CSV file and create a clear, narrative explanation for scholars who may not have technical expertise.

<CONTEXT>
This data comes from a text similarity analysis tool designed for various genres of Tibetan sources including historical, religious, literary, and philosophical texts. The tool compares texts using multiple linguistic metrics:
- Jaccard Similarity (%): Measures word overlap between texts (higher % = more similar)
- Normalized LCS: Longest Common Subsequence, measuring sequential text patterns
- Semantic Similarity: Deep meaning comparison using sentence transformers or fasttext
- TF-IDF Cosine Similarity: Term frequency-inverse document frequency comparison
The "Chapter" column indicates which chapter/section of the texts is being compared.
</CONTEXT>

<INSTRUCTIONS>
1. Begin by identifying the specific texts being compared in the data (e.g., "Japan13.txt vs Dolanji.txt").

2. Create a dual-layer narrative analysis (800-1000 words) that includes:
   a) A high-level overview of text similarity patterns accessible to non-technical readers
   b) A more detailed analysis for scholars interested in specific textual relationships

3. In your analysis:
   - Summarize overall similarity patterns between the texts across all chapters
   - Identify which chapters show strongest similarities and differences
   - Explain whether similarities appear to be more lexical (Jaccard, LCS) or conceptual (Semantic)
   - Interpret what these patterns might suggest about textual relationships, transmission, or variant histories
   - Note any interesting anomalies (e.g., chapters with high semantic but low lexical similarity)

4. Structure your analysis with:
   - An introduction explaining the texts compared and general observations
   - A section on overall patterns across all chapters with visualized trends
   - A detailed examination of 2-3 notable chapters (highest/lowest similarity)
   - A discussion of what different metrics reveal about textual relationships
   - A conclusion suggesting what these patterns might mean for Tibetan textual scholarship
   - 2-3 specific questions these findings raise for further investigation

5. Connect your analysis to common interests in Tibetan textual studies such as:
   - Textual transmission and lineages
   - Regional variants and dialectical differences
   - Potential historical relationships between texts
   - Original vs. commentary material identification

6. Consider using a "family tree" analogy to make the textual relationships more intuitive. For example:
   - Texts with very high similarity (>80%) might be described as "siblings" from the same direct source
   - Texts with moderate similarity (50-80%) could be "cousins" sharing a common ancestor but with separate development
   - Texts with low similarity (<50%) might be "distant relatives" with only fundamental connections
   Use this metaphor if it helps clarify the relationships, but don't force it if another explanation would be clearer.

7. **Important note on perfect or zero similarity matches:**  
   If you notice that all metrics indicate perfect or near-perfect similarity (for example, scores of 1.0/100 across all metrics for a chapter) or 0 for a complete mismatch, this may not indicate true textual identity or lack thereof. Instead, it likely means both corresponding text cells were empty or contained no content. In these cases, be sure to clarify in your narrative that such results are *artifacts of missing data, not genuine textual matches*, and should be interpreted with caution.

8. Balance scholarly precision with accessibility, explaining technical concepts when necessary while keeping the overall narrative engaging for non-technical readers.
</INSTRUCTIONS>

Here is the CSV data to analyze:
[CSV_DATA]
"""
        
        # Replace [CSV_DATA] with the actual CSV data
        prompt = prompt.replace("[CSV_DATA]", csv_data)
        
        return prompt
    
    def _get_system_prompt(self) -> str:
        """Get the system prompt for the LLM."""
        return """
        You are a senior scholar of Tibetan Buddhist texts with expertise in textual criticism and 
        comparative analysis. Your task is to analyze the provided similarity metrics and provide 
        expert-level insights into the relationships between these Tibetan texts.
        
        CRITICAL INSTRUCTIONS:
        1. Your analysis MUST be grounded in the specific metrics provided
        2. Always reference actual text names and metric values when making claims
        3. Focus on what the data shows, not what it might show
        4. Be precise and avoid vague or generic statements
        
        ANALYSIS APPROACH:
        1. Begin with a brief executive summary of the most significant findings
        2. Group similar text pairs and explain their relationships
        3. Highlight any patterns that suggest textual transmission or common sources
        4. Note any anomalies or unexpected results that merit further investigation
        5. Provide specific examples from the data to support your analysis
        
        TIBETAN TEXT-SPECIFIC GUIDANCE:
        - Consider the implications of shared vocabulary in the context of Tibetan Buddhist literature
        - Be aware that high LCS scores might indicate shared liturgical or formulaic language
        - Note that texts with similar Jaccard but different LCS scores might share content but differ in structure
        - Consider the possibility of text reuse, commentary traditions, or shared sources
        
        Your analysis should be scholarly but accessible, providing clear insights that would be 
        valuable to researchers studying these texts.
        """
    
    def _call_openrouter_api(
        self,
        prompt: str,
        system_message: str = None,
        max_tokens: int = None,
        temperature: float = None,
        top_p: float = None
    ) -> str:
        """
        Call the OpenRouter API.
        
        Args:
            prompt: The user prompt
            system_message: Optional system message
            max_tokens: Maximum tokens for the response
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            
        Returns:
            str: The API response
            
        Raises:
            ValueError: If API key is missing or invalid
            requests.exceptions.RequestException: For network-related errors
            Exception: For other API-related errors
        """
        if not self.api_key:
            error_msg = "OpenRouter API key not provided. Please set the OPENROUTER_API_KEY environment variable."
            logger.error(error_msg)
            raise ValueError(error_msg)
        
        url = "https://openrouter.ai/api/v1/chat/completions"
        
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/daniel-wojahn/tibetan-text-metrics",
            "X-Title": "Tibetan Text Metrics"
        }
        
        messages = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        messages.append({"role": "user", "content": prompt})
        
        data = {
            "model": self.model,
            "messages": messages,
            "max_tokens": max_tokens or DEFAULT_MAX_TOKENS,
            "temperature": temperature or self.temperature,
            "top_p": top_p or self.top_p,
        }
        
        try:
            logger.info(f"Calling OpenRouter API with model: {self.model}")
            response = requests.post(url, headers=headers, json=data, timeout=60)
            
            # Handle different HTTP status codes
            if response.status_code == 200:
                result = response.json()
                if 'choices' in result and len(result['choices']) > 0:
                    return result['choices'][0]['message']['content'].strip()
                else:
                    error_msg = "Unexpected response format from OpenRouter API"
                    logger.error(f"{error_msg}: {result}")
                    raise ValueError(error_msg)
                    
            elif response.status_code == 401:
                error_msg = "Invalid OpenRouter API key. Please check your API key and try again."
                logger.error(error_msg)
                raise ValueError(error_msg)
                
            elif response.status_code == 402:
                error_msg = "OpenRouter API payment required. Please check your OpenRouter account balance or billing status."
                logger.error(error_msg)
                raise ValueError(error_msg)
                
            elif response.status_code == 429:
                error_msg = "API rate limit exceeded. Please try again later or check your OpenRouter rate limits."
                logger.error(error_msg)
                raise ValueError(error_msg)
                
            else:
                error_msg = f"OpenRouter API error: {response.status_code} - {response.text}"
                logger.error(error_msg)
                raise Exception(error_msg)
                
        except requests.exceptions.RequestException as e:
            error_msg = f"Failed to connect to OpenRouter API: {str(e)}"
            logger.error(error_msg)
            raise Exception(error_msg) from e
            
        except json.JSONDecodeError as e:
            error_msg = f"Failed to parse OpenRouter API response: {str(e)}"
            logger.error(error_msg)
            raise Exception(error_msg) from e
    
    def _format_llm_response(self, response: str, df: pd.DataFrame) -> str:
        """
        Format the LLM response for display.
        
        Args:
            response: Raw LLM response
            df: Original DataFrame for reference
            
        Returns:
            str: Formatted response with fallback if needed
        """
        # Basic validation
        if not response or len(response) < 100:
            raise ValueError("Response too short or empty")
        
        # Check for garbled output (random numbers, nonsensical patterns)
        # This is a simple heuristic - look for long sequences of numbers or strange patterns
        suspicious_patterns = [
            r'\d{8,}',  # Long number sequences
            r'[0-9,.]{20,}',  # Long sequences of digits, commas and periods
            r'[\W]{20,}',  # Long sequences of non-word characters
        ]
        
        for pattern in suspicious_patterns:
            if re.search(pattern, response):
                logger.warning(f"Detected potentially garbled output matching pattern: {pattern}")
                # Don't immediately raise - we'll do a more comprehensive check
        
        # Check for content quality - ensure it has expected sections
        expected_content = [
            "introduction", "analysis", "similarity", "patterns", "conclusion", "question"
        ]
        
        # Count how many expected content markers we find
        content_matches = sum(1 for term in expected_content if term.lower() in response.lower())
        
        # If we find fewer than 3 expected content markers, it's likely not a good analysis
        if content_matches < 3:
            logger.warning(f"LLM response missing expected content sections (found {content_matches}/6)")
            raise ValueError("Response does not contain expected analysis sections")
        
        # Check for text names from the dataset
        # Extract text names from the Text Pair column
        text_names = set()
        if "Text Pair" in df.columns:
            for pair in df["Text Pair"]:
                if isinstance(pair, str) and " vs " in pair:
                    texts = pair.split(" vs ")
                    text_names.update(texts)
        
        # Check if at least some text names appear in the response
        text_name_matches = sum(1 for name in text_names if name in response)
        if text_names and text_name_matches == 0:
            logger.warning("LLM response does not mention any of the text names from the dataset")
            raise ValueError("Response does not reference any of the analyzed texts")
        
        # Ensure basic markdown structure
        if '##' not in response:
            response = f"## Analysis of Tibetan Text Similarity\n\n{response}"
        
        # Add styling to make the output more readable
        response = f"<div class='llm-analysis'>\n{response}\n</div>"
        
        return response


def get_interpretation(results_df: pd.DataFrame, use_llm: bool = True) -> str:
    """
    Interpret similarity metrics with a freshly created LLMService.

    Convenience wrapper: the service is constructed without an explicit API
    key, and the call is forwarded to ``LLMService.analyze_similarity``.

    Args:
        results_df: DataFrame containing similarity metrics
        use_llm: Whether to use LLM for analysis (falls back to rule-based if False or on error)

    Returns:
        str: Analysis of the metrics in markdown format
    """
    return LLMService().analyze_similarity(results_df, use_llm=use_llm)