File size: 2,233 Bytes
cc02b66
 
 
 
 
 
b906736
cc02b66
 
 
 
 
 
 
 
 
 
 
 
 
ea829c4
4b7f4a7
 
ea829c4
cc02b66
 
 
ea829c4
4b7f4a7
 
cc02b66
 
4b7f4a7
 
cc02b66
 
4b7f4a7
cc02b66
 
ea829c4
cc02b66
 
 
 
 
 
 
 
 
ea829c4
cc02b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from typing import Dict, List, Any
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

class EndpointHandler:
    """Rerank candidate sentences against a query with a sequence-classification model.

    The handler loads a tokenizer and model once, then scores
    ``(source_sentence, sentence)`` pairs on every call.

    Attributes are plain class-level defaults so deployments can override them
    on the instance without changing the call signature.
    """

    # Upper bound on tokens per (query, document) pair; longer pairs are truncated.
    max_length: int = 512
    # Number of pairs tokenized and scored per forward pass. Bounding this keeps
    # memory use independent of how many documents a request carries.
    batch_size: int = 32

    def __init__(self, path: str = "netandreus/bge-reranker-v2-m3"):
        """Load the tokenizer and model from *path* and move the model to the device.

        Args:
            path: Model identifier or local directory passed to
                ``from_pretrained`` for both tokenizer and model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        # Inference only: switch off dropout and other train-time behavior.
        self.model.eval()

        # Use the GPU when one is available, otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Score every sentence against the source sentence and rank them.

        Expected input format:
        {
            "inputs": {
                "source_sentence": "Your query here",
                "sentences": ["Document 1", "Document 2", ...]
            },
            "normalize": true  # Optional; defaults to False
        }

        ``sentences`` may also be a single string, which is treated as a
        one-document list.

        Args:
            data: Request payload as described above.

        Returns:
            A list of ``{"index": int, "score": float}`` dicts sorted by
            descending score, where ``index`` is the position of the sentence
            in the request. When ``normalize`` is truthy the scores are passed
            through a sigmoid; otherwise they are the raw logits. If the
            payload is malformed or a required field is missing/empty, a
            single-element list ``[{"error": ...}]`` is returned instead.
        """
        error_response = [
            {"error": "Both 'source_sentence' and 'sentences' fields are required inside 'inputs'."}
        ]

        # A payload whose "inputs" is not a mapping (e.g. a bare string) must
        # produce the error response rather than an AttributeError on `.get`.
        if not isinstance(data, dict):
            return error_response
        inputs = data.get("inputs", {})
        if not isinstance(inputs, dict):
            return error_response

        source_sentence = inputs.get("source_sentence")
        sentences = inputs.get("sentences", [])
        normalize = data.get("normalize", False)

        # A lone string would otherwise be iterated character by character and
        # every character scored as its own document.
        if isinstance(sentences, str):
            sentences = [sentences]

        if not source_sentence or not sentences:
            return error_response
        if not isinstance(sentences, (list, tuple)):
            return error_response

        # Pair the query with each candidate document.
        pairs = [[source_sentence, text] for text in sentences]
        step = max(1, int(self.batch_size))

        scores: List[float] = []
        with torch.no_grad():
            for start in range(0, len(pairs), step):
                encoded = self.tokenizer(
                    pairs[start:start + step],
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                    max_length=self.max_length,
                ).to(self.device)

                # Flatten to one score per pair (assumes the model emits a
                # single logit per input pair).
                logits = self.model(**encoded).logits.view(-1)
                if normalize:
                    logits = torch.sigmoid(logits)

                # One bulk transfer per batch instead of `.item()` per score.
                scores.extend(logits.tolist())

        results = [
            {"index": idx, "score": score}
            for idx, score in enumerate(scores)
        ]

        # Highest score first; ties keep their request order (stable sort).
        results.sort(key=lambda item: item["score"], reverse=True)
        return results