File size: 5,408 Bytes
14cb7ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# models/trust_score.py

from .model_loader import load_model
from .logging_config import logger

def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a 0-100 trust score for a property listing.

    Runs zero-shot classification of ``text`` (first 1000 chars) against a
    fixed set of trust aspects, then adjusts each aspect's confidence using
    the supplied image/PDF analysis results and keyword-based penalty
    heuristics, and combines them with a weighted sum.

    Args:
        text: Listing description to assess.
        image_analysis: List of per-image dicts; keys read here are
            'authenticity_score', 'is_ai_generated', 'is_property_related'.
        pdf_analysis: List of per-document dicts; key read here is
            'verification_score'.

    Returns:
        Tuple ``(score, reasoning)`` — an int in [0, 100] and a
        human-readable explanation. On any failure, logs the error and
        returns the fallback ``(20, "Could not assess trust.")``.
    """
    try:
        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
        aspects = [
            "complete information provided",
            "verified location",
            "consistent data",
            "authentic documents",
            "authentic images",
            "reasonable pricing",
            "verified ownership",
            "proper documentation"
        ]
        result = classifier(text[:1000], aspects, multi_label=True)

        # Much stricter weights with higher emphasis on critical aspects
        # (weights sum to 1.0; unknown labels fall back to 0.1 below).
        weights = {
            "complete information provided": 0.25,
            "verified location": 0.20,
            "consistent data": 0.15,
            "authentic documents": 0.15,
            "authentic images": 0.10,
            "reasonable pricing": 0.05,
            "verified ownership": 0.05,
            "proper documentation": 0.05
        }

        # Hoisted: the lowercased text is scanned by many keyword checks below.
        text_lower = text.lower()

        score = 0
        reasoning_parts = []

        # Much stricter scoring for each aspect
        for label, confidence in zip(result['labels'], result['scores']):
            adjusted_confidence = confidence

            # Stricter document verification
            if label == "authentic documents":
                if not pdf_analysis:
                    adjusted_confidence = 0.0
                else:
                    doc_scores = [p.get('verification_score', 0) for p in pdf_analysis]
                    adjusted_confidence = sum(doc_scores) / max(1, len(doc_scores))
                    # Heavily penalize if any document has low verification score
                    if any(ds < 0.7 for ds in doc_scores):
                        adjusted_confidence *= 0.4
                    # Additional penalty for missing documents (fewer than two)
                    if len(doc_scores) < 2:
                        adjusted_confidence *= 0.5

            # Stricter image verification
            elif label == "authentic images":
                if not image_analysis:
                    adjusted_confidence = 0.0
                else:
                    img_scores = [i.get('authenticity_score', 0) for i in image_analysis]
                    adjusted_confidence = sum(img_scores) / max(1, len(img_scores))
                    # Heavily penalize if any image has low authenticity score
                    if any(s < 0.8 for s in img_scores):
                        adjusted_confidence *= 0.4
                    # Additional penalty for AI-generated images
                    if any(i.get('is_ai_generated', False) for i in image_analysis):
                        adjusted_confidence *= 0.5
                    # Additional penalty for non-property related images
                    if any(not i.get('is_property_related', False) for i in image_analysis):
                        adjusted_confidence *= 0.6

            # Stricter consistency check
            elif label == "consistent data":
                # Check for inconsistencies in the data
                if "inconsistent" in text_lower or "suspicious" in text_lower:
                    adjusted_confidence *= 0.3
                # Check for impossible values
                if "impossible" in text_lower or "invalid" in text_lower:
                    adjusted_confidence *= 0.2
                # Check for missing critical information
                if "missing" in text_lower or "not provided" in text_lower:
                    adjusted_confidence *= 0.4

            # Stricter completeness check
            elif label == "complete information provided":
                # Check for missing critical information
                if len(text) < 300 or "not provided" in text_lower or "missing" in text_lower:
                    adjusted_confidence *= 0.4
                # Check for vague or generic descriptions
                if "generic" in text_lower or "vague" in text_lower:
                    adjusted_confidence *= 0.5
                # Check for suspiciously short descriptions
                if len(text) < 150:
                    adjusted_confidence *= 0.3

            score += adjusted_confidence * weights.get(label, 0.1)
            reasoning_parts.append(f"{label} ({adjusted_confidence:.0%})")

        # Apply additional penalties for suspicious patterns
        if "suspicious" in text_lower or "fraudulent" in text_lower:
            score *= 0.5

        # Apply penalties for suspiciously low values
        if "suspiciously low" in text_lower or "unusually small" in text_lower:
            score *= 0.6

        # Apply penalties for inconsistencies
        if "inconsistent" in text_lower or "mismatch" in text_lower:
            score *= 0.6

        # Apply penalties for missing critical information
        if "missing critical" in text_lower or "incomplete" in text_lower:
            score *= 0.7

        # Scale to percent and clamp to an integer in [0, 100]
        score = min(100, max(0, int(score * 100)))
        reasoning = f"Based on: {', '.join(reasoning_parts)}"
        return score, reasoning
    except Exception as e:
        logger.error(f"Error generating trust score: {str(e)}")
        return 20, "Could not assess trust."