# models/text_quality.py
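"""Zero-shot text-quality assessment for listing descriptions.

Uses a zero-shot NLI classifier (typeform/mobilebert-uncased-mnli) to rate
how detailed, professional, clear, and authentic a text is, and to flag
content that looks AI-generated or template-based.
"""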

from .model_loader import load_model
from .logging_config import logger

def assess_text_quality(text):
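    """Assess the quality of ``text`` and return a structured report.

    The returned dict contains an overall ``assessment`` label, a 0-100
    ``score``, human-readable ``reasoning``, an ``is_ai_generated`` flag, and
    the intermediate ``quality_metrics`` and ``top_classifications``.
    """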
    try:
        if not text or len(text.strip()) < 20:
            return {
                'assessment': 'insufficient',
                'score': 0,
                'reasoning': 'Text too short.',
                'is_ai_generated': False,
                'quality_metrics': {}
            }

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Classify the first 1,000 characters against the quality labels (multi-label)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # AI-generation check using the same zero-shot classifier
        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
        is_ai_generated = (
            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
        )

        # Calculate quality metrics, aggregating label scores into the 0-1
        # range so they line up with the thresholds below and the 0-100 score
        label_scores = dict(zip(quality_result['labels'], quality_result['scores']))
        negative_labels = {'vague description', 'misleading content', 'spam-like content'}

        def mean_score(labels):
            labels = list(labels)
            return sum(label_scores.get(label, 0.0) for label in labels) / len(labels)

        quality_metrics = {
            'detail_level': mean_score(['detailed and informative', 'adequately detailed']),
            'professionalism': mean_score(['professional listing', 'authentic description']),
            'clarity': mean_score(label for label in quality_categories if label not in negative_labels),
            'authenticity': 1.0 - mean_score(['template-based content', 'spam-like content'])
        }

        # Calculate overall score with weighted metrics
        weights = {
            'detail_level': 0.3,
            'professionalism': 0.25,
            'clarity': 0.25,
            'authenticity': 0.2
        }

        score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
        score = score * 100  # Convert to percentage

        # Adjust score for AI-generated content
        if is_ai_generated:
            score = score * 0.7  # Reduce score by 30% for AI-generated content

        # Generate detailed reasoning
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            reasoning_parts.append(f"Primary assessment: {primary_class}")

        if quality_metrics['detail_level'] > 0.7:
            reasoning_parts.append("Contains comprehensive details")
        elif quality_metrics['detail_level'] > 0.4:
            reasoning_parts.append("Contains adequate details")
        else:
            reasoning_parts.append("Lacks important details")

        if quality_metrics['professionalism'] > 0.7:
            reasoning_parts.append("Professional listing style")
        elif quality_metrics['professionalism'] < 0.4:
            reasoning_parts.append("Amateur listing style")

        if quality_metrics['clarity'] < 0.5:
            reasoning_parts.append("Content clarity issues detected")

        if is_ai_generated:
            reasoning_parts.append("Content appears to be AI-generated")

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'score': int(score),
            'reasoning': '. '.join(reasoning_parts),
            'is_ai_generated': is_ai_generated,
            'quality_metrics': quality_metrics,
            'top_classifications': top_classifications
        }
    except Exception as e:
        logger.error(f"Error assessing text quality: {str(e)}")
        return {
            'assessment': 'could not assess',
            'score': 50,
            'reasoning': 'Technical error.',
            'is_ai_generated': False,
            'quality_metrics': {},
            'top_classifications': []
        }
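

# Illustrative usage sketch (an assumption, not part of the original module):
# it presumes the package is importable as ``models`` and that ``load_model``
# returns a Hugging Face zero-shot classification pipeline. The sample text is
# invented for demonstration. Run with: python -m models.text_quality
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom flat five minutes from the station. Recently "
        "renovated kitchen and bathroom, with photos of every room and an "
        "accurate floor plan included in the listing."
    )
    result = assess_text_quality(sample)
    print(f"Assessment: {result['assessment']} (score {result['score']})")
    print(f"Reasoning: {result['reasoning']}")
    if result['is_ai_generated']:
        print("Warning: text flagged as likely AI-generated")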