# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
    try:
        if not text or len(text.strip()) < 20:
            return {
                'assessment': 'insufficient',
                'score': 0,
                'reasoning': 'Text too short.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'top_classifications': []
            }

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Quality categories with specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Score the text against every category independently (multi-label)
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep the top three classifications whose confidence exceeds 30%
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # AI-generation detection with a second zero-shot pass
        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
        is_ai_generated = (
            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
        )

        # Calculate quality metrics. Multi-label scores are independent and can
        # sum past 1.0, so each metric is clamped to [0, 1] to keep the weighted
        # score and the reasoning thresholds below meaningful.
        label_scores = list(zip(quality_result['labels'], quality_result['scores']))
        quality_metrics = {
            'detail_level': min(1.0, sum(score for label, score in label_scores
                                         if label in ['detailed and informative', 'adequately detailed'])),
            'professionalism': min(1.0, sum(score for label, score in label_scores
                                            if label in ['professional listing', 'authentic description'])),
            'clarity': min(1.0, sum(score for label, score in label_scores
                                    if label not in ['vague description', 'misleading content', 'spam-like content'])),
            'authenticity': max(0.0, 1.0 - sum(score for label, score in label_scores
                                               if label in ['template-based content', 'spam-like content']))
        }

        # Overall score as a weighted combination of the metrics
        weights = {
            'detail_level': 0.3,
            'professionalism': 0.25,
            'clarity': 0.25,
            'authenticity': 0.2
        }
        score = sum(value * weights[name] for name, value in quality_metrics.items())
        score = score * 100  # Convert to percentage

        # Penalise AI-generated content by 30%
        if is_ai_generated:
            score = score * 0.7

        # Generate detailed reasoning
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            reasoning_parts.append(f"Primary assessment: {primary_class}")

        if quality_metrics['detail_level'] > 0.7:
            reasoning_parts.append("Contains comprehensive details")
        elif quality_metrics['detail_level'] > 0.4:
            reasoning_parts.append("Contains adequate details")
        else:
            reasoning_parts.append("Lacks important details")

        if quality_metrics['professionalism'] > 0.7:
            reasoning_parts.append("Professional listing style")
        elif quality_metrics['professionalism'] < 0.4:
            reasoning_parts.append("Amateur listing style")

        if quality_metrics['clarity'] < 0.5:
            reasoning_parts.append("Content clarity issues detected")

        if is_ai_generated:
            reasoning_parts.append("Content appears to be AI-generated")

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'score': int(score),
            'reasoning': '. '.join(reasoning_parts),
            'is_ai_generated': is_ai_generated,
            'quality_metrics': quality_metrics,
            'top_classifications': top_classifications
        }

    except Exception as e:
        logger.error(f"Error assessing text quality: {str(e)}")
        return {
            'assessment': 'could not assess',
            'score': 50,
            'reasoning': 'Technical error.',
            'is_ai_generated': False,
            'quality_metrics': {},
            'top_classifications': []
        }
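

# --- Usage sketch (illustrative only, not part of the module's public surface) ---
# A minimal example of how assess_text_quality might be called, assuming
# load_model wraps a Hugging Face zero-shot-classification pipeline that
# returns dicts of the form {'labels': [...], 'scores': [...]}. The sample
# text below is invented for demonstration.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment close to the city centre. Recently "
        "renovated kitchen, hardwood floors, and a south-facing balcony. "
        "Available from the first of next month; utilities not included."
    )
    result = assess_text_quality(sample)
    print(f"Assessment:   {result['assessment']}")
    print(f"Score:        {result['score']}")
    print(f"Reasoning:    {result['reasoning']}")
    print(f"AI-generated: {result['is_ai_generated']}")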