# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
    try:
        if not text or len(text.strip()) < 20:
            return {
                'assessment': 'insufficient',
                'score': 0,
                'reasoning': 'Text too short to assess.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'top_classifications': []
            }
        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]
        # Analyze text with multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })
        # AI-generation check using the same zero-shot classifier
        # (single-label here, so the scores sum to 1 across the four candidates)
        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
        is_ai_generated = (
            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
        )
        # Calculate quality metrics. With multi_label=True each label score is an
        # independent probability, so sums over several labels can exceed 1.0;
        # clamp each metric to [0, 1] so the thresholds and weights below stay meaningful.
        quality_metrics = {
            'detail_level': min(1.0, sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                         if label in ['detailed and informative', 'adequately detailed'])),
            'professionalism': min(1.0, sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                            if label in ['professional listing', 'authentic description'])),
            'clarity': min(1.0, sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                    if label not in ['vague description', 'misleading content', 'spam-like content'])),
            'authenticity': max(0.0, 1.0 - sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                                               if label in ['template-based content', 'spam-like content']))
        }
        # Calculate overall score with weighted metrics
        weights = {
            'detail_level': 0.3,
            'professionalism': 0.25,
            'clarity': 0.25,
            'authenticity': 0.2
        }
        score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
        score = score * 100  # Convert to percentage

        # Adjust score for AI-generated content
        if is_ai_generated:
            score = score * 0.7  # Reduce score by 30% for AI-generated content
        # Generate detailed reasoning
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            reasoning_parts.append(f"Primary assessment: {primary_class}")

        if quality_metrics['detail_level'] > 0.7:
            reasoning_parts.append("Contains comprehensive details")
        elif quality_metrics['detail_level'] > 0.4:
            reasoning_parts.append("Contains adequate details")
        else:
            reasoning_parts.append("Lacks important details")

        if quality_metrics['professionalism'] > 0.7:
            reasoning_parts.append("Professional listing style")
        elif quality_metrics['professionalism'] < 0.4:
            reasoning_parts.append("Amateur listing style")

        if quality_metrics['clarity'] < 0.5:
            reasoning_parts.append("Content clarity issues detected")

        if is_ai_generated:
            reasoning_parts.append("Content appears to be AI-generated")
        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'score': int(score),
            'reasoning': '. '.join(reasoning_parts),
            'is_ai_generated': is_ai_generated,
            'quality_metrics': quality_metrics,
            'top_classifications': top_classifications
        }
    except Exception as e:
        logger.error(f"Error assessing text quality: {str(e)}")
        return {
            'assessment': 'could not assess',
            'score': 50,
            'reasoning': 'Technical error.',
            'is_ai_generated': False,
            'quality_metrics': {},
            'top_classifications': []
        }
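

# Illustrative usage only (not part of the original module): a minimal sketch of
# how assess_text_quality might be exercised locally on a sample listing.
# It assumes the package's relative imports resolve (model_loader.load_model and
# logging_config.logger exist), so run it as a module, e.g. `python -m models.text_quality`.
if __name__ == "__main__":
    sample_listing = (
        "Spacious 3BHK apartment with covered parking, 24/7 security, "
        "a modular kitchen, and easy access to schools and the metro."
    )
    result = assess_text_quality(sample_listing)
    print(result['assessment'], result['score'])
    print(result['reasoning'])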