"""Text quality assessment for listing text, backed by a zero-shot classifier."""

from .model_loader import load_model
from .logging_config import logger
|
|
def assess_text_quality(text):
    """Assess the quality of a piece of listing text with zero-shot classification.

    Returns a dict containing an overall 'score' (0-100), a primary
    'assessment' label, human-readable 'reasoning', an 'is_ai_generated'
    flag, per-aspect 'quality_metrics', and the 'top_classifications'
    with their confidences.
    """
    try:
        # Reject empty or near-empty input before running the model.
        if not text or len(text.strip()) < 20:
            return {
                'assessment': 'insufficient',
                'score': 0,
                'reasoning': 'Text too short.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'top_classifications': []
            }

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Candidate labels spanning detail, professionalism, and authenticity.
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Classify the first 1000 characters, scoring every category independently.
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep up to three of the highest-scoring labels that clear a minimum confidence.
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Separate single-label check for AI-generated or templated writing.
        ai_check = classifier(text[:1000], ["human-written", "AI-generated", "template-based", "authentic"])
        is_ai_generated = (
            (ai_check['labels'][0] == "AI-generated" and ai_check['scores'][0] > 0.6) or
            (ai_check['labels'][0] == "template-based" and ai_check['scores'][0] > 0.7)
        )

        # Aggregate the label scores into per-aspect metrics. Multi-label scores are
        # independent, so each aggregate is clamped to [0, 1] to keep the final
        # weighted score within 0-100.
        label_scores = dict(zip(quality_result['labels'], quality_result['scores']))
        quality_metrics = {
            'detail_level': min(1.0, sum(score for label, score in label_scores.items()
                                         if label in ['detailed and informative', 'adequately detailed'])),
            'professionalism': min(1.0, sum(score for label, score in label_scores.items()
                                            if label in ['professional listing', 'authentic description'])),
            'clarity': min(1.0, sum(score for label, score in label_scores.items()
                                    if label not in ['vague description', 'misleading content', 'spam-like content'])),
            'authenticity': max(0.0, 1.0 - sum(score for label, score in label_scores.items()
                                               if label in ['template-based content', 'spam-like content']))
        }

        # Weighted combination of the aspect metrics, scaled to 0-100.
        weights = {
            'detail_level': 0.3,
            'professionalism': 0.25,
            'clarity': 0.25,
            'authenticity': 0.2
        }
        score = sum(metric * weights[metric_name] for metric_name, metric in quality_metrics.items())
        score = score * 100

        # Penalize content that looks generated or templated.
        if is_ai_generated:
            score = score * 0.7

        # Build a human-readable explanation from the strongest signals.
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            reasoning_parts.append(f"Primary assessment: {primary_class}")

        if quality_metrics['detail_level'] > 0.7:
            reasoning_parts.append("Contains comprehensive details")
        elif quality_metrics['detail_level'] > 0.4:
            reasoning_parts.append("Contains adequate details")
        else:
            reasoning_parts.append("Lacks important details")

        if quality_metrics['professionalism'] > 0.7:
            reasoning_parts.append("Professional listing style")
        elif quality_metrics['professionalism'] < 0.4:
            reasoning_parts.append("Amateur listing style")

        if quality_metrics['clarity'] < 0.5:
            reasoning_parts.append("Content clarity issues detected")

        if is_ai_generated:
            reasoning_parts.append("Content appears to be AI-generated")

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'score': int(score),
            'reasoning': '. '.join(reasoning_parts),
            'is_ai_generated': is_ai_generated,
            'quality_metrics': quality_metrics,
            'top_classifications': top_classifications
        }
    except Exception as e:
        # Never propagate model errors to callers; fall back to a neutral result.
        logger.error(f"Error assessing text quality: {str(e)}")
        return {
            'assessment': 'could not assess',
            'score': 50,
            'reasoning': 'Technical error.',
            'is_ai_generated': False,
            'quality_metrics': {},
            'top_classifications': []
        }
|
|
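# Minimal manual check (a sketch, not part of the pipeline): the sample text below is
# purely illustrative, and the module must be run inside its package, e.g.
# `python -m <package>.<module>`, so the relative imports resolve.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment with a renovated kitchen, hardwood floors, "
        "and a private balcony overlooking the park. Available from May; utilities included."
    )
    result = assess_text_quality(sample)
    print(f"{result['assessment']} (score {result['score']})")
    print(result['reasoning'])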