property_verification_bot / models /legal_analysis.py
sksameermujahid's picture
Upload 45 files
14cb7ae verified
# models/legal_analysis.py
import re
from .model_loader import load_model
from .logging_config import logger
from typing import Dict, Any, List, Tuple
# Candidate labels handed to the zero-shot classifier, grouped by legal theme.
_LEGAL_CATEGORIES: List[str] = [
    # Title and Ownership
    "clear title documentation",
    "title verification documents",
    "ownership transfer documents",
    "inheritance documents",
    "gift deed documents",
    "power of attorney documents",
    # Property Registration
    "property registration documents",
    "sale deed documents",
    "conveyance deed documents",
    "development agreement documents",
    "joint development agreement documents",
    # Tax and Financial
    "property tax records",
    "tax clearance certificates",
    "encumbrance certificates",
    "bank loan documents",
    "mortgage documents",
    # Approvals and Permits
    "building permits",
    "construction approvals",
    "occupation certificates",
    "completion certificates",
    "environmental clearances",
    # Land and Usage
    "land use certificates",
    "zoning certificates",
    "layout approvals",
    "master plan compliance",
    "land conversion documents",
    # Compliance and Legal
    "legal compliance certificates",
    "no objection certificates",
    "fire safety certificates",
    "structural stability certificates",
    "water and electricity compliance",
    # Disputes and Litigation
    "property dispute records",
    "litigation history",
    "court orders",
    "settlement agreements",
    "pending legal cases",
]

# Classifier labels whose scores feed each legal metric (four per metric).
_METRIC_LABELS: Dict[str, Tuple[str, ...]] = {
    'title_and_ownership': (
        'clear title documentation', 'title verification documents',
        'ownership transfer documents', 'inheritance documents',
    ),
    'property_registration': (
        'property registration documents', 'sale deed documents',
        'conveyance deed documents', 'development agreement documents',
    ),
    'tax_and_financial': (
        'property tax records', 'tax clearance certificates',
        'encumbrance certificates', 'bank loan documents',
    ),
    'approvals_and_permits': (
        'building permits', 'construction approvals',
        'occupation certificates', 'completion certificates',
    ),
    'land_and_usage': (
        'land use certificates', 'zoning certificates',
        'layout approvals', 'master plan compliance',
    ),
    'compliance_and_legal': (
        'legal compliance certificates', 'no objection certificates',
        'fire safety certificates', 'structural stability certificates',
    ),
    'disputes_and_litigation': (
        'property dispute records', 'litigation history',
        'court orders', 'pending legal cases',
    ),
}

# Per-metric weights of the completeness score; they sum to 1.0.
_COMPLETENESS_WEIGHTS: Dict[str, float] = {
    'title_and_ownership': 0.25,
    'property_registration': 0.20,
    'tax_and_financial': 0.15,
    'approvals_and_permits': 0.15,
    'land_and_usage': 0.10,
    'compliance_and_legal': 0.10,
    'disputes_and_litigation': 0.05,
}

# Per-metric weights of the overall confidence (disputes are excluded).
_CONFIDENCE_WEIGHTS: Dict[str, float] = {
    'title_and_ownership': 0.3,
    'property_registration': 0.2,
    'tax_and_financial': 0.15,
    'approvals_and_permits': 0.15,
    'land_and_usage': 0.1,
    'compliance_and_legal': 0.1,
}

# Text sent to the classifier: the excerpt of the legal text followed by a
# fixed checklist of the aspects to verify.
_LEGAL_CONTEXT_TEMPLATE = """
Legal Documentation Analysis:
{legal_text}
Key aspects to verify:
1. Title and Ownership:
- Clear title documentation
- Ownership transfer history
- Inheritance/gift documentation
- Power of attorney status
2. Property Registration:
- Sale deed validity
- Registration status
- Development agreements
- Joint development status
3. Tax and Financial:
- Property tax compliance
- Tax clearance status
- Encumbrance status
- Mortgage/loan status
4. Approvals and Permits:
- Building permit validity
- Construction approvals
- Occupation certificates
- Environmental clearances
5. Land and Usage:
- Land use compliance
- Zoning regulations
- Layout approvals
- Master plan compliance
6. Compliance and Legal:
- Legal compliance status
- Safety certificates
- Utility compliance
- Regulatory approvals
7. Disputes and Litigation:
- Dispute history
- Court orders
- Settlement status
- Pending cases
"""


def _empty_legal_result(assessment: str, summary: str, reasoning: str) -> Dict[str, Any]:
    """Build the placeholder result returned when no analysis was produced."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'summary': summary,
        'completeness_score': 0,
        'potential_issues': False,
        'legal_metrics': {},
        'reasoning': reasoning,
        'top_classifications': [],
        'document_verification': {},
        'compliance_status': {},
        'risk_assessment': {}
    }


def _score_legal_metrics(labels: List[str], scores: List[float]) -> Dict[str, float]:
    """Average the classifier scores of each metric's labels.

    Averaging (rather than summing) keeps every metric in the range of a
    single score, which is what the thresholds, the ``* 100`` percentages and
    the weighted completeness score downstream all assume.
    """
    score_by_label = dict(zip(labels, scores))
    return {
        metric: sum(float(score_by_label.get(label, 0.0)) for label in group) / len(group)
        for metric, group in _METRIC_LABELS.items()
    }


def _tiered_status(value: float, high: str, middle: str, low: str) -> Dict[str, Any]:
    """Map a metric onto the 0.7 / 0.4 status tiers plus a percentage score."""
    if value > 0.7:
        status = high
    elif value > 0.4:
        status = middle
    else:
        status = low
    return {'status': status, 'score': value * 100}


def analyze_legal_details(legal_text: str) -> Dict[str, Any]:
    """Analyze the legal details of a property with a zero-shot classifier.

    Args:
        legal_text: Free-text description of the property's legal documents.
            Only the first 1000 characters are classified and summarized.

    Returns:
        A dict with the keys ``assessment``, ``confidence``, ``summary``,
        ``completeness_score``, ``potential_issues``, ``legal_metrics``,
        ``reasoning``, ``top_classifications``, ``document_verification``,
        ``compliance_status`` and ``risk_assessment``. When the text is empty
        or shorter than 5 characters after stripping, a placeholder result
        with ``assessment='insufficient'`` is returned. When any step raises,
        the error is logged and a placeholder result with
        ``assessment='could not assess'`` is returned; the exception is not
        propagated.
    """
    try:
        if not legal_text or len(legal_text.strip()) < 5:
            return _empty_legal_result(
                'insufficient',
                'No legal details provided',
                'No legal details provided for analysis',
            )

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
        excerpt = legal_text[:1000]
        legal_context = _LEGAL_CONTEXT_TEMPLATE.format(legal_text=excerpt)

        # A copy is passed so the shared module-level list can never be
        # altered by the classifier.
        legal_result = classifier(legal_context, list(_LEGAL_CATEGORIES), multi_label=True)
        labels = legal_result['labels']
        scores = legal_result['scores']

        # Keep the first five labels, dropping any at or below 30% confidence.
        # Presumably the pipeline returns labels sorted by descending score,
        # which makes these the top five; confirm against the model loader.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(labels[:5], scores[:5])
            if score > 0.3
        ]

        # NOTE(review): summarize_text is neither imported nor defined in the
        # visible part of this module. Confirm it is in scope; otherwise this
        # call raises NameError and the handler below reports
        # 'could not assess' for every non-trivial input.
        summary = summarize_text(excerpt)

        legal_metrics = _score_legal_metrics(labels, scores)

        completeness_score = sum(
            legal_metrics[metric] * weight * 100
            for metric, weight in _COMPLETENESS_WEIGHTS.items()
        )

        litigation = legal_metrics['disputes_and_litigation']
        potential_issues = bool(litigation > 0.3)

        document_verification = {
            'title_documents': _tiered_status(
                legal_metrics['title_and_ownership'], 'verified', 'partial', 'missing'),
            'registration_documents': _tiered_status(
                legal_metrics['property_registration'], 'verified', 'partial', 'missing'),
            'tax_documents': _tiered_status(
                legal_metrics['tax_and_financial'], 'verified', 'partial', 'missing'),
            'approval_documents': _tiered_status(
                legal_metrics['approvals_and_permits'], 'verified', 'partial', 'missing'),
        }

        compliance_status = {
            'land_use': _tiered_status(
                legal_metrics['land_and_usage'], 'compliant', 'partial', 'non-compliant'),
            'legal_compliance': _tiered_status(
                legal_metrics['compliance_and_legal'], 'compliant', 'partial', 'non-compliant'),
        }

        if litigation > 0.6:
            litigation_level = 'high'
        elif litigation > 0.3:
            litigation_level = 'medium'
        else:
            litigation_level = 'low'

        if completeness_score < 50:
            documentation_level = 'high'
        elif completeness_score < 70:
            documentation_level = 'medium'
        else:
            documentation_level = 'low'

        risk_assessment = {
            'litigation_risk': {
                'level': litigation_level,
                'score': litigation * 100
            },
            'documentation_risk': {
                'level': documentation_level,
                # Guard against a tiny negative value from float rounding.
                'score': max(0.0, 100 - completeness_score)
            }
        }

        # Assemble the human-readable reasoning from every section above.
        reasoning_parts = []
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            confidence = top_classifications[0]['confidence']
            reasoning_parts.append(f"Primary assessment: {primary_class} (confidence: {confidence:.0%})")
        for doc_type, status in document_verification.items():
            reasoning_parts.append(f"{doc_type.replace('_', ' ').title()}: {status['status']} (score: {status['score']:.0f}%)")
        for compliance_type, status in compliance_status.items():
            reasoning_parts.append(f"{compliance_type.replace('_', ' ').title()}: {status['status']} (score: {status['score']:.0f}%)")
        for risk_type, assessment in risk_assessment.items():
            reasoning_parts.append(f"{risk_type.replace('_', ' ').title()}: {assessment['level']} risk (score: {assessment['score']:.0f}%)")

        overall_confidence = min(1.0, sum(
            legal_metrics[metric] * weight
            for metric, weight in _CONFIDENCE_WEIGHTS.items()
        ))

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'confidence': float(overall_confidence),
            'summary': summary,
            'completeness_score': int(completeness_score),
            'potential_issues': potential_issues,
            'legal_metrics': legal_metrics,
            'reasoning': '. '.join(reasoning_parts),
            'top_classifications': top_classifications,
            'document_verification': document_verification,
            'compliance_status': compliance_status,
            'risk_assessment': risk_assessment
        }
    except Exception as e:
        # Boundary handler: callers always receive a well-formed result dict.
        logger.error(f"Error analyzing legal details: {str(e)}")
        return _empty_legal_result(
            'could not assess',
            'Error analyzing legal details',
            'Technical error occurred during analysis',
        )