# models/legal_analysis.py

import re
from typing import Dict, Any, List, Tuple

from .model_loader import load_model
from .logging_config import logger

# Candidate labels handed to the zero-shot classifier, grouped by theme.
# The order is kept stable because it is passed to the model as-is.
_LEGAL_CATEGORIES: Tuple[str, ...] = (
    # Title and Ownership
    "clear title documentation",
    "title verification documents",
    "ownership transfer documents",
    "inheritance documents",
    "gift deed documents",
    "power of attorney documents",
    # Property Registration
    "property registration documents",
    "sale deed documents",
    "conveyance deed documents",
    "development agreement documents",
    "joint development agreement documents",
    # Tax and Financial
    "property tax records",
    "tax clearance certificates",
    "encumbrance certificates",
    "bank loan documents",
    "mortgage documents",
    # Approvals and Permits
    "building permits",
    "construction approvals",
    "occupation certificates",
    "completion certificates",
    "environmental clearances",
    # Land and Usage
    "land use certificates",
    "zoning certificates",
    "layout approvals",
    "master plan compliance",
    "land conversion documents",
    # Compliance and Legal
    "legal compliance certificates",
    "no objection certificates",
    "fire safety certificates",
    "structural stability certificates",
    "water and electricity compliance",
    # Disputes and Litigation
    "property dispute records",
    "litigation history",
    "court orders",
    "settlement agreements",
    "pending legal cases",
)

# Labels that feed each aggregated metric. Only these labels contribute to a
# metric; the remaining candidate labels still take part in the top-5 ranking.
_METRIC_LABELS: Dict[str, Tuple[str, ...]] = {
    'title_and_ownership': (
        'clear title documentation',
        'title verification documents',
        'ownership transfer documents',
        'inheritance documents',
    ),
    'property_registration': (
        'property registration documents',
        'sale deed documents',
        'conveyance deed documents',
        'development agreement documents',
    ),
    'tax_and_financial': (
        'property tax records',
        'tax clearance certificates',
        'encumbrance certificates',
        'bank loan documents',
    ),
    'approvals_and_permits': (
        'building permits',
        'construction approvals',
        'occupation certificates',
        'completion certificates',
    ),
    'land_and_usage': (
        'land use certificates',
        'zoning certificates',
        'layout approvals',
        'master plan compliance',
    ),
    'compliance_and_legal': (
        'legal compliance certificates',
        'no objection certificates',
        'fire safety certificates',
        'structural stability certificates',
    ),
    'disputes_and_litigation': (
        'property dispute records',
        'litigation history',
        'court orders',
        'pending legal cases',
    ),
}

# Weights for the completeness score; they sum to 1.0, so the score is 0..100.
_COMPLETENESS_WEIGHTS: Dict[str, float] = {
    'title_and_ownership': 0.25,
    'property_registration': 0.20,
    'tax_and_financial': 0.15,
    'approvals_and_permits': 0.15,
    'land_and_usage': 0.10,
    'compliance_and_legal': 0.10,
    'disputes_and_litigation': 0.05,
}

# Weights for the overall confidence; litigation is deliberately excluded.
_CONFIDENCE_WEIGHTS: Dict[str, float] = {
    'title_and_ownership': 0.3,
    'property_registration': 0.2,
    'tax_and_financial': 0.15,
    'approvals_and_permits': 0.15,
    'land_and_usage': 0.1,
    'compliance_and_legal': 0.1,
}

# metric name -> key used in the ``document_verification`` section.
_DOCUMENT_SECTIONS: Tuple[Tuple[str, str], ...] = (
    ('title_documents', 'title_and_ownership'),
    ('registration_documents', 'property_registration'),
    ('tax_documents', 'tax_and_financial'),
    ('approval_documents', 'approvals_and_permits'),
)

# metric name -> key used in the ``compliance_status`` section.
_COMPLIANCE_SECTIONS: Tuple[Tuple[str, str], ...] = (
    ('land_use', 'land_and_usage'),
    ('legal_compliance', 'compliance_and_legal'),
)

_MAX_INPUT_CHARS = 1000        # characters of legal text sent to the models
_TOP_K = 5                     # number of classifications considered
_MIN_TOP_CONFIDENCE = 0.3      # minimum score for a reported classification
_SUMMARY_FALLBACK_CHARS = 300  # excerpt length when no summarizer is usable


def _fallback_result(assessment: str, summary: str, reasoning: str) -> Dict[str, Any]:
    """Build the payload returned when no real analysis could be produced."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'summary': summary,
        'completeness_score': 0,
        'potential_issues': False,
        'legal_metrics': {},
        'reasoning': reasoning,
        'top_classifications': [],
        'document_verification': {},
        'compliance_status': {},
        'risk_assessment': {}
    }


def _build_legal_context(legal_text: str) -> str:
    """Wrap the (truncated) legal text in the checklist prompt for the classifier."""
    # NOTE(review): the checklist below repeats wording from the candidate
    # labels, which may bias the zero-shot scores regardless of the document
    # content - consider classifying the raw text instead.
    return f"""
    Legal Documentation Analysis:
    {legal_text[:_MAX_INPUT_CHARS]}

    Key aspects to verify:
    1. Title and Ownership:
       - Clear title documentation
       - Ownership transfer history
       - Inheritance/gift documentation
       - Power of attorney status

    2. Property Registration:
       - Sale deed validity
       - Registration status
       - Development agreements
       - Joint development status

    3. Tax and Financial:
       - Property tax compliance
       - Tax clearance status
       - Encumbrance status
       - Mortgage/loan status

    4. Approvals and Permits:
       - Building permit validity
       - Construction approvals
       - Occupation certificates
       - Environmental clearances

    5. Land and Usage:
       - Land use compliance
       - Zoning regulations
       - Layout approvals
       - Master plan compliance

    6. Compliance and Legal:
       - Legal compliance status
       - Safety certificates
       - Utility compliance
       - Regulatory approvals

    7. Disputes and Litigation:
       - Dispute history
       - Court orders
       - Settlement status
       - Pending cases
    """


def _summarize(text: str) -> str:
    """Summarize *text*, degrading to a plain excerpt when no summarizer works."""
    # NOTE(review): ``summarize_text`` is not imported anywhere in this module,
    # so calling it directly raised NameError and turned every analysis into
    # the generic error payload. It is resolved at call time here; add the
    # proper import once the helper's module is confirmed.
    summarizer = globals().get('summarize_text')
    if callable(summarizer):
        try:
            return summarizer(text)
        except Exception as exc:
            logger.warning(f"Summary generation failed, using excerpt instead: {exc}")
    else:
        logger.warning("summarize_text is unavailable, using excerpt as summary")
    return ' '.join(text.split())[:_SUMMARY_FALLBACK_CHARS]


def _tier(value: float, top_label: str, bottom_label: str) -> str:
    """Map a 0..1 metric to *top_label* (> 0.7), 'partial' (> 0.4) or *bottom_label*."""
    if value > 0.7:
        return top_label
    if value > 0.4:
        return 'partial'
    return bottom_label


def _status_section(
    sections: Tuple[Tuple[str, str], ...],
    metrics: Dict[str, float],
    top_label: str,
    bottom_label: str,
) -> Dict[str, Dict[str, Any]]:
    """Build a ``{key: {'status', 'score'}}`` section from the named metrics."""
    return {
        key: {
            'status': _tier(metrics[metric], top_label, bottom_label),
            'score': metrics[metric] * 100
        }
        for key, metric in sections
    }


def analyze_legal_details(legal_text: str) -> Dict[str, Any]:
    """Analyze legal details of a property with comprehensive validation.

    The text is scored against a fixed set of legal document categories with a
    zero-shot classifier. The per-label scores are aggregated into themed
    metrics, a weighted completeness score, document/compliance statuses and a
    risk assessment.

    Args:
        legal_text: Free-form description of the property's legal documents.
            Only the first 1000 characters are analyzed.

    Returns:
        A dict with the keys ``assessment``, ``confidence``, ``summary``,
        ``completeness_score``, ``potential_issues``, ``legal_metrics``,
        ``reasoning``, ``top_classifications``, ``document_verification``,
        ``compliance_status`` and ``risk_assessment``. Text shorter than five
        non-blank characters yields an ``'insufficient'`` assessment; any
        exception yields a ``'could not assess'`` payload instead of raising.
    """
    try:
        if not legal_text or len(legal_text.strip()) < 5:
            return _fallback_result(
                assessment='insufficient',
                summary='No legal details provided',
                reasoning='No legal details provided for analysis',
            )

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
        legal_result = classifier(
            _build_legal_context(legal_text),
            list(_LEGAL_CATEGORIES),
            multi_label=True,
        )
        labels = legal_result['labels']
        scores = legal_result['scores']

        # Assumes the classifier returns labels ordered by descending score
        # (as the Hugging Face zero-shot pipeline does), so the first entries
        # are the strongest matches.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(labels[:_TOP_K], scores[:_TOP_K])
            if score > _MIN_TOP_CONFIDENCE
        ]

        summary = _summarize(legal_text[:_MAX_INPUT_CHARS])

        # Each metric is the MEAN score of its labels. Summing them (as before)
        # produced values up to 4.0, which broke the 0..1 thresholds below and
        # pushed "percentage" scores as high as 400%.
        score_by_label = dict(zip(labels, scores))
        legal_metrics = {
            name: sum(float(score_by_label.get(label, 0.0)) for label in group) / len(group)
            for name, group in _METRIC_LABELS.items()
        }

        completeness_score = sum(
            legal_metrics[name] * weight * 100
            for name, weight in _COMPLETENESS_WEIGHTS.items()
        )

        litigation = legal_metrics['disputes_and_litigation']
        potential_issues = litigation > 0.3

        document_verification = _status_section(
            _DOCUMENT_SECTIONS, legal_metrics, 'verified', 'missing'
        )
        compliance_status = _status_section(
            _COMPLIANCE_SECTIONS, legal_metrics, 'compliant', 'non-compliant'
        )

        if litigation > 0.6:
            litigation_level = 'high'
        elif litigation > 0.3:
            litigation_level = 'medium'
        else:
            litigation_level = 'low'

        if completeness_score < 50:
            documentation_level = 'high'
        elif completeness_score < 70:
            documentation_level = 'medium'
        else:
            documentation_level = 'low'

        risk_assessment = {
            'litigation_risk': {
                'level': litigation_level,
                'score': litigation * 100
            },
            'documentation_risk': {
                'level': documentation_level,
                'score': 100 - completeness_score
            }
        }

        # Human-readable reasoning assembled from every section above.
        reasoning_parts = []
        if top_classifications:
            primary = top_classifications[0]
            reasoning_parts.append(
                f"Primary assessment: {primary['classification']} "
                f"(confidence: {primary['confidence']:.0%})"
            )
        for section in (document_verification, compliance_status):
            for name, status in section.items():
                reasoning_parts.append(
                    f"{name.replace('_', ' ').title()}: {status['status']} "
                    f"(score: {status['score']:.0f}%)"
                )
        for name, assessment in risk_assessment.items():
            reasoning_parts.append(
                f"{name.replace('_', ' ').title()}: {assessment['level']} risk "
                f"(score: {assessment['score']:.0f}%)"
            )

        overall_confidence = min(1.0, sum(
            legal_metrics[name] * weight
            for name, weight in _CONFIDENCE_WEIGHTS.items()
        ))

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
            'confidence': float(overall_confidence),
            'summary': summary,
            'completeness_score': int(completeness_score),
            'potential_issues': potential_issues,
            'legal_metrics': legal_metrics,
            'reasoning': '. '.join(reasoning_parts),
            'top_classifications': top_classifications,
            'document_verification': document_verification,
            'compliance_status': compliance_status,
            'risk_assessment': risk_assessment
        }
    except Exception as e:
        # Top-level boundary: callers expect a payload, never an exception.
        logger.exception(f"Error analyzing legal details: {str(e)}")
        return _fallback_result(
            assessment='could not assess',
            summary='Error analyzing legal details',
            reasoning='Technical error occurred during analysis',
        )