Spaces:

sksameermujahid
/

property_verification_bot

Sleeping

File size: 10,482 Bytes

14cb7ae

# models/price_analysis.py

import re
from datetime import datetime

from .model_loader import load_model
from .logging_config import logger

# Characters stripped from numeric fields before parsing (currency signs and
# thousands separators).
_CURRENCY_CHARS = re.compile(r'[$₹,]')
# First plain decimal number embedded in free text such as "1200 sq.ft.".
_FIRST_NUMBER = re.compile(r'-?(?:\d+(?:\.\d+)?|\.\d+)')

# Substrings that mark a city as belonging to the "metro" tier.
_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

# Per-tier price-per-sq.ft. bands: (min, max, market average).
# NOTE(review): presumably rupees per sq.ft. (results are formatted with '₹') — confirm.
_TIER_PRICE_BANDS = {
    'metro': (5000, 30000, 15000),
    'non-metro': (1500, 15000, 7500),
}

# Ordered (keyword in primary label, resulting price_range) pairs.
_PRICE_RANGE_KEYWORDS = (
    ('luxury', 'luxury'),
    ('premium', 'premium'),
    ('mid-range', 'mid_range'),
    ('budget', 'budget'),
)

# Weight of each component in the overall confidence score.
_CONFIDENCE_WEIGHTS = {
    'primary_classification': 0.3,
    'location_assessment': 0.25,
    'age_factor': 0.2,
    'size_factor': 0.15,
    'amenities_factor': 0.1,
}


def _parse_number(value):
    """Convert a loosely formatted numeric field to a finite float (0.0 if unusable)."""
    if value is None or isinstance(value, bool):
        return 0.0
    text = _CURRENCY_CHARS.sub('', str(value)).strip()
    if not text:
        return 0.0
    try:
        number = float(text)
    except ValueError:
        # Fall back to the first number embedded in text like "1200 sq.ft.".
        match = _FIRST_NUMBER.search(text)
        number = float(match.group()) if match else 0.0
    # Reject NaN (which is not equal to itself) and infinities.
    if number != number or number in (float('inf'), float('-inf')):
        return 0.0
    return number


def _empty_result(assessment, location_assessment):
    """Build the fallback result returned when no analysis could be performed."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': [],
    }


def _assess_location(city, price_per_sqft):
    """Return (assessment, market_trends) for a city and its price per sq.ft."""
    if not city or not price_per_sqft:
        return "unknown", {}

    city_lower = str(city).lower()
    is_metro = any(name in city_lower for name in _METRO_CITIES)
    tier = 'metro' if is_metro else 'non-metro'
    low, high, market_avg = _TIER_PRICE_BANDS[tier]

    market_trends = {
        'city_tier': tier,
        'avg_price_range': {'min': low, 'max': high, 'trend': 'stable'},
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': abs(price_per_sqft - market_avg) / market_avg * 100,
        },
    }

    if price_per_sqft < low:
        assessment = "suspiciously low"
    elif price_per_sqft > high:
        assessment = "suspiciously high"
    else:
        assessment = "reasonable"
    return assessment, market_trends


def _age_factor(raw_year):
    """Return the age factor dict for a build year, or None when there is none to report."""
    if raw_year is None or str(raw_year).strip() == '':
        # A missing year is not an error; there is simply no age factor.
        return None
    try:
        year_built = int(raw_year)
    except (TypeError, ValueError, OverflowError):
        return {'error': 'Invalid year built'}
    if year_built <= 0:
        return {'error': 'Invalid year built'}

    property_age = datetime.now().year - year_built
    if property_age <= 0:
        return None

    return {
        'property_age': property_age,
        # 1% depreciation per year, floored at 50%.
        'depreciation_factor': max(0.5, 1 - (property_age * 0.01)),
        'impact': 'high' if property_age > 30 else 'medium' if property_age > 15 else 'low',
    }


def _amenities_factor(amenities):
    """Return the amenities factor dict, or None when no amenities are listed."""
    if not amenities:
        return None
    if isinstance(amenities, str):
        raw_items = amenities.split(',')
    elif isinstance(amenities, (list, tuple, set, frozenset)):
        raw_items = amenities
    else:
        raw_items = [amenities]

    # Blank entries (e.g. from "pool,,gym") are not counted as amenities.
    amenities_list = [text for text in (str(item).strip() for item in raw_items) if text]
    if not amenities_list:
        return None

    score = min(1.0, len(amenities_list) * 0.1)  # 10% per amenity, max 100%
    return {
        'count': len(amenities_list),
        'score': score,
        'impact': 'high' if score > 0.7 else 'medium' if score > 0.4 else 'low',
    }


def analyze_price(data):
    """Assess whether a property's listed price looks plausible.

    Reads ``market_value``, ``sq_ft``, ``city``, ``state``, ``property_type``,
    ``status``, ``year_built``, ``bedrooms``, ``bathrooms`` and ``amenities``
    from ``data`` (a dict-like object with ``.get``). The price and area are
    parsed leniently: currency signs, thousands separators and trailing units
    such as "sq.ft." are tolerated.

    The result combines a zero-shot classification of a textual summary of the
    listing with rule-based location, age, size and amenities checks.

    Returns:
        A dict with the keys ``assessment``, ``confidence``, ``price``,
        ``formatted_price``, ``price_per_sqft``, ``formatted_price_per_sqft``,
        ``price_range``, ``location_price_assessment``, ``has_price``,
        ``market_trends``, ``price_factors``, ``risk_indicators`` and
        ``top_classifications``. When the price is missing or not positive,
        ``assessment`` is ``'no price'``; when any unexpected exception occurs
        it is logged and ``assessment`` is ``'error'``. The function does not
        raise.
    """
    try:
        price = _parse_number(data.get('market_value'))
        sq_ft = _parse_number(data.get('sq_ft'))

        # A zero or negative price cannot be analysed.
        if price <= 0:
            return _empty_result('no price', 'cannot assess')

        price_per_sqft = price / sq_ft if sq_ft > 0 else 0

        # Use a more sophisticated model for price analysis
        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Create a detailed context for price analysis
        price_context = f"""
        Property Type: {data.get('property_type', '')}
        Location: {data.get('city', '')}, {data.get('state', '')}
        Size: {sq_ft} sq.ft.
        Price: ₹{price:,.2f}
        Price per sq.ft.: ₹{price_per_sqft:,.2f}
        Property Status: {data.get('status', '')}
        Year Built: {data.get('year_built', '')}
        Bedrooms: {data.get('bedrooms', '')}
        Bathrooms: {data.get('bathrooms', '')}
        Amenities: {data.get('amenities', '')}
        """

        # Enhanced price categories with more specific indicators
        price_categories = [
            "reasonable market price",
            "suspiciously low price",
            "suspiciously high price",
            "average market price",
            "luxury property price",
            "budget property price",
            "premium property price",
            "mid-range property price",
            "overpriced for location",
            "underpriced for location",
            "price matches amenities",
            "price matches property age",
            "price matches location value",
            "price matches property condition",
            "price matches market trends"
        ]

        # Analyze price with multiple aspects
        price_result = classifier(price_context, price_categories, multi_label=True)

        # Keep up to five leading labels whose score clears the 0.25 threshold.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
            if score > 0.25
        ]

        # Derive the price range from the primary classification label.
        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            for keyword, range_name in _PRICE_RANGE_KEYWORDS:
                if keyword in primary_class:
                    price_range = range_name
                    break

        # Location-specific price assessment
        location_assessment, market_trends = _assess_location(data.get('city'), price_per_sqft)

        price_factors = {}
        risk_indicators = []

        # Property age factor
        age_factor = _age_factor(data.get('year_built'))
        if age_factor is not None:
            price_factors['age_factor'] = age_factor

        # Size factor
        if sq_ft > 0:
            price_factors['size_factor'] = {
                'size': sq_ft,
                'price_per_sqft': price_per_sqft,
                'efficiency': 'high' if 800 <= sq_ft <= 2000 else 'medium' if 500 <= sq_ft <= 3000 else 'low'
            }

            # Add risk indicators based on size
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        # Amenities factor
        amenities_factor = _amenities_factor(data.get('amenities'))
        if amenities_factor is not None:
            price_factors['amenities_factor'] = amenities_factor

        # Weighted confidence: components that are absent contribute nothing,
        # while the divisor is always the sum of all weights.
        confidence_scores = []

        if top_classifications:
            confidence_scores.append(
                top_classifications[0]['confidence'] * _CONFIDENCE_WEIGHTS['primary_classification']
            )

        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(location_confidence * _CONFIDENCE_WEIGHTS['location_assessment'])

        if 'depreciation_factor' in price_factors.get('age_factor', {}):
            age_confidence = price_factors['age_factor']['depreciation_factor']
            confidence_scores.append(age_confidence * _CONFIDENCE_WEIGHTS['age_factor'])

        if 'size_factor' in price_factors:
            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
            confidence_scores.append(size_confidence * _CONFIDENCE_WEIGHTS['size_factor'])

        if 'amenities_factor' in price_factors:
            amenities_confidence = price_factors['amenities_factor']['score']
            confidence_scores.append(amenities_confidence * _CONFIDENCE_WEIGHTS['amenities_factor'])

        overall_confidence = sum(confidence_scores) / sum(_CONFIDENCE_WEIGHTS.values())

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications
        }
    except Exception as e:
        # Top-level boundary: callers always get a result dict, never an exception.
        logger.error("Error analyzing price: %s", e)
        return _empty_result('error', 'error')