# Source: property_verification_bot/models/price_analysis.py
# Uploaded by sksameermujahid in commit 14cb7ae ("Upload 45 files", verified).
# models/price_analysis.py
import math
import re
from datetime import datetime

from .logging_config import logger
from .model_loader import load_model
# Candidate labels scored by the zero-shot classifier against the property summary.
_PRICE_CATEGORIES = (
    "reasonable market price",
    "suspiciously low price",
    "suspiciously high price",
    "average market price",
    "luxury property price",
    "budget property price",
    "premium property price",
    "mid-range property price",
    "overpriced for location",
    "underpriced for location",
    "price matches amenities",
    "price matches property age",
    "price matches location value",
    "price matches property condition",
    "price matches market trends",
)

# (keyword searched in the winning label, resulting price bucket); first match wins.
_PRICE_RANGE_KEYWORDS = (
    ('luxury', 'luxury'),
    ('premium', 'premium'),
    ('mid-range', 'mid_range'),
    ('budget', 'budget'),
)

# A city counts as "metro" when any of these names occurs in the lower-cased city string.
_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

# Hard-coded price-per-sq.ft. bands and reference averages for each city tier.
_CITY_TIERS = {
    'metro': {'min': 5000, 'max': 30000, 'market_avg': 15000},
    'non-metro': {'min': 1500, 'max': 15000, 'market_avg': 7500},
}

# Relative weight of every component of the overall confidence score.
_CONFIDENCE_WEIGHTS = {
    'primary_classification': 0.3,
    'location_assessment': 0.25,
    'age_factor': 0.2,
    'size_factor': 0.15,
    'amenities_factor': 0.1,
}

# First plain decimal number inside free-form text such as "1200 sq.ft.".
_NUMBER_RE = re.compile(r'-?(?:\d+(?:\.\d+)?|\.\d+)')


def _parse_number(value, signed=False):
    """Convert a loosely formatted value ("₹1,20,000", "1200 sq.ft.", None) to a float.

    Unparseable or non-finite input yields 0.0 instead of raising, so one bad
    field cannot abort the whole analysis. The sign is dropped unless *signed*
    is true.
    """
    text = str(value).replace('$', '').replace('₹', '').replace(',', '').strip()
    try:
        number = float(text)
    except ValueError:
        # Fall back to the first number embedded in the text, if there is one.
        match = _NUMBER_RE.search(text)
        number = float(match.group()) if match else 0.0
    if not math.isfinite(number):
        return 0.0
    return number if signed else abs(number)


def _empty_result(assessment, location_assessment):
    """Build the placeholder result returned when there is no price or on error."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': [],
    }


def _assess_location(city, price_per_sqft):
    """Return (assessment, market_trends) for a price per sq.ft. in the given city."""
    city_lower = str(city).lower()
    is_metro = any(name in city_lower for name in _METRO_CITIES)
    tier_name = 'metro' if is_metro else 'non-metro'
    tier = _CITY_TIERS[tier_name]
    low, high, average = tier['min'], tier['max'], tier['market_avg']

    market_trends = {
        'city_tier': tier_name,
        'avg_price_range': {'min': low, 'max': high, 'trend': 'stable'},
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': average,
            'deviation': abs(price_per_sqft - average) / average * 100,
        },
    }

    if low <= price_per_sqft <= high:
        assessment = "reasonable"
    elif price_per_sqft < low:
        assessment = "suspiciously low"
    else:
        assessment = "suspiciously high"
    return assessment, market_trends


def _age_factor(raw_year):
    """Return the age/depreciation factor dict, an error dict, or None.

    None means the year is missing, non-positive, or not in the past; such
    input produces no age factor rather than a bogus multi-thousand-year age.
    """
    if raw_year is None or (isinstance(raw_year, str) and not raw_year.strip()):
        return None
    try:
        year_built = int(raw_year)
    except (TypeError, ValueError):
        return {'error': 'Invalid year built'}
    if year_built <= 0:
        return None

    property_age = datetime.now().year - year_built
    if property_age <= 0:
        return None

    if property_age > 30:
        impact = 'high'
    elif property_age > 15:
        impact = 'medium'
    else:
        impact = 'low'
    return {
        'property_age': property_age,
        # 1% depreciation per year, floored at 50%.
        'depreciation_factor': max(0.5, 1 - (property_age * 0.01)),
        'impact': impact,
    }


def _size_factor(sq_ft, price_per_sqft):
    """Return the size factor dict for a positive floor area."""
    if 800 <= sq_ft <= 2000:
        efficiency = 'high'
    elif 500 <= sq_ft <= 3000:
        efficiency = 'medium'
    else:
        efficiency = 'low'
    return {'size': sq_ft, 'price_per_sqft': price_per_sqft, 'efficiency': efficiency}


def _amenities_factor(raw_amenities):
    """Return the amenities factor dict, or None when no amenity can be extracted.

    Accepts a comma-separated string or a list/tuple/set of names; blank
    entries (for example from a trailing comma) are not counted.
    """
    if isinstance(raw_amenities, str):
        entries = raw_amenities.split(',')
    elif isinstance(raw_amenities, (list, tuple, set)):
        entries = raw_amenities
    else:
        return None

    names = [str(entry).strip() for entry in entries]
    names = [name for name in names if name]
    if not names:
        return None

    score = min(1.0, len(names) * 0.1)  # 10% per amenity, capped at 100%
    if score > 0.7:
        impact = 'high'
    elif score > 0.4:
        impact = 'medium'
    else:
        impact = 'low'
    return {'count': len(names), 'score': score, 'impact': impact}


def analyze_price(data):
    """Assess whether a property's asking price looks plausible.

    Reads 'market_value', 'sq_ft', 'property_type', 'city', 'state', 'status',
    'year_built', 'bedrooms', 'bathrooms' and 'amenities' from *data* (a
    mapping; all keys optional). The price summary is scored by a zero-shot
    classifier and combined with hard-coded city-tier price bands plus
    age/size/amenity heuristics.

    Returns a dict with the keys 'assessment', 'confidence', 'price',
    'formatted_price', 'price_per_sqft', 'formatted_price_per_sqft',
    'price_range', 'location_price_assessment', 'has_price', 'market_trends',
    'price_factors', 'risk_indicators' and 'top_classifications'.

    Never raises: a missing/zero price yields a 'no price' result, and any
    unexpected failure is logged and reported as an 'error' result.
    """
    try:
        # The sign of the price is preserved; the area is always non-negative.
        price = _parse_number(data.get('market_value', '0'), signed=True)
        sq_ft = _parse_number(data.get('sq_ft', '0'))
        price_per_sqft = price / sq_ft if sq_ft else 0

        if not price:
            return _empty_result('no price', 'cannot assess')

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Textual summary of the listing that the classifier labels.
        context_lines = [
            f"Property Type: {data.get('property_type', '')}",
            f"Location: {data.get('city', '')}, {data.get('state', '')}",
            f"Size: {sq_ft} sq.ft.",
            f"Price: ₹{price:,.2f}",
            f"Price per sq.ft.: ₹{price_per_sqft:,.2f}",
            f"Property Status: {data.get('status', '')}",
            f"Year Built: {data.get('year_built', '')}",
            f"Bedrooms: {data.get('bedrooms', '')}",
            f"Bathrooms: {data.get('bathrooms', '')}",
            f"Amenities: {data.get('amenities', '')}",
        ]
        price_context = "\n" + "\n".join(context_lines) + "\n"

        price_result = classifier(price_context, list(_PRICE_CATEGORIES), multi_label=True)

        # Keep up to five leading labels that clear the (deliberately low) threshold.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
            if score > 0.25
        ]

        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            price_range = next(
                (bucket for keyword, bucket in _PRICE_RANGE_KEYWORDS if keyword in primary_class),
                'unknown',
            )

        # The location check needs both a city and a usable price per sq.ft.
        location_assessment = "unknown"
        market_trends = {}
        if data.get('city') and price_per_sqft:
            location_assessment, market_trends = _assess_location(data['city'], price_per_sqft)

        price_factors = {}
        risk_indicators = []

        age_factor = _age_factor(data.get('year_built', 0))
        if age_factor is not None:
            price_factors['age_factor'] = age_factor

        if sq_ft > 0:
            price_factors['size_factor'] = _size_factor(sq_ft, price_per_sqft)
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        amenities_factor = _amenities_factor(data.get('amenities'))
        if amenities_factor is not None:
            price_factors['amenities_factor'] = amenities_factor

        # Weighted confidence: components that are absent contribute nothing,
        # but the total is still divided by the full weight sum.
        weights = _CONFIDENCE_WEIGHTS
        confidence_scores = []
        if top_classifications:
            confidence_scores.append(price_result['scores'][0] * weights['primary_classification'])

        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(location_confidence * weights['location_assessment'])

        depreciation = price_factors.get('age_factor', {}).get('depreciation_factor')
        if depreciation is not None:
            confidence_scores.append(depreciation * weights['age_factor'])

        if 'size_factor' in price_factors:
            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
            confidence_scores.append(size_confidence * weights['size_factor'])

        if 'amenities_factor' in price_factors:
            confidence_scores.append(
                price_factors['amenities_factor']['score'] * weights['amenities_factor']
            )

        overall_confidence = sum(confidence_scores) / sum(weights.values())

        if top_classifications:
            assessment = top_classifications[0]['classification']
        else:
            assessment = 'could not classify'

        return {
            'assessment': assessment,
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications,
        }
    except Exception as e:
        # Top-level boundary: callers always receive a well-formed result dict.
        logger.error(f"Error analyzing price: {str(e)}")
        return _empty_result('error', 'error')