File size: 10,482 Bytes
14cb7ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# models/price_analysis.py
import re
from .model_loader import load_model
from .logging_config import logger
# Substrings that mark a city as "metro" tier for the price-per-sq.ft. bands.
_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

# City tier -> (min, max, market average) price per sq.ft.
_CITY_TIER_BANDS = {
    'metro': (5000, 30000, 15000),
    'non-metro': (1500, 15000, 7500),
}

# (keyword in the primary classification label, resulting price_range), in priority order.
_PRICE_RANGE_KEYWORDS = (
    ('luxury', 'luxury'),
    ('premium', 'premium'),
    ('mid-range', 'mid_range'),
    ('budget', 'budget'),
)

# Relative weight of each signal in the overall confidence score.
_CONFIDENCE_WEIGHTS = {
    'primary_classification': 0.3,
    'location_assessment': 0.25,
    'age_factor': 0.2,
    'size_factor': 0.15,
    'amenities_factor': 0.1,
}

# Candidate labels handed to the zero-shot classifier.
_PRICE_CATEGORIES = [
    "reasonable market price",
    "suspiciously low price",
    "suspiciously high price",
    "average market price",
    "luxury property price",
    "budget property price",
    "premium property price",
    "mid-range property price",
    "overpriced for location",
    "underpriced for location",
    "price matches amenities",
    "price matches property age",
    "price matches location value",
    "price matches property condition",
    "price matches market trends",
]


def _parse_number(raw):
    """Best-effort conversion of a user-supplied value to float (0.0 if none found)."""
    text = str(raw).replace(',', '').strip()
    try:
        return float(text)
    except ValueError:
        pass
    # Fall back to the first numeric token so inputs such as "$1200",
    # "₹ 45 lakh" or "1200 sq.ft." still yield their leading number.
    match = re.search(r'-?\d+(?:\.\d+)?', text)
    return float(match.group()) if match else 0.0


def _empty_price_result(assessment, location_assessment):
    """Build the zeroed result returned when no analysis could be performed."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': [],
    }


def _assess_location(city, price_per_sqft):
    """Return (assessment, market_trends) for a price per sq.ft. in the given city."""
    city_lower = str(city).lower()
    is_metro = any(name in city_lower for name in _METRO_CITIES)
    tier = 'metro' if is_metro else 'non-metro'
    low, high, market_avg = _CITY_TIER_BANDS[tier]
    market_trends = {
        'city_tier': tier,
        'avg_price_range': {'min': low, 'max': high, 'trend': 'stable'},
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': abs(price_per_sqft - market_avg) / market_avg * 100,
        },
    }
    if low <= price_per_sqft <= high:
        assessment = "reasonable"
    elif price_per_sqft < low:
        assessment = "suspiciously low"
    else:
        assessment = "suspiciously high"
    return assessment, market_trends


def _age_factor(year_built_raw):
    """Return the age factor dict, an error dict, or None for a not-yet-aged property."""
    # Imported here because the module header does not import datetime.
    from datetime import datetime

    try:
        year_built = int(year_built_raw)
    except (TypeError, ValueError):
        return {'error': 'Invalid year built'}
    if year_built <= 0:
        # A missing year defaults to 0; treating it as a real year would
        # report a property that is millennia old.
        return {'error': 'Invalid year built'}
    property_age = datetime.now().year - year_built
    if property_age <= 0:
        return None
    # 1% depreciation per year, floored at 50%.
    depreciation_factor = max(0.5, 1 - (property_age * 0.01))
    if property_age > 30:
        impact = 'high'
    elif property_age > 15:
        impact = 'medium'
    else:
        impact = 'low'
    return {
        'property_age': property_age,
        'depreciation_factor': depreciation_factor,
        'impact': impact,
    }


def _amenities_factor(amenities):
    """Return the amenities factor dict, or None when no amenities are listed."""
    if isinstance(amenities, str):
        entries = amenities.split(',')
    elif isinstance(amenities, (list, tuple, set)):
        entries = amenities
    else:
        return None
    names = [str(entry).strip() for entry in entries]
    names = [name for name in names if name]
    if not names:
        return None
    # 10% per amenity, capped at 100%.
    score = min(1.0, len(names) * 0.1)
    if score > 0.7:
        impact = 'high'
    elif score > 0.4:
        impact = 'medium'
    else:
        impact = 'low'
    return {'count': len(names), 'score': score, 'impact': impact}


def analyze_price(data):
    """Assess whether a property's asking price looks plausible.

    Reads these keys from ``data`` (all optional): ``market_value``,
    ``sq_ft``, ``property_type``, ``city``, ``state``, ``status``,
    ``year_built``, ``bedrooms``, ``bathrooms`` and ``amenities``.

    The price and size are parsed leniently; when no usable price is found a
    zeroed "no price" result is returned without loading the model.
    Otherwise a zero-shot classifier labels a textual summary of the listing,
    the price per sq.ft. is compared against fixed metro / non-metro bands,
    and age, size and amenities factors are folded into a weighted
    confidence score.

    Returns:
        dict with the keys ``assessment``, ``confidence``, ``price``,
        ``formatted_price``, ``price_per_sqft``, ``formatted_price_per_sqft``,
        ``price_range``, ``location_price_assessment``, ``has_price``,
        ``market_trends``, ``price_factors``, ``risk_indicators`` and
        ``top_classifications``. Any exception is logged and reported as a
        zeroed result whose ``assessment`` is ``'error'``; nothing is raised.
    """
    try:
        price = _parse_number(data.get('market_value', '0'))
        # Size has no meaningful sign; the magnitude is what matters.
        sq_ft = abs(_parse_number(data.get('sq_ft', '0')))
        price_per_sqft = price / sq_ft if sq_ft else 0

        if not price:
            return _empty_price_result('no price', 'cannot assess')

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        # Textual summary of the listing that the classifier reasons over.
        context_lines = [
            f"Property Type: {data.get('property_type', '')}",
            f"Location: {data.get('city', '')}, {data.get('state', '')}",
            f"Size: {sq_ft} sq.ft.",
            f"Price: ₹{price:,.2f}",
            f"Price per sq.ft.: ₹{price_per_sqft:,.2f}",
            f"Property Status: {data.get('status', '')}",
            f"Year Built: {data.get('year_built', '')}",
            f"Bedrooms: {data.get('bedrooms', '')}",
            f"Bathrooms: {data.get('bathrooms', '')}",
            f"Amenities: {data.get('amenities', '')}",
        ]
        # Leading/trailing newline kept so the prompt matches the original template.
        price_context = "\n" + "\n".join(context_lines) + "\n"

        price_result = classifier(price_context, _PRICE_CATEGORIES, multi_label=True)

        # Keep at most the five leading labels, and only those above 0.25.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
            if score > 0.25
        ]

        # Price range follows from the primary classification label.
        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            for keyword, range_name in _PRICE_RANGE_KEYWORDS:
                if keyword in primary_class:
                    price_range = range_name
                    break

        # Location-specific assessment needs both a city and a unit price.
        location_assessment = "unknown"
        market_trends = {}
        if data.get('city') and price_per_sqft:
            location_assessment, market_trends = _assess_location(data['city'], price_per_sqft)

        price_factors = {}
        risk_indicators = []

        age_factor = _age_factor(data.get('year_built', 0))
        if age_factor is not None:
            price_factors['age_factor'] = age_factor

        if sq_ft > 0:
            if 800 <= sq_ft <= 2000:
                efficiency = 'high'
            elif 500 <= sq_ft <= 3000:
                efficiency = 'medium'
            else:
                efficiency = 'low'
            price_factors['size_factor'] = {
                'size': sq_ft,
                'price_per_sqft': price_per_sqft,
                'efficiency': efficiency,
            }
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        amenities_factor = _amenities_factor(data.get('amenities'))
        if amenities_factor is not None:
            price_factors['amenities_factor'] = amenities_factor

        # Weighted confidence: each available signal contributes
        # (signal confidence * weight); the sum is divided by the total
        # weight, so missing signals lower the overall score.
        confidence_scores = []
        if top_classifications:
            confidence_scores.append(
                price_result['scores'][0] * _CONFIDENCE_WEIGHTS['primary_classification']
            )
        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(location_confidence * _CONFIDENCE_WEIGHTS['location_assessment'])
        if 'depreciation_factor' in price_factors.get('age_factor', {}):
            confidence_scores.append(
                price_factors['age_factor']['depreciation_factor'] * _CONFIDENCE_WEIGHTS['age_factor']
            )
        if 'size_factor' in price_factors:
            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
            confidence_scores.append(size_confidence * _CONFIDENCE_WEIGHTS['size_factor'])
        if 'amenities_factor' in price_factors:
            confidence_scores.append(
                price_factors['amenities_factor']['score'] * _CONFIDENCE_WEIGHTS['amenities_factor']
            )
        overall_confidence = sum(confidence_scores) / sum(_CONFIDENCE_WEIGHTS.values())

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications,
        }
    except Exception as e:
        # Top-level boundary: callers always receive a result dict.
        logger.error(f"Error analyzing price: {str(e)}")
        return _empty_price_result('error', 'error')
|