|
|
|
|
|
import re |
|
from .model_loader import load_model |
|
from .logging_config import logger |
|
|
|
# Substrings that mark a city as a metro market (matched case-insensitively).
_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

# Price-per-sq.ft. band and reference average used for each city tier.
_MARKET_BANDS = {
    'metro': {'min': 5000, 'max': 30000, 'avg': 15000},
    'non-metro': {'min': 1500, 'max': 15000, 'avg': 7500},
}

# Candidate labels handed to the zero-shot classifier.
_PRICE_CATEGORIES = [
    "reasonable market price",
    "suspiciously low price",
    "suspiciously high price",
    "average market price",
    "luxury property price",
    "budget property price",
    "premium property price",
    "mid-range property price",
    "overpriced for location",
    "underpriced for location",
    "price matches amenities",
    "price matches property age",
    "price matches location value",
    "price matches property condition",
    "price matches market trends",
]

# (keyword in the top label, value reported as 'price_range'), checked in order.
_PRICE_RANGE_KEYWORDS = (
    ('luxury', 'luxury'),
    ('premium', 'premium'),
    ('mid-range', 'mid_range'),
    ('budget', 'budget'),
)

# Weight of each signal in the overall confidence score.
_CONFIDENCE_WEIGHTS = {
    'primary_classification': 0.3,
    'location_assessment': 0.25,
    'age_factor': 0.2,
    'size_factor': 0.15,
    'amenities_factor': 0.1,
}


def _parse_price(raw):
    """Convert the raw 'market_value' field to a float.

    ``None`` and blank values count as "no price" (0.0). Currency symbols
    ('$', '₹'), thousands separators and whitespace are removed first.
    Any other unparseable text raises ``ValueError`` so the caller can report
    the input as an error rather than silently treating it as free.
    """
    if raw is None:
        return 0.0
    cleaned = re.sub(r'[$₹,\s]', '', str(raw))
    return float(cleaned) if cleaned else 0.0


def _parse_area(raw):
    """Convert the raw 'sq_ft' field to a float, or 0.0 when it is unusable.

    Everything except digits and '.' is discarded (so "1,200 sq ft" parses as
    1200.0). The size is optional, so unparseable input yields 0.0 instead of
    aborting the whole analysis.
    """
    if raw is None:
        return 0.0
    digits = re.sub(r'[^\d.]', '', str(raw))
    try:
        return float(digits) if digits else 0.0
    except ValueError:
        # e.g. "1.2.3" or a lone "." left over after stripping
        return 0.0


def _blank_price_result(assessment, location_assessment):
    """Build the result returned when no analysis could be performed."""
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': [],
    }


def _assess_location(city, price_per_sqft):
    """Compare the price per sq.ft. with the fixed band for the city's tier.

    Returns a ``(location_assessment, market_trends)`` tuple; the pair is
    ``("unknown", {})`` when the city or the price per sq.ft. is missing.
    """
    if not city or not price_per_sqft:
        return "unknown", {}

    city_lower = str(city).lower()
    tier = 'metro' if any(metro in city_lower for metro in _METRO_CITIES) else 'non-metro'
    band = _MARKET_BANDS[tier]

    market_trends = {
        'city_tier': tier,
        'avg_price_range': {
            'min': band['min'],
            'max': band['max'],
            'trend': 'stable',
        },
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': band['avg'],
            'deviation': abs(price_per_sqft - band['avg']) / band['avg'] * 100,
        },
    }
    location_assessment = (
        "reasonable" if band['min'] <= price_per_sqft <= band['max'] else
        "suspiciously low" if price_per_sqft < band['min'] else
        "suspiciously high"
    )
    return location_assessment, market_trends


def _age_factor(raw_year):
    """Derive the age-based price factor from the raw 'year_built' field.

    Returns ``{'error': 'Invalid year built'}`` when the year is missing, not
    an integer, or not positive; ``None`` when the resulting age is not
    positive (built this year or later); otherwise a dict with the age, a
    depreciation factor (1% per year, floored at 0.5) and an impact label.
    """
    # Imported here so the block does not depend on a module-level import.
    from datetime import datetime

    try:
        year_built = int(raw_year)
    except (TypeError, ValueError, OverflowError):
        return {'error': 'Invalid year built'}
    if year_built <= 0:
        # A missing year must not be read as "built in year 0".
        return {'error': 'Invalid year built'}

    property_age = datetime.now().year - year_built
    if property_age <= 0:
        return None

    return {
        'property_age': property_age,
        'depreciation_factor': max(0.5, 1 - (property_age * 0.01)),
        'impact': 'high' if property_age > 30 else 'medium' if property_age > 15 else 'low',
    }


def _amenities_factor(raw_amenities):
    """Score the amenities field (comma-separated string or list/tuple/set).

    Blank entries are ignored. Returns ``None`` when no amenity names remain,
    otherwise a dict with the count, a score (0.1 per amenity, capped at 1.0)
    and an impact label.
    """
    if not raw_amenities:
        return None
    if isinstance(raw_amenities, (list, tuple, set)):
        items = raw_amenities
    else:
        items = str(raw_amenities).split(',')
    names = [name for name in (str(item).strip() for item in items) if name]
    if not names:
        return None

    score = min(1.0, len(names) * 0.1)
    return {
        'count': len(names),
        'score': score,
        'impact': 'high' if score > 0.7 else 'medium' if score > 0.4 else 'low',
    }


def analyze_price(data):
    """Assess whether a property's asking price looks plausible.

    Combines a zero-shot text classification of the listing with fixed
    price-per-sq.ft. bands per city tier and simple age, size and amenities
    heuristics.

    Args:
        data: Mapping describing the listing. Keys read: 'market_value',
            'sq_ft', 'property_type', 'city', 'state', 'status',
            'year_built', 'bedrooms', 'bathrooms', 'amenities'. All are
            optional.

    Returns:
        dict with keys 'assessment', 'confidence', 'price',
        'formatted_price', 'price_per_sqft', 'formatted_price_per_sqft',
        'price_range', 'location_price_assessment', 'has_price',
        'market_trends', 'price_factors', 'risk_indicators' and
        'top_classifications'. When the price is missing or zero the
        assessment is 'no price'; when any exception occurs it is logged and
        the assessment is 'error'. In both cases 'has_price' is False.
    """
    try:
        price = _parse_price(data.get('market_value'))
        sq_ft = _parse_area(data.get('sq_ft'))
        price_per_sqft = price / sq_ft if sq_ft else 0

        if not price:
            return _blank_price_result('no price', 'cannot assess')

        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")

        price_context = f"""
        Property Type: {data.get('property_type', '')}
        Location: {data.get('city', '')}, {data.get('state', '')}
        Size: {sq_ft} sq.ft.
        Price: ₹{price:,.2f}
        Price per sq.ft.: ₹{price_per_sqft:,.2f}
        Property Status: {data.get('status', '')}
        Year Built: {data.get('year_built', '')}
        Bedrooms: {data.get('bedrooms', '')}
        Bathrooms: {data.get('bathrooms', '')}
        Amenities: {data.get('amenities', '')}
        """

        price_result = classifier(price_context, _PRICE_CATEGORIES, multi_label=True)

        # Keep at most the first five labels, and only those scoring above 0.25.
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
            if score > 0.25
        ]

        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            for keyword, range_name in _PRICE_RANGE_KEYWORDS:
                if keyword in primary_class:
                    price_range = range_name
                    break

        location_assessment, market_trends = _assess_location(data.get('city'), price_per_sqft)

        price_factors = {}
        risk_indicators = []

        age_factor = _age_factor(data.get('year_built'))
        if age_factor is not None:
            price_factors['age_factor'] = age_factor

        if sq_ft > 0:
            price_factors['size_factor'] = {
                'size': sq_ft,
                'price_per_sqft': price_per_sqft,
                'efficiency': 'high' if 800 <= sq_ft <= 2000 else 'medium' if 500 <= sq_ft <= 3000 else 'low',
            }
            # Size-based risk flags only make sense when a size was provided.
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        amenities_factor = _amenities_factor(data.get('amenities'))
        if amenities_factor is not None:
            price_factors['amenities_factor'] = amenities_factor

        # Each available signal contributes (signal confidence * weight). The sum
        # is divided by the total weight, so missing signals lower the result.
        confidence_scores = []

        if top_classifications:
            confidence_scores.append(
                price_result['scores'][0] * _CONFIDENCE_WEIGHTS['primary_classification']
            )

        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(location_confidence * _CONFIDENCE_WEIGHTS['location_assessment'])

        if 'depreciation_factor' in price_factors.get('age_factor', {}):
            age_confidence = price_factors['age_factor']['depreciation_factor']
            confidence_scores.append(age_confidence * _CONFIDENCE_WEIGHTS['age_factor'])

        if 'size_factor' in price_factors:
            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
            confidence_scores.append(size_confidence * _CONFIDENCE_WEIGHTS['size_factor'])

        if 'amenities_factor' in price_factors:
            amenities_confidence = price_factors['amenities_factor']['score']
            confidence_scores.append(amenities_confidence * _CONFIDENCE_WEIGHTS['amenities_factor'])

        overall_confidence = sum(confidence_scores) / sum(_CONFIDENCE_WEIGHTS.values())

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications,
        }
    except Exception as e:
        # Top-level boundary: never let a malformed listing crash the caller.
        logger.error(f"Error analyzing price: {str(e)}")
        return _blank_price_result('error', 'error')
|
|