Spaces:

sksameermujahid
/

property_verification_bot

Sleeping

App Files Files Community

property_verification_bot / models /price_analysis.py

sksameermujahid

Upload 45 files

14cb7ae verified 10 days ago

raw

history blame contribute delete

10.5 kB

	# models/price_analysis.py

	import math
	import re
	from datetime import datetime

	from .model_loader import load_model
	from .logging_config import logger

	# Candidate labels scored by the zero-shot classifier against the listing text.
	_PRICE_CATEGORIES = [
	    "reasonable market price",
	    "suspiciously low price",
	    "suspiciously high price",
	    "average market price",
	    "luxury property price",
	    "budget property price",
	    "premium property price",
	    "mid-range property price",
	    "overpriced for location",
	    "underpriced for location",
	    "price matches amenities",
	    "price matches property age",
	    "price matches location value",
	    "price matches property condition",
	    "price matches market trends",
	]

	# A city counts as "metro" when its lower-cased name contains one of these.
	_METRO_CITIES = ("mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune")

	# Price-per-sq.ft. band for each city tier: (min, max, market average).
	_CITY_TIER_BANDS = {
	    'metro': (5000, 30000, 15000),
	    'non-metro': (1500, 15000, 7500),
	}

	# Keywords looked up (in this order) in the primary label -> reported price range.
	_PRICE_RANGE_KEYWORDS = (
	    ('luxury', 'luxury'),
	    ('premium', 'premium'),
	    ('mid-range', 'mid_range'),
	    ('budget', 'budget'),
	)

	# Weight of each signal in the overall confidence score.
	_CONFIDENCE_WEIGHTS = {
	    'primary_classification': 0.3,
	    'location_assessment': 0.25,
	    'age_factor': 0.2,
	    'size_factor': 0.15,
	    'amenities_factor': 0.1,
	}

	# First unsigned decimal number inside free-form text such as "₹1200 approx".
	_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?|\.\d+')


	def _to_float(value):
	    """Convert a loosely formatted numeric field to a non-negative float.

	    Thousands separators are removed first. If the remaining text is not a
	    plain float literal, the first unsigned number found in it is used, so
	    inputs like "$1,200" or "950 sq ft" still parse. Missing, unparseable,
	    negative, NaN and infinite values all yield 0.0.
	    """
	    if value is None:
	        return 0.0
	    text = str(value).replace(',', '').strip()
	    try:
	        number = float(text)
	    except ValueError:
	        found = _NUMBER_RE.search(text)
	        number = float(found.group()) if found else 0.0
	    # float() accepts "nan"/"inf"; neither is a usable price or area.
	    if not math.isfinite(number) or number < 0:
	        return 0.0
	    return number


	def _empty_price_result(assessment, location_assessment):
	    """Build the zero-valued result returned when no analysis could be done."""
	    return {
	        'assessment': assessment,
	        'confidence': 0.0,
	        'price': 0,
	        'formatted_price': '₹0',
	        'price_per_sqft': 0,
	        'formatted_price_per_sqft': '₹0',
	        'price_range': 'unknown',
	        'location_price_assessment': location_assessment,
	        'has_price': False,
	        'market_trends': {},
	        'price_factors': {},
	        'risk_indicators': [],
	        'top_classifications': [],
	    }


	def _build_price_context(data, price, sq_ft, price_per_sqft):
	    """Render the listing fields as the text block given to the classifier."""
	    return f"""
	Property Type: {data.get('property_type', '')}
	Location: {data.get('city', '')}, {data.get('state', '')}
	Size: {sq_ft} sq.ft.
	Price: ₹{price:,.2f}
	Price per sq.ft.: ₹{price_per_sqft:,.2f}
	Property Status: {data.get('status', '')}
	Year Built: {data.get('year_built', '')}
	Bedrooms: {data.get('bedrooms', '')}
	Bathrooms: {data.get('bathrooms', '')}
	Amenities: {data.get('amenities', '')}
	"""


	def _assess_location(city, price_per_sqft):
	    """Compare price per sq.ft. with the band of the city's tier.

	    Returns a ``(assessment, market_trends)`` pair; ``("unknown", {})`` when
	    the city or the price per sq.ft. is missing.
	    """
	    if not city or not price_per_sqft:
	        return "unknown", {}

	    city_lower = str(city).lower()
	    is_metro = any(name in city_lower for name in _METRO_CITIES)
	    tier = 'metro' if is_metro else 'non-metro'
	    low, high, average = _CITY_TIER_BANDS[tier]

	    if price_per_sqft < low:
	        assessment = "suspiciously low"
	    elif price_per_sqft > high:
	        assessment = "suspiciously high"
	    else:
	        assessment = "reasonable"

	    market_trends = {
	        'city_tier': tier,
	        'avg_price_range': {
	            'min': low,
	            'max': high,
	            'trend': 'stable',
	        },
	        'price_per_sqft': {
	            'current': price_per_sqft,
	            'market_avg': average,
	            'deviation': abs(price_per_sqft - average) / average * 100,
	        },
	    }
	    return assessment, market_trends


	def _age_factor(raw_year):
	    """Derive the age/depreciation factor from the ``year_built`` field.

	    Returns an error entry for a missing, non-numeric or non-positive year,
	    ``None`` when the year is the current year or later, and the factor
	    dictionary otherwise.
	    """
	    try:
	        year_built = int(raw_year)
	    except (TypeError, ValueError, OverflowError):
	        return {'error': 'Invalid year built'}
	    # A missing field defaults to 0; treating that as a real year would
	    # report a property roughly two thousand years old.
	    if year_built <= 0:
	        return {'error': 'Invalid year built'}

	    property_age = datetime.now().year - year_built
	    if property_age <= 0:
	        return None

	    # 1% depreciation per year, floored at 50%.
	    depreciation_factor = max(0.5, 1 - (property_age * 0.01))
	    if property_age > 30:
	        impact = 'high'
	    elif property_age > 15:
	        impact = 'medium'
	    else:
	        impact = 'low'
	    return {
	        'property_age': property_age,
	        'depreciation_factor': depreciation_factor,
	        'impact': impact,
	    }


	def _amenities_factor(raw_amenities):
	    """Score the amenities field (comma-separated text or a collection).

	    Returns ``None`` when no non-empty amenity name is present.
	    """
	    if isinstance(raw_amenities, (list, tuple, set)):
	        entries = raw_amenities
	    else:
	        entries = str(raw_amenities).split(',')
	    # Drop blanks so "pool,,gym," counts two amenities, not four.
	    amenities_list = [name for name in (str(entry).strip() for entry in entries) if name]
	    if not amenities_list:
	        return None

	    # 10% per amenity, capped at 100%.
	    amenities_score = min(1.0, len(amenities_list) * 0.1)
	    if amenities_score > 0.7:
	        impact = 'high'
	    elif amenities_score > 0.4:
	        impact = 'medium'
	    else:
	        impact = 'low'
	    return {
	        'count': len(amenities_list),
	        'score': amenities_score,
	        'impact': impact,
	    }


	def analyze_price(data):
	    """Assess whether a property's asking price looks plausible.

	    Reads ``market_value``, ``sq_ft``, ``city``, ``state``, ``property_type``,
	    ``status``, ``year_built``, ``bedrooms``, ``bathrooms`` and ``amenities``
	    from ``data`` (a mapping with ``.get``), classifies a textual summary of
	    the listing with a zero-shot model obtained through ``load_model``, and
	    combines that with rule-based location, age, size and amenities factors.

	    Args:
	        data: Mapping of listing fields. Numeric fields may be numbers or
	            loosely formatted strings (currency symbols, commas, units).

	    Returns:
	        A dict with the keys ``assessment``, ``confidence``, ``price``,
	        ``formatted_price``, ``price_per_sqft``, ``formatted_price_per_sqft``,
	        ``price_range``, ``location_price_assessment``, ``has_price``,
	        ``market_trends``, ``price_factors``, ``risk_indicators`` and
	        ``top_classifications``. When the price is missing or unusable the
	        assessment is ``'no price'``; when anything raises, the error is
	        logged and the assessment is ``'error'``. Both of those results
	        carry zero/empty values and ``has_price`` set to ``False``.
	    """
	    try:
	        price = _to_float(data.get('market_value', '0'))
	        if not price:
	            return _empty_price_result('no price', 'cannot assess')

	        sq_ft = _to_float(data.get('sq_ft', '0'))
	        price_per_sqft = price / sq_ft if sq_ft else 0

	        classifier = load_model("zero-shot-classification", "typeform/mobilebert-uncased-mnli")
	        price_context = _build_price_context(data, price, sq_ft, price_per_sqft)
	        price_result = classifier(price_context, _PRICE_CATEGORIES, multi_label=True)

	        # Keep up to five leading labels; 0.25 is a deliberately low cut-off
	        # so weaker signals are still reported.
	        top_classifications = [
	            {'classification': label, 'confidence': float(score)}
	            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
	            if score > 0.25
	        ]

	        price_range = 'unknown'
	        if top_classifications:
	            primary_class = top_classifications[0]['classification']
	            for keyword, range_name in _PRICE_RANGE_KEYWORDS:
	                if keyword in primary_class:
	                    price_range = range_name
	                    break

	        location_assessment, market_trends = _assess_location(data.get('city'), price_per_sqft)

	        price_factors = {}
	        risk_indicators = []

	        age_factor = _age_factor(data.get('year_built', 0))
	        if age_factor is not None:
	            price_factors['age_factor'] = age_factor

	        if sq_ft > 0:
	            if 800 <= sq_ft <= 2000:
	                efficiency = 'high'
	            elif 500 <= sq_ft <= 3000:
	                efficiency = 'medium'
	            else:
	                efficiency = 'low'
	            price_factors['size_factor'] = {
	                'size': sq_ft,
	                'price_per_sqft': price_per_sqft,
	                'efficiency': efficiency,
	            }
	            if sq_ft < 300:
	                risk_indicators.append('Unusually small property size')
	            elif sq_ft > 10000:
	                risk_indicators.append('Unusually large property size')

	        if data.get('amenities'):
	            amenities_factor = _amenities_factor(data['amenities'])
	            if amenities_factor is not None:
	                price_factors['amenities_factor'] = amenities_factor

	        # Weighted sum of the available signals. The divisor is the total of
	        # all weights, so a missing signal lowers the overall confidence.
	        weights = _CONFIDENCE_WEIGHTS
	        confidence_scores = []
	        if top_classifications:
	            confidence_scores.append(
	                top_classifications[0]['confidence'] * weights['primary_classification']
	            )
	        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
	        confidence_scores.append(location_confidence * weights['location_assessment'])
	        if 'depreciation_factor' in price_factors.get('age_factor', {}):
	            confidence_scores.append(
	                price_factors['age_factor']['depreciation_factor'] * weights['age_factor']
	            )
	        if 'size_factor' in price_factors:
	            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
	            confidence_scores.append(size_confidence * weights['size_factor'])
	        if 'amenities_factor' in price_factors:
	            confidence_scores.append(
	                price_factors['amenities_factor']['score'] * weights['amenities_factor']
	            )
	        overall_confidence = sum(confidence_scores) / sum(weights.values())

	        return {
	            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
	            'confidence': float(overall_confidence),
	            'price': price,
	            'formatted_price': f"₹{price:,.0f}",
	            'price_per_sqft': price_per_sqft,
	            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
	            'price_range': price_range,
	            'location_price_assessment': location_assessment,
	            'has_price': True,
	            'market_trends': market_trends,
	            'price_factors': price_factors,
	            'risk_indicators': risk_indicators,
	            'top_classifications': top_classifications,
	        }
	    except Exception as e:
	        # Boundary handler: callers always get a well-formed result dict.
	        logger.error(f"Error analyzing price: {str(e)}")
	        return _empty_price_result('error', 'error')