|
|
|
import pandas as pd
|
|
import random
|
|
import uuid
|
|
from faker import Faker
|
|
|
|
fake = Faker()
|
|
|
|
def generate_customer_record():
|
|
customer_id = str(uuid.uuid4())
|
|
age = random.randint(18, 80)
|
|
gender = random.choice(["Male", "Female", "Other"])
|
|
income = round(random.uniform(20000, 150000), 2)
|
|
purchase_frequency = random.randint(1, 100)
|
|
avg_spend = round(random.uniform(10, 2000), 2)
|
|
churn_risk = random.choice(["Low", "Medium", "High"])
|
|
|
|
|
|
sector = random.choice(["Retail", "E-commerce", "Banking", "Telecom", "Travel"])
|
|
if sector in ["Retail", "E-commerce"]:
|
|
product_category = random.choice(["Electronics", "Fashion", "Home", "Sports", "Beauty"])
|
|
avg_rating = round(random.uniform(1, 5), 1)
|
|
cart_abandon_rate = round(random.uniform(0, 0.5), 2)
|
|
extra = {"Sector": sector, "ProductCategory": product_category,
|
|
"AvgRating": avg_rating, "CartAbandonRate": cart_abandon_rate}
|
|
elif sector == "Banking":
|
|
credit_score = random.randint(300, 850)
|
|
num_transactions = random.randint(10, 200)
|
|
extra = {"Sector": sector, "CreditScore": credit_score,
|
|
"NumTransactions": num_transactions}
|
|
elif sector == "Telecom":
|
|
monthly_data = round(random.uniform(0.5, 50), 2)
|
|
call_minutes = random.randint(100, 3000)
|
|
extra = {"Sector": sector, "MonthlyDataGB": monthly_data,
|
|
"CallMinutes": call_minutes}
|
|
elif sector == "Travel":
|
|
trips_per_year = random.randint(0, 15)
|
|
loyalty_tier = random.choice(["Bronze", "Silver", "Gold", "Platinum"])
|
|
extra = {"Sector": sector, "TripsPerYear": trips_per_year,
|
|
"LoyaltyTier": loyalty_tier}
|
|
else:
|
|
extra = {"Sector": sector}
|
|
|
|
record = {
|
|
"CustomerID": customer_id,
|
|
"Age": age,
|
|
"Gender": gender,
|
|
"Income": income,
|
|
"PurchaseFrequency": purchase_frequency,
|
|
"AvgSpend": avg_spend,
|
|
"ChurnRisk": churn_risk
|
|
}
|
|
record.update(extra)
|
|
return record
|
|
|
|
def generate_dataset(num_records=30000):
|
|
records = [generate_customer_record() for _ in range(num_records)]
|
|
df = pd.DataFrame(records)
|
|
|
|
df.to_csv("data\\customers.csv", index=False)
|
|
print(f" Generated {num_records} customer records and saved to ../data/customers.csv")
|
|
|
|
if __name__ == "__main__":
|
|
generate_dataset()
|
|
|