Upload 10 files
Browse files- AI Customer Behavior Agent/README.md +28 -0
- AI Customer Behavior Agent/app.py +88 -0
- AI Customer Behavior Agent/backend/src/__pycache__/dqn_agent.cpython-313.pyc +0 -0
- AI Customer Behavior Agent/backend/src/dashboard.py +64 -0
- AI Customer Behavior Agent/backend/src/dqn_agent.py +103 -0
- AI Customer Behavior Agent/backend/src/generate_data.py +64 -0
- AI Customer Behavior Agent/backend/src/insert_data.py +23 -0
- AI Customer Behavior Agent/config.yaml +0 -0
- AI Customer Behavior Agent/data/customers.csv +0 -0
- AI Customer Behavior Agent/requirements.txt +6 -0
AI Customer Behavior Agent/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Customer Agent RL
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
This project simulates an advanced customer behavior analysis using an RL agent (Deep Q-Network) based on a comprehensive synthetic dataset covering multiple sectors (Retail, E-commerce, Banking, Telecom, Travel). The dataset (30,000+ records) is generated using research insights and inserted into a local MongoDB database.
|
5 |
+
|
6 |
+
## File Structure
|
7 |
+
AI_CUSTOMER_BEHAVIOR/
|
8 |
+
├── data/
|
9 |
+
│   └── customers.csv          # Generated synthetic customer records (30,000+)
|
10 |
+
├── models/
|
11 |
+
│   └── dqn_model.pth          # Saved RL model weights after training
|
12 |
+
├── src/
|
13 |
+
│   ├── generate_data.py       # Script to generate a comprehensive multi-sector dataset
|
14 |
+
│   ├── insert_data.py         # Script to load the CSV and insert data into MongoDB
|
15 |
+
│   ├── rl_agent.py            # Advanced Deep Q-Network (DQN) agent implementation (O³ model style)
|
16 |
+
│   └── dashboard.py           # Streamlit dashboard to visualize customer behavior insights
|
17 |
+
├── notebooks/
|
18 |
+
│   └── exploration.ipynb      # Notebook for exploratory analysis and experiments
|
19 |
+
├── requirements.txt           # Dependencies for the project
|
20 |
+
├── config.yaml                # Configuration file for settings (MongoDB URI, hyperparameters, etc.)
|
21 |
+
└── README.md                  # Documentation and instructions
|
22 |
+
|
23 |
+
|
24 |
+
## Setup & Usage
|
25 |
+
|
26 |
+
1. **Install Dependencies**
|
27 |
+
```bash
|
28 |
+
pip install -r requirements.txt
|
AI Customer Behavior Agent/app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import random
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
from sklearn.preprocessing import MinMaxScaler
|
7 |
+
from backend.src.dqn_agent import AdvancedDQNAgent
|
8 |
+
|
9 |
+
# Page chrome: browser-tab title, wide layout, headline and a usage hint.
st.set_page_config(layout="wide", page_title="Next (AI): Customer")
st.title("Next AI: Product Recommendation Agent")
st.markdown(
    "Chat with nExT(AI) to get real-time customer targeting recommendations. "
    "Type a query (e.g., 'Show me discount recommendations') and nExT(AI) "
    "will reply with the corresponding customer table."
)

# The chat transcript lives in session state so it survives script re-runs;
# create the empty list only the first time the session executes the script.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
|
16 |
+
|
17 |
+
# Helper function to load and preprocess data
|
18 |
+
@st.cache_data(show_spinner=False)
def load_data():
    """Read the customer records from ``data/customers.csv``.

    The path is relative to the current working directory, so the app must be
    started from the project root. The result is memoised by
    ``st.cache_data``.

    Returns:
        pandas.DataFrame: one row per record in the CSV file.
    """
    # Forward slashes are accepted by Python's file APIs on Windows as well as
    # on POSIX systems; the previous "data\\customers.csv" literal only
    # resolved on Windows.
    return pd.read_csv("data/customers.csv")
|
22 |
+
|
23 |
+
def preprocess_data(df):
    """Turn the customer frame into a min-max scaled feature matrix.

    Side effect: adds a ``ChurnRiskEncoded`` column to ``df`` in place
    (Low -> 0, Medium -> 1, High -> 2; any other label maps to NaN).

    Args:
        df: DataFrame with ``Age``, ``Income``, ``PurchaseFrequency``,
            ``AvgSpend`` and ``ChurnRisk`` columns.

    Returns:
        The array produced by ``MinMaxScaler.fit_transform`` over the five
        feature columns, in the order listed below.
    """
    df['ChurnRiskEncoded'] = df['ChurnRisk'].map({"Low": 0, "Medium": 1, "High": 2})
    feature_columns = ['Age', 'Income', 'PurchaseFrequency', 'AvgSpend', 'ChurnRiskEncoded']
    raw_matrix = df[feature_columns].values
    return MinMaxScaler().fit_transform(raw_matrix)
|
30 |
+
|
31 |
+
# Load and preprocess data
df = load_data()
states = preprocess_data(df)

# Setup RL agent (state vector of 5 features; actions: 0: Discount, 1: Recommend Product, 2: No Action)
state_size = states.shape[1]
action_size = 3
agent = AdvancedDQNAgent(state_size, action_size)
# A freshly constructed agent has epsilon = 1.0, and act() returns a uniformly
# random action whenever its random draw is <= epsilon, so without this line
# every "recommendation" would be random. Disable exploration for inference.
# NOTE(review): no trained weights are loaded here, so the policy network is
# still randomly initialised; call agent.load(...) once a checkpoint exists.
agent.epsilon = 0.0

# Streamlit re-executes this script on every interaction. Score the customers
# once per session and reuse the result so the tables shown in the chat do not
# change between messages. The length check re-scores if the data changed.
if (
    "recommendations" not in st.session_state
    or len(st.session_state["recommendations"]) != len(states)
):
    st.session_state["recommendations"] = [agent.act(state) for state in states]
recommendations = st.session_state["recommendations"]
df['Recommendation'] = recommendations

# Group recommendations by action id
discount_df = df[df['Recommendation'] == 0]
product_df = df[df['Recommendation'] == 1]
no_action_df = df[df['Recommendation'] == 2]
|
48 |
+
|
49 |
+
# Define a simple function to process user queries
|
50 |
+
def process_query(query):
    """Map a free-text chat query to a reply message and a customer table.

    Matching is a case-insensitive substring search, tried in this order:
    "discount", "product", "no action", then "all" or "recommendation".
    The first route that matches wins.

    Args:
        query: the text typed by the user.

    Returns:
        tuple: ``(response, table)`` where ``table`` is a DataFrame slice, or
        ``None`` when no keyword matched.
    """
    base_columns = ['CustomerID', 'Age', 'Income', 'PurchaseFrequency', 'AvgSpend', 'ChurnRisk']
    # Each route: (keywords, reply text, source frame, columns to show).
    routes = (
        (
            ("discount",),
            "Here are the customers recommended for a discount (Action 0):",
            discount_df,
            base_columns,
        ),
        (
            ("product",),
            "Here are the customers recommended for a product suggestion (Action 1):",
            product_df,
            base_columns,
        ),
        (
            ("no action",),
            "Here are the customers for whom no specific action is recommended (Action 2):",
            no_action_df,
            base_columns,
        ),
        (
            ("all", "recommendation"),
            "Here are all customer recommendations:",
            df,
            base_columns + ['Recommendation'],
        ),
    )

    needle = query.lower()
    for keywords, reply, frame, columns in routes:
        if any(word in needle for word in keywords):
            return reply, frame[columns]

    fallback = "I'm sorry, I didn't understand that. Please ask for discount, product, or no action recommendations."
    return fallback, None
|
68 |
+
|
69 |
+
# Reserve the transcript area first so it keeps its position above the input
# box even though it is filled in only after the input has been handled.
history_area = st.container()

# Input for new message
user_input = st.text_input("Type your message here and press Enter:")

# The text input keeps its value across script re-runs, so remember the last
# message that was answered and only react when the text actually changed;
# otherwise any unrelated re-run would append the same exchange again.
if "last_user_input" not in st.session_state:
    st.session_state["last_user_input"] = ""

if user_input and user_input != st.session_state["last_user_input"]:
    st.session_state["last_user_input"] = user_input
    # Append user's message to chat history
    st.session_state.chat_history.append({"role": "user", "message": user_input})
    # Process the query and get reply
    reply_text, reply_table = process_query(user_input)
    st.session_state.chat_history.append({"role": "agent", "message": reply_text, "table": reply_table})

# Render the transcript AFTER the new exchange has been recorded. Rendering
# before the append (as the script did previously) showed each reply one
# interaction late.
with history_area:
    for chat in st.session_state.chat_history:
        is_user = chat["role"] == "user"
        speaker = "User" if is_user else "nExT(AI)"
        st.markdown(f"**{speaker}:** {chat['message']}")
        # Only agent replies can carry a result table.
        if not is_user and chat.get("table") is not None:
            st.table(chat["table"])
|
AI Customer Behavior Agent/backend/src/__pycache__/dqn_agent.cpython-313.pyc
ADDED
Binary file (7.73 kB). View file
|
|
AI Customer Behavior Agent/backend/src/dashboard.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: src/dashboard.py
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from pymongo import MongoClient
|
6 |
+
|
7 |
+
# MongoDB settings (can also be loaded from config.yaml)
|
8 |
+
MONGO_URI = "mongodb://localhost:27017/"
|
9 |
+
DB_NAME = "customer_db"
|
10 |
+
COLLECTION_NAME = "customers"
|
11 |
+
|
12 |
+
def load_data():
    """Fetch every document of the customers collection as a DataFrame.

    Connects to ``MONGO_URI`` and reads ``DB_NAME.COLLECTION_NAME``.

    Returns:
        pandas.DataFrame: one row per document, without Mongo's ``_id``
        field. The frame has no columns when the collection is empty.
    """
    # MongoClient is a context manager; leaving the block closes the client.
    # The previous version left a client open on every call.
    with MongoClient(MONGO_URI) as client:
        collection = client[DB_NAME][COLLECTION_NAME]
        # Materialise the cursor inside the block, while the client is open.
        data = list(collection.find({}, {"_id": 0}))
    return pd.DataFrame(data)
|
18 |
+
|
19 |
+
# NOTE(review): the leading "π" looks like a mis-decoded emoji — confirm the
# intended glyph before changing this user-facing string.
st.title("π Advanced Multi-Sector Customer Behavior Dashboard")

df = load_data()

# An empty collection yields a DataFrame without columns, and every filter
# below would then fail with KeyError. Stop early with a readable message.
if df.empty or not {"Age", "Income", "Sector"}.issubset(df.columns):
    st.warning("No customer records found in MongoDB. Run insert_data.py first.")
    st.stop()

# Sidebar Filters
st.sidebar.header("Filters")
age_min, age_max = st.sidebar.slider("Age Range", 18, 80, (25, 60))
income_min, income_max = st.sidebar.slider("Income Range", 20000, 150000, (30000, 100000))
sector_options = df["Sector"].unique()
sectors = st.sidebar.multiselect("Select Sector(s)", options=sector_options, default=sector_options)

filtered_df = df[
    (df["Age"] >= age_min) & (df["Age"] <= age_max) &
    (df["Income"] >= income_min) & (df["Income"] <= income_max) &
    (df["Sector"].isin(sectors))
]

st.write(f"### Showing {len(filtered_df)} records based on filters")
st.dataframe(filtered_df.head(50))

# Plot: Distribution by Sector
st.subheader("Customer Distribution by Sector")
sector_counts = filtered_df["Sector"].value_counts()
if sector_counts.empty:
    # Nothing to draw when the filters exclude every record.
    st.info("No records match the current filters.")
else:
    fig, ax = plt.subplots()
    sector_counts.plot(kind="bar", ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; this
    # script runs again on each interaction, so release the figure here.
    plt.close(fig)

# Sector-specific insights
# sector -> (metric column, message template, message when no value exists)
sector_metrics = {
    "Retail": ("AvgRating", "Average Product Rating: {:.2f}", "No rating data"),
    "E-commerce": ("AvgRating", "Average Product Rating: {:.2f}", "No rating data"),
    "Banking": ("CreditScore", "Average Credit Score: {:.2f}", "No credit data"),
    "Telecom": ("MonthlyDataGB", "Average Monthly Data Usage: {:.2f} GB", "No telecom data"),
    "Travel": ("TripsPerYear", "Average Trips per Year: {:.2f}", "No travel data"),
}

st.subheader("Sector-specific Insights")
for sector in sectors:
    st.write(f"**{sector}**")
    sector_data = filtered_df[filtered_df["Sector"] == sector]
    metric = sector_metrics.get(sector)
    if metric is not None:
        column, template, fallback = metric
        average = sector_data[column].mean() if column in sector_data.columns else None
        # pd.notna is False for both None and NaN (the mean of an empty or
        # all-missing column), while a genuine 0.0 average is still reported.
        # The previous truthiness test hid 0.0 and printed "nan".
        st.write(template.format(average) if pd.notna(average) else fallback)
    st.write("---")

st.write("Dashboard powered by **Streamlit, MongoDB & an advanced multi-sector synthetic dataset**")
|
AI Customer Behavior Agent/backend/src/dqn_agent.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: src/dqn_agent.py
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
import torch.optim as optim
|
5 |
+
import random
|
6 |
+
import numpy as np
|
7 |
+
from collections import deque
|
8 |
+
|
9 |
+
# Dueling DQN network architecture for state-action value estimation
|
10 |
+
class DuelingDQN(nn.Module):
    """Dueling Q-network: a shared two-layer trunk with value and advantage heads.

    Args:
        state_size: number of input features per state.
        action_size: number of discrete actions (width of the advantage head).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        trunk_width, head_width = 128, 64

        # Shared feature trunk.
        self.fc1 = nn.Linear(state_size, trunk_width)
        self.fc2 = nn.Linear(trunk_width, trunk_width)

        # Head producing one scalar state value per sample.
        self.value_stream = nn.Sequential(
            nn.Linear(trunk_width, head_width),
            nn.ReLU(),
            nn.Linear(head_width, 1),
        )

        # Head producing one advantage per action.
        self.advantage_stream = nn.Sequential(
            nn.Linear(trunk_width, head_width),
            nn.ReLU(),
            nn.Linear(head_width, action_size),
        )

    def forward(self, x):
        """Return Q-values for a batch of states.

        The advantages are mean-centred along ``dim=1`` before being added to
        the state value, so ``x`` must carry a leading batch dimension.
        """
        hidden = torch.relu(self.fc2(torch.relu(self.fc1(x))))
        state_value = self.value_stream(hidden)
        advantages = self.advantage_stream(hidden)
        centred = advantages - advantages.mean(dim=1, keepdim=True)
        return state_value + centred
|
36 |
+
|
37 |
+
class AdvancedDQNAgent:
    """Double-DQN agent using two ``DuelingDQN`` networks and uniform replay.

    The policy network is trained; the target network is a copy refreshed
    only by ``update_target_network`` (called on construction and after
    ``load``), so callers decide when to re-sync it during training.

    Args:
        state_size: number of features in a state vector.
        action_size: number of discrete actions.
        device: torch device for both networks and all batches.
    """

    def __init__(self, state_size, action_size, device="cpu"):
        self.state_size = state_size
        self.action_size = action_size
        self.device = device

        self.memory = deque(maxlen=10000)  # oldest transitions drop out once full
        self.gamma = 0.99  # discount factor
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.batch_size = 64

        self.policy_net = DuelingDQN(state_size, action_size).to(device)
        self.target_net = DuelingDQN(state_size, action_size).to(device)
        self.update_target_network()
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def update_target_network(self):
        """Copy the policy network's weights into the target network."""
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def act(self, state):
        """Choose an action for one state with an epsilon-greedy rule.

        Args:
            state: a single state vector (sequence or array of features).

        Returns:
            int: a random action when the draw is <= ``self.epsilon``,
            otherwise the argmax of the policy network's Q-values.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state_tensor = torch.as_tensor(
            state, dtype=torch.float32, device=self.device
        ).unsqueeze(0)  # add the batch dimension the network expects
        with torch.no_grad():
            q_values = self.policy_net(state_tensor)
        return int(torch.argmax(q_values).item())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def _batch_tensor(self, values, dtype):
        """Stack a tuple of per-sample values into one tensor on ``self.device``."""
        # Going through one NumPy array avoids building the tensor element by
        # element from a tuple of separate arrays.
        return torch.as_tensor(np.asarray(values), dtype=dtype, device=self.device)

    def replay(self):
        """Run one optimisation step on a random minibatch, then decay epsilon.

        Does nothing until the memory holds at least ``batch_size``
        transitions.
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = self._batch_tensor(states, torch.float32)
        actions = self._batch_tensor(actions, torch.int64).unsqueeze(1)
        rewards = self._batch_tensor(rewards, torch.float32).unsqueeze(1)
        next_states = self._batch_tensor(next_states, torch.float32)
        dones = self._batch_tensor(dones, torch.float32).unsqueeze(1)

        # Compute current Q-values
        current_q = self.policy_net(states).gather(1, actions)

        # Double DQN: select next action using policy net, evaluate with
        # target net. The target is a constant for the loss, so build it
        # without recording an autograd graph.
        with torch.no_grad():
            next_actions = torch.argmax(self.policy_net(next_states), dim=1, keepdim=True)
            next_q = self.target_net(next_states).gather(1, next_actions)
            # (1 - dones) removes the bootstrap term for terminal transitions.
            target_q = rewards + (self.gamma * next_q * (1 - dones))

        loss = self.criterion(current_q, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay only while above the floor, and clamp so the last step cannot
        # undershoot epsilon_min. A caller-set epsilon at or below the floor
        # (e.g. 0.0 for pure inference) is left untouched.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, path):
        """Write the policy network's ``state_dict`` to ``path``."""
        torch.save(self.policy_net.state_dict(), path)

    def load(self, path):
        """Load policy weights from ``path`` and re-sync the target network."""
        # map_location moves the stored tensors onto this agent's device, so a
        # checkpoint saved on a GPU can be restored on a CPU-only machine.
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # that come from a trusted source.
        state_dict = torch.load(path, map_location=self.device)
        self.policy_net.load_state_dict(state_dict)
        self.update_target_network()
|
AI Customer Behavior Agent/backend/src/generate_data.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: src/generate_data.py
|
2 |
+
import pandas as pd
|
3 |
+
import random
|
4 |
+
import uuid
|
5 |
+
from faker import Faker
|
6 |
+
|
7 |
+
fake = Faker()
|
8 |
+
|
9 |
+
def generate_customer_record():
    """Build one synthetic customer record as a flat dict.

    Every record carries the common fields (CustomerID, Age, Gender, Income,
    PurchaseFrequency, AvgSpend, ChurnRisk) followed by ``Sector`` and the
    fields specific to that sector.

    Returns:
        dict: field name -> randomly drawn value.
    """
    # Common fields. Dict entries are evaluated top to bottom, which fixes
    # the order in which the random draws happen.
    record = {
        "CustomerID": str(uuid.uuid4()),
        "Age": random.randint(18, 80),
        "Gender": random.choice(["Male", "Female", "Other"]),
        "Income": round(random.uniform(20000, 150000), 2),
        "PurchaseFrequency": random.randint(1, 100),
        "AvgSpend": round(random.uniform(10, 2000), 2),
        "ChurnRisk": random.choice(["Low", "Medium", "High"]),
    }

    # Sector and sector-specific details
    sector = random.choice(["Retail", "E-commerce", "Banking", "Telecom", "Travel"])
    record["Sector"] = sector

    if sector in ("Retail", "E-commerce"):
        record["ProductCategory"] = random.choice(
            ["Electronics", "Fashion", "Home", "Sports", "Beauty"]
        )
        record["AvgRating"] = round(random.uniform(1, 5), 1)
        record["CartAbandonRate"] = round(random.uniform(0, 0.5), 2)
    elif sector == "Banking":
        record["CreditScore"] = random.randint(300, 850)
        record["NumTransactions"] = random.randint(10, 200)
    elif sector == "Telecom":
        record["MonthlyDataGB"] = round(random.uniform(0.5, 50), 2)
        record["CallMinutes"] = random.randint(100, 3000)
    elif sector == "Travel":
        record["TripsPerYear"] = random.randint(0, 15)
        record["LoyaltyTier"] = random.choice(["Bronze", "Silver", "Gold", "Platinum"])

    return record
|
55 |
+
|
56 |
+
def generate_dataset(num_records=30000, output_path="data/customers.csv"):
    """Generate synthetic customer records and write them to a CSV file.

    Args:
        num_records: how many records to generate.
        output_path: destination CSV path, relative to the current working
            directory unless absolute. Missing parent folders are created.
    """
    from pathlib import Path

    records = [generate_customer_record() for _ in range(num_records)]
    df = pd.DataFrame(records)

    # Save CSV to the data folder. The previous "data\\customers.csv" literal
    # only resolved on Windows and failed when the folder did not exist.
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(destination, index=False)

    # Report the path that was actually written (the old message named
    # "../data/customers.csv", which is not where the file was saved).
    print(f" Generated {num_records} customer records and saved to {destination}")
|
62 |
+
|
63 |
+
if __name__ == "__main__":
|
64 |
+
generate_dataset()
|
AI Customer Behavior Agent/backend/src/insert_data.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# File: src/insert_data.py
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
|
5 |
+
# Configuration (can also be set via config.yaml)
|
6 |
+
MONGO_URI = "mongodb://localhost:27017/"
|
7 |
+
DB_NAME = "customer_db"
|
8 |
+
COLLECTION_NAME = "customers"
|
9 |
+
|
10 |
+
def insert_data():
    """Replace the contents of the MongoDB customers collection with the CSV.

    Reads ``data/customers.csv`` (relative to the current working directory),
    then deletes every document in ``DB_NAME.COLLECTION_NAME`` and inserts the
    CSV rows. If the CSV has no rows the collection is left untouched.
    """
    # Load data from CSV before touching the database, so a missing or empty
    # file cannot leave the collection wiped. Forward slashes keep the path
    # portable; the previous "data\\customers.csv" only resolved on Windows.
    df = pd.read_csv("data/customers.csv")
    records = df.to_dict(orient="records")
    if not records:
        # insert_many rejects an empty list, and that call came after
        # delete_many, so the old code would have emptied the collection.
        print(" No records found in data/customers.csv; collection left unchanged.")
        return

    # MongoClient is a context manager; leaving the block closes the client.
    with MongoClient(MONGO_URI) as client:
        collection = client[DB_NAME][COLLECTION_NAME]
        collection.delete_many({})
        collection.insert_many(records)

    print(f" Inserted {len(records)} records into MongoDB collection '{COLLECTION_NAME}' in DB '{DB_NAME}'.")
|
21 |
+
|
22 |
+
if __name__ == "__main__":
|
23 |
+
insert_data()
|
AI Customer Behavior Agent/config.yaml
ADDED
File without changes
|
AI Customer Behavior Agent/data/customers.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
AI Customer Behavior Agent/requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
faker
|
3 |
+
pymongo
|
4 |
+
torch
|
5 |
+
streamlit
|
6 |
+
matplotlib
|