Delete data/preprocessing.py
Browse files- data/preprocessing.py +0 -23
data/preprocessing.py
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from sklearn.ensemble import IsolationForest
|
3 |
-
|
4 |
-
def clean_data(file):
    """Remove anomalous (outlier) rows from a UMKM (MSME) demand/supply CSV.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
            The CSV must contain the columns ``tanggal`` (date),
            ``demand`` and ``supply``.

    Returns:
        A new DataFrame containing only the rows that the isolation forest
        labelled as inliers, with ``tanggal`` converted to datetimes. The
        row index of the input is kept as-is (it is not reset). If the CSV
        has no data rows, the empty frame is returned unchanged.

    Raises:
        KeyError: If any of the required columns is missing from the CSV.
    """
    # Load the raw data.
    df = pd.read_csv(file)

    # Fail early with a message that names every missing column instead of
    # surfacing a bare KeyError halfway through the processing.
    required = ('tanggal', 'demand', 'supply')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"missing required column(s): {', '.join(missing)}")

    # Parse the date column.
    df['tanggal'] = pd.to_datetime(df['tanggal'])

    # The model cannot be fitted on zero samples; with no rows there is
    # nothing to filter, so return the (empty) frame as-is.
    if df.empty:
        return df

    # Detect anomalies: fit_predict labels inliers as 1 and outliers as -1.
    # random_state is fixed so repeated runs on the same data agree.
    clf = IsolationForest(contamination=0.05, random_state=42)
    df['anomaly'] = clf.fit_predict(df[['demand', 'supply']])

    # Keep only the inliers and drop the helper label column.
    clean_df = df[df['anomaly'] == 1].copy()
    clean_df.drop('anomaly', axis=1, inplace=True)

    return clean_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|