# MachineLearningCryptoModel / randomforestML.py
# solanaexpert's picture
# Create randomforestML.py
# 56ad009 verified
# raw
# history blame
# 3.61 kB
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from binance.client import Client
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import ta
# Market being modelled and the candle size requested from Binance.
symbol = "BTCUSDT"
interval = Client.KLINE_INTERVAL_4HOUR

# Local CSV file that caches the downloaded historical candles between runs.
DATA_FILE = "btc_data.csv"

# Binance connection. No API keys are passed here; for live use, construct it
# with your own credentials instead: Client(api_key, api_secret).
client = Client()
# Column layout of a raw kline row, and the price/volume subset that is kept.
_KLINE_COLUMNS = ['timestamp', 'open', 'high', 'low', 'close', 'volume',
                  'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore']
_OHLCV_COLUMNS = ['open', 'high', 'low', 'close', 'volume']


def _klines_to_frame(klines):
    """Convert raw kline rows into a float OHLCV DataFrame indexed by timestamp.

    Args:
        klines: List of 12-field kline rows as returned by
            ``client.get_historical_klines``.

    Returns:
        DataFrame with float ``open``/``high``/``low``/``close``/``volume``
        columns, indexed by ``timestamp`` parsed from millisecond epochs.
    """
    frame = pd.DataFrame(klines, columns=_KLINE_COLUMNS)
    # astype() returns a new frame, so the column assignment below never
    # writes into a slice of another DataFrame.
    frame = frame[['timestamp'] + _OHLCV_COLUMNS].astype(dict.fromkeys(_OHLCV_COLUMNS, float))
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
    return frame.set_index('timestamp')


# Load existing data or download fresh
df = None
if os.path.exists(DATA_FILE):
    print("Loading existing data...")
    try:
        cached = pd.read_csv(DATA_FILE, index_col=0, parse_dates=True)
    except pd.errors.EmptyDataError:
        # A zero-byte cache file holds nothing to resume from.
        cached = None
    # A cache without rows has no last timestamp; fall back to a full download.
    if cached is not None and not cached.empty:
        df = cached

if df is not None:
    # Resume from the last cached candle itself, not from a point after it:
    # that candle may still have been forming when it was saved, so it is
    # fetched again and its refreshed values replace the cached ones below.
    start_str = df.index[-1].strftime("%d %B %Y %H:%M:%S")
    print(f"Downloading new data from {start_str}...")
    new_klines = client.get_historical_klines(symbol, interval, start_str)
    if new_klines:
        df = pd.concat([df, _klines_to_frame(new_klines)])
        # keep='last' so freshly downloaded rows win over stale cached rows.
        df = df[~df.index.duplicated(keep='last')]
        df.to_csv(DATA_FILE)
else:
    print("Downloading all data from scratch...")
    df = _klines_to_frame(client.get_historical_klines(symbol, interval, "01 December 2021"))
    df.to_csv(DATA_FILE)
# Feature Engineering: every indicator below is derived from the close price.
closes = df['close']
indicators = {
    # 14-period Relative Strength Index
    'rsi': ta.momentum.RSIIndicator(closes, window=14).rsi(),
    # Simple moving averages over 5 and 20 candles
    'sma_fast': closes.rolling(window=5).mean(),
    'sma_slow': closes.rolling(window=20).mean(),
    # MACD line with the library's default windows
    'macd': ta.trend.MACD(closes).macd(),
    # Exponential moving average with span 10
    'ema': closes.ewm(span=10, adjust=False).mean(),
}
# Attach the indicator series to df as new columns, in the order listed above.
for column, values in indicators.items():
    df[column] = values
# Indicator columns used as model inputs
features = ['rsi', 'sma_fast', 'sma_slow', 'macd', 'ema']

# Create target: 1 if next close > current close, else 0.
# A row whose next close is unknown (the newest row) gets NaN instead of a
# label: comparing against NaN is False, which would otherwise fabricate a 0
# ("down") label and leak it into the evaluation set.
next_close = df['close'].shift(-1)
df['target'] = np.where(next_close.isna(), np.nan,
                        (next_close > df['close']).astype(float))

# Capture the newest complete feature row before unlabeled rows are dropped;
# this is the row the final prediction is made for. Kept as a one-row
# DataFrame so the model sees the same column names it was fitted with.
latest_features = df[features].dropna().iloc[[-1]]

# Drop rows with NaN values (indicator warm-up rows and unlabeled rows)
df = df.dropna()

# Features and Target
X = df[features]
y = df['target'].astype(int)

# Train/Test split: shuffle=False keeps chronological order, so the test set
# is the most recent 20% of the labeled candles.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out, most recent candles
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Predict next movement from the newest feature row
predicted_direction = model.predict(latest_features)
print(f"Predicted next movement: {'UP' if predicted_direction[0] == 1 else 'DOWN'}")