Random Forest Classifier
Growing decision trees on MA indicator's ground
In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
from joblib import dump as joblibDump
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from io import StringIO
In [4]:
# load tickers list
df = pd.read_csv("data/SnP500.csv")
SnP500 = dict(zip(df['Ticker'], df['Company']))
In [5]:
# check if some models has already been trained
try:
df_train = pd.read_csv("data/__train.csv")
except FileNotFoundError:
df_train = pd.DataFrame(columns=['ticker', 'accuracy score'])
In [6]:
# trained model with each tickers
for ticker, description in SnP500.items():
df_OHLCV = pd.DataFrame()
try:
df = pd.read_csv("data/{0}.csv".format(ticker))
except FileNotFoundError:
data = yf.download(ticker, start='2022-01-01', end='2023-01-01')
df = pd.DataFrame(data=data)
df.to_csv("data/{0}.csv".format(ticker))
except Exception as e:
try:
df_train.to_csv("data/train.csv")
except: pass
print("Error while trying to load ticker : {0} : {1}".format(ticker, e))
exit(1)
# if indicator calculation has not already been made
if not 'Trend' in df.columns:
df['MA10'] = df['Close'].rolling(window=10).mean()
df['MA50'] = df['Close'].rolling(window=50).mean()
df['Trend'] = np.where(df['MA10'] > df['MA50'], 'Uptrend',
np.where(df['MA10'] < df['MA50'], 'Downtrend', 'Sideways'))
df = df.dropna()
# if train has not already been made
if not 'accuracy score' in df.columns:
try:
# Save last calculation
df.to_csv("data/{0}.csv".format(ticker), mode='w', index=False)
X = df[['Open', 'High', 'Low', 'Close', 'Volume', 'MA10', 'MA50']]
y = df['Trend']
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=5, random_state=42)
# Train the classifier
rf_classifier.fit(X_train, y_train)
# Make predictions
y_pred = rf_classifier.predict(X_test)
# Save trained model
joblibDump(rf_classifier, "data/RandomForestClassifier/{0}.pkl".format(ticker))
acc_score = accuracy_score(y_test, y_pred)
df['accuracy score'] = acc_score
df.to_csv("data/{0}.csv".format(ticker), mode='w', index=False)
report_df = pd.read_fwf(StringIO(classification_report(y_test, y_pred)), header=0, index_col=0)
report_df.to_csv("data/report_{0}.csv".format(ticker))
record = pd.DataFrame({'ticker': [ticker], 'accuracy score': [acc_score]})
df_train = pd.concat([df_train, record], ignore_index=True)
except Exception as e:
print("Ticker : {0} error while trying to train model for : {1}".format(ticker, e))
continue
df_train.to_csv("data/__train.csv")
Ticker : GEHC error while trying to train model for : With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters. Ticker : KVUE error while trying to train model for : With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
In [20]:
df_train = df_train.sort_values(by='accuracy score', ascending=False)
for report in df_train['ticker'].head(10):
df_top10 = pd.read_csv("data/report_{0}.csv".format(ticker))
print("****************************************************")
print("Random Forest Report for {0}\n".format(report))
print(df_top10)
**************************************************** Random Forest Report for NFLX Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for AFL Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for VTR Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for AVB Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for GOOG Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for HST Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for MDLZ Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for LNC Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for ZBRA Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41 **************************************************** Random Forest Report for ZION Unnamed: 0 precision recall f1-score support 0 Downtrend 0.94 1.00 0.97 29 1 Uptrend 1.00 0.83 0.91 12 2 accuracy NaN NaN 0.95 41 3 macro avg 0.97 0.92 0.94 41 4 weighted avg 0.95 0.95 0.95 41
In [ ]: