Last active
December 16, 2022 00:10
-
-
Save A-safarji/a6d9835e25b98db4168a2b078142a3c8 to your computer and use it in GitHub Desktop.
LSTM model building
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
| from sklearn import preprocessing | |
| from sklearn.model_selection import train_test_split | |
# Load the price history and parse the 'Date' column into datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where strict format inference is already the default behavior.
data = pd.read_csv('AAPL.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.info()

# The modelled feature is 'Adj Close': the closing price after adjustments
# for all applicable splits and dividend distributions.
y = data['Adj Close']

# Normalize the data before model fitting; scaling inputs and outputs to the
# 0-1 range generally helps neural networks such as the LSTM trained on it.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
# The univariate 1D series is reshaped into a single-column 2D array before
# being handed to the scaler.
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split below, so test-set statistics leak into the scaling.
# Left unchanged here because fixing it alters the scaled values.
y = scaler.fit_transform(np.array(y).reshape(-1, 1))

# Chronological split without shuffling: the first 65% of the rows are used
# for training, the remainder for testing.
training_size = int(len(y) * 0.65)
test_size = len(y) - training_size
train_data, test_data = y[:training_size], y[training_size:]

# Check the sizes (printed explicitly so the output also shows up when the
# file is run as a plain script rather than in a notebook cell).
print((training_size, test_size))
# Output reported by the author for their AAPL.csv: (1937, 1043)
| # building the input variable | |
def create_dataset(dataset, time_step=1):
    """Build supervised (window, next value) pairs from a time series.

    Sample ``i`` consists of the ``time_step`` consecutive values
    ``dataset[i:i + time_step, 0]`` as input and the value that follows them,
    ``dataset[i + time_step, 0]``, as target.

    Args:
        dataset: 2D array-like of shape (n, k); only column 0 is used.
        time_step: Number of consecutive observations per input window.

    Returns:
        A tuple ``(dataX, dataY)`` of NumPy arrays with shapes
        ``(n_samples, time_step)`` and ``(n_samples,)``, where
        ``n_samples = max(n - time_step - 1, 0)``.

    Note:
        The ``- 1`` in the sample count means the last possible window is not
        emitted. It is kept as in the original so existing output shapes do
        not change.
    """
    values = np.asarray(dataset)[:, 0]
    n_samples = max(len(values) - time_step - 1, 0)

    # Sliding windows over the series (0..time_step-1, 1..time_step, ...).
    windows = [values[start:start + time_step] for start in range(n_samples)]

    # The explicit reshape keeps dataX two-dimensional even when the series is
    # too short to yield a single window, so callers can rely on shape[1].
    dataX = np.array(windows).reshape(n_samples, time_step)
    # Targets are the values immediately following each window.
    dataY = values[time_step:time_step + n_samples].copy()
    return dataX, dataY
# Window length: each sample holds the last 100 observations, which are what
# the model bases its prediction on.
time_step = 100

# Turn both splits into (window, next value) pairs with create_dataset().
X_train, y_train = create_dataset(train_data, time_step)
X_test, ytest = create_dataset(test_data, time_step)

print(X_train.shape)
print(y_train.shape)
# Output: (1836, 100)  # the 100 is time_step
# Output: (1836,)

# Append a trailing axis of size 1 so the inputs become
# (samples, time_step, 1) -- presumably the 3D layout the LSTM expects.
X_train = X_train.reshape(X_train.shape + (1,))
X_test = X_test.reshape(X_test.shape + (1,))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment