Created
December 29, 2021 21:40
-
-
Save A-safarji/73e9cc41024eb5e144751ebc6d5ea37d to your computer and use it in GitHub Desktop.
Deep learning models for time series forecasting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
| from sklearn import preprocessing | |
| from sklearn.model_selection import train_test_split | |
# Load the price history and parse the 'Date' column into datetimes.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where passing it has no effect.
data = pd.read_csv('AAPL.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.info()

# The single feature used is 'Adj Close': the closing price after adjustments
# for all applicable splits and dividend distributions.
y = data['Adj Close']

# Scale the series into the range 0-1 before model fitting (the original
# author does this to match the scale expected by the LSTM layers).
# NOTE(review): the scaler is fitted on the full series *before* the
# train/test split below, so the test portion influences the scaling.
# Consider fitting on the training portion only -- left unchanged here.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
# reshape(-1, 1) converts the univariate 1D series into the 2D column
# that is passed to the scaler.
y = scaler.fit_transform(np.array(y).reshape(-1, 1))

# Chronological split: the first 65% of rows for training, the rest for testing.
training_size = int(len(y) * 0.65)
test_size = len(y) - training_size
train_data, test_data = y[:training_size, :], y[training_size:, :]

# Check the split sizes. A bare expression only displays in a notebook, so
# print the tuple explicitly.
print((training_size, test_size))
# Output reported by the original author: (1937, 1043)
| # building the input variable | |
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples from a 2D, single-column series.

    Each sample is a window of ``time_step`` consecutive values taken from
    column 0 of ``dataset``; its target is the value immediately after the
    window.

    Args:
        dataset: 2D array indexed as ``dataset[row, 0]``.
        time_step: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds one window per row
        and ``y`` holds the matching next-step targets. When ``dataset`` has
        no more than ``time_step + 1`` rows, both arrays are empty.

    Note:
        The number of samples is ``len(dataset) - time_step - 1``, so the
        final element of ``dataset`` is never used as a target. This is kept
        as-is so that existing downstream shapes do not change.
    """
    # range() over a non-positive count is empty, so short inputs simply
    # produce no samples instead of raising.
    n_samples = len(dataset) - time_step - 1
    windows = [dataset[i:i + time_step, 0] for i in range(n_samples)]
    targets = [dataset[i + time_step, 0] for i in range(n_samples)]
    # The module imports NumPy as ``np``; the bare name ``numpy`` is not bound.
    return np.array(windows), np.array(targets)
# Window length: each prediction is based on the last 100 data points.
time_step = 100

# Turn the train and test series into (window, next value) samples.
X_train, y_train = create_dataset(train_data, time_step)
X_test, ytest = create_dataset(test_data, time_step)

print(X_train.shape)
print(y_train.shape)
# Output: (1836, 100)  # The 100 is time_step
# Output: (1836,)

# Add a trailing axis of size 1: (samples, time_step) -> (samples, time_step, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment