Sunday, July 7, 2019

[Stocks] Combining a Multi-Layer 1D CNN with an LSTM to Predict Stock Price Movements


A 1D CNN is good at extracting short-term features and acts as a kind of short-term memory, while an LSTM provides longer-term memory. Here we combine the strengths of 1D CNN and LSTM to predict stock price movements, and compare the result with the multi-layer LSTM model from the earlier post. The training data is essentially the same as in the multi-layer LSTM case; the only difference is that the input is a three-dimensional tensor of shape (samples, days of prices, features), so that the 1D CNN output can be fed into the LSTM.

import os
import time
import sqlite3
import numpy as np
from numpy import newaxis
import pandas as pd
import matplotlib.pyplot as plt
from keras import layers
from keras.layers.core import Activation, Dropout
from keras.layers import Dense
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler

Load the price data from the database

os.getcwd()  # check the current working directory
os.chdir('/Users/laihunglai/Dropbox/Tensorflow/stock')
os.getcwd()
db = sqlite3.connect('stock_by_index')
stock = pd.read_sql(con=db, sql='SELECT * FROM "0050"')
stock_price = stock['收盤價'].values.astype('float32')[:, np.newaxis]  # '收盤價' = closing price
scaler = MinMaxScaler(feature_range=(0, 1))
stock_price = scaler.fit_transform(stock_price)  # scale prices to [0, 1]
plt.plot(stock_price)
plt.show()
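
For reference, MinMaxScaler maps each price to (x − x_min)/(x_max − x_min), so every value lands in [0, 1]; the fitted scaler is kept around so predictions can later be mapped back to actual prices with scaler.inverse_transform.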

train_size = int(len(stock_price) * 0.80)
test_size = len(stock_price) - train_size
train, test = stock_price[0:train_size,:], stock_price[train_size:len(stock_price),:]
print(len(train), len(test))
976 245

A helper function to prepare the training data: the input is the price series; the outputs are dataX, m windows of look_back consecutive daily prices with shape (m, look_back), and dataY, the price on the day after each window, with shape (m,).

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0]   # window of look_back days
        dataX.append(a)
        b = dataset[i + look_back, 0]       # the following day's price
        dataY.append(b)
    return np.array(dataX), np.array(dataY)
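
As a quick sanity check (an illustrative toy example, not from the original post), with look_back=2 the windows and targets come out as expected:

toy = np.arange(6, dtype='float32')[:, np.newaxis]   # prices 0..5
X, y = create_dataset(toy, look_back=2)
print(X)   # [[0. 1.] [1. 2.] [2. 3.]]
print(y)   # [2. 3. 4.]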


# feed the train and test prices into the function above to get (trainX, trainY) and (testX, testY)
look_back = 30
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX[:, :, np.newaxis]  # add a feature axis: (samples, look_back) -> (samples, look_back, 1)
testX = testX[:, :, np.newaxis]
trainX.shape
(945, 30, 1)
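
The 945 windows check out: len(train) − look_back − 1 = 976 − 30 − 1 = 945, since create_dataset stops one step short of the end of the series.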

# build the model: two Conv1D + max-pooling stages followed by stacked LSTMs,
# with a Dense layer at the end to output a one-dimensional prediction
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', padding='same', input_shape=(None, trainX[0].shape[-1])))
model.add(layers.MaxPool1D(3))
model.add(layers.Conv1D(32, 5, activation='relu', padding='same'))
model.add(layers.MaxPool1D(3))
model.add(LSTM(32, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=False))  # last LSTM returns only the final state
model.add(Dropout(0.2))
model.add(Dense(1))
model.add(Activation('linear'))
start = time.time()
model.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
print('compilation time : ', time.time() - start)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1d_1 (Conv1D)            (None, None, 32)          192       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, None, 32)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, None, 32)          5152      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, None, 32)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 32)          8320      
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 32)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, None, 32)          8320      
_________________________________________________________________
dropout_2 (Dropout)          (None, None, 32)          0         
_________________________________________________________________
lstm_3 (LSTM)                (None, None, 32)          8320      
_________________________________________________________________
dropout_3 (Dropout)          (None, None, 32)          0         
_________________________________________________________________
lstm_4 (LSTM)                (None, None, 32)          8320      
_________________________________________________________________
dropout_4 (Dropout)          (None, None, 32)          0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_5 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
_________________________________________________________________
activation_1 (Activation)    (None, 1)                 0         
=================================================================
Total params: 46,977
Trainable params: 46,977
Non-trainable params: 0
_________________________________________________________________
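
The time dimension shows as None in the summary because input_shape leaves it variable, but for a look_back = 30 window the shapes trace out as follows (a sketch based on the layer settings above; batch dimension omitted):

# input window          (30, 1)
# Conv1D(32, 5, same)   (30, 32)   padding='same' keeps the time axis length
# MaxPool1D(3)          (10, 32)   30 / 3
# Conv1D(32, 5, same)   (10, 32)
# MaxPool1D(3)          (3, 32)    floor(10 / 3)
# LSTM x 5              (3, 32) -> ... -> (32)   only the last LSTM drops the time axis
# Dense(1)              (1,)

So the LSTM stack only has to carry 3 downsampled steps per window. The parameter counts also check out: each LSTM layer has 4 × ((32 + 32) × 32 + 32) = 8,320 parameters, matching the summary.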

train_history = model.fit(trainX, trainY, batch_size=128, epochs=20, validation_split=0.2)

Train on 756 samples, validate on 189 samples
Epoch 1/20
756/756 [==============================] - 10s 13ms/step - loss: 0.1512 - mean_absolute_error: 0.3462 - val_loss: 0.4892 - val_mean_absolute_error: 0.6971
Epoch 2/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0616 - mean_absolute_error: 0.1950 - val_loss: 0.0910 - val_mean_absolute_error: 0.2958
Epoch 3/20
756/756 [==============================] - 2s 3ms/step - loss: 0.0205 - mean_absolute_error: 0.1135 - val_loss: 0.0843 - val_mean_absolute_error: 0.2840
Epoch 4/20
756/756 [==============================] - 2s 2ms/step - loss: 0.0171 - mean_absolute_error: 0.1028 - val_loss: 0.0177 - val_mean_absolute_error: 0.1170
Epoch 5/20
756/756 [==============================] - 2s 2ms/step - loss: 0.0126 - mean_absolute_error: 0.0892 - val_loss: 0.0354 - val_mean_absolute_error: 0.1767
Epoch 6/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0144 - mean_absolute_error: 0.0936 - val_loss: 0.0176 - val_mean_absolute_error: 0.1152
Epoch 7/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0117 - mean_absolute_error: 0.0862 - val_loss: 0.0052 - val_mean_absolute_error: 0.0570
Epoch 8/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0113 - mean_absolute_error: 0.0823 - val_loss: 0.0052 - val_mean_absolute_error: 0.0585
Epoch 9/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0119 - mean_absolute_error: 0.0869 - val_loss: 0.0332 - val_mean_absolute_error: 0.1685
Epoch 10/20
756/756 [==============================] - 2s 3ms/step - loss: 0.0115 - mean_absolute_error: 0.0848 - val_loss: 0.0070 - val_mean_absolute_error: 0.0704
Epoch 11/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0104 - mean_absolute_error: 0.0801 - val_loss: 0.0062 - val_mean_absolute_error: 0.0664
Epoch 12/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0088 - mean_absolute_error: 0.0739 - val_loss: 0.0317 - val_mean_absolute_error: 0.1616
Epoch 13/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0115 - mean_absolute_error: 0.0858 - val_loss: 0.0099 - val_mean_absolute_error: 0.0791
Epoch 14/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0083 - mean_absolute_error: 0.0726 - val_loss: 0.0411 - val_mean_absolute_error: 0.1871
Epoch 15/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0134 - mean_absolute_error: 0.0930 - val_loss: 0.0088 - val_mean_absolute_error: 0.0740
Epoch 16/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0088 - mean_absolute_error: 0.0741 - val_loss: 0.0073 - val_mean_absolute_error: 0.0722
Epoch 17/20
756/756 [==============================] - 2s 3ms/step - loss: 0.0095 - mean_absolute_error: 0.0778 - val_loss: 0.0056 - val_mean_absolute_error: 0.0620
Epoch 18/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0097 - mean_absolute_error: 0.0781 - val_loss: 0.0059 - val_mean_absolute_error: 0.0641
Epoch 19/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0078 - mean_absolute_error: 0.0696 - val_loss: 0.0066 - val_mean_absolute_error: 0.0630
Epoch 20/20
756/756 [==============================] - 1s 2ms/step - loss: 0.0113 - mean_absolute_error: 0.0835 - val_loss: 0.0065 - val_mean_absolute_error: 0.0625

def show_train_history(train_history, train, validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train history')
    plt.ylabel(train)
    plt.yscale('log')
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

show_train_history(train_history, 'loss', 'val_loss')
show_train_history(train_history, 'mean_absolute_error', 'val_mean_absolute_error')

A helper for multi-step forecasting: starting from one look_back window, predict the next day, then slide the window forward by appending the prediction and dropping the oldest value.

def predict_sequences_multiple(model, firstValue, length):
    prediction_seqs = []
    curr_frame = firstValue
    for i in range(length):
        a = model.predict(curr_frame[np.newaxis, :])[0, 0]
        # append the prediction, keep the most recent look_back values,
        # and restore the trailing feature axis (look_back from the enclosing scope)
        curr_frame = np.insert(curr_frame, look_back, a)[-look_back:][:, newaxis]
        prediction_seqs.append(a)
    return prediction_seqs

def inverse_transform(testY, predictions):
    # map scaled values back to actual prices
    predictions_origins = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    answer = scaler.inverse_transform(testY.reshape(-1, 1))
    return answer, predictions_origins
def stock_pred_plot(answer, predictions_origins):
    error = (predictions_origins - answer) / answer * 100
    fig, axis = plt.subplots(2, 1, figsize=(5, 4))
    axis[0].plot(predictions_origins, label='predict')
    axis[0].plot(answer, label='real')
    axis[0].set_ylabel('price')
    axis[0].legend()
    axis[1].set_xlabel('Day')
    axis[1].set_ylabel('error(%)')
    axis[1].plot(error, label='error')
    axis[1].legend()
    plt.show()
def error_history(answer, predictions_origins):
    result = (predictions_origins - answer) / answer * 100
    return result[:, 0]
# run 30-day rolling forecasts starting every 5 days across the test set
predict_length = 30
for i in np.arange(0, 100, 5):
    predictions = predict_sequences_multiple(model, testX[i], predict_length)
    answer = testY[i:(i + predict_length)]
    ans, pred = inverse_transform(answer, predictions)
    stock_pred_plot(ans, pred)
# compute the errors for each forecast
error_history_all = []
for i in np.arange(0, 100, 5):
    predictions = predict_sequences_multiple(model, testX[i], predict_length)
    answer = testY[i:(i + predict_length)]
    ans, pred = inverse_transform(answer, predictions)
    error_history_all.append(error_history(ans, pred))
all_error = np.array(error_history_all)
mean = all_error.mean(axis=0)
std = all_error.std(axis=0)
df = pd.DataFrame(all_error)
# plot the test errors
fig, axis = plt.subplots(2, 1, figsize=(10, 5))
df.T.plot(ax=axis[0])
axis[0].set_ylabel('Error(%)')
x = list(range(len(df.T)))
axis[1].errorbar(x, mean, std, linestyle='None', marker='^')
axis[1].set_xlabel('Day')
axis[1].set_ylabel('Error(%)')

Long-Term Dollar-Cost-Averaging Returns of 3x vs. 1x Leveraged ETFs

Below is an analysis of 3x and 1x leveraged ETFs on Chinese and US stocks and bonds. Notice that a 3x leveraged fund falls far more than a 1x fund during downturns, and along the time axis those stronger drawdowns can wipe out all of the earlier gains. So for markets in a long-term uptrend, such as US tech stocks, where rising periods last much longer than falling ones, the long-term return of holding TQQQ...
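
A toy round-trip calculation (hypothetical numbers, not taken from the post) shows why the drawdowns weigh so heavily:

# round trip: the index drops 10%, then rises 10%
for lev in (1, 3):
    value = (1 - 0.10 * lev) * (1 + 0.10 * lev)
    print(f'{lev}x leverage ends at {value:.2f} of the start')  # 1x -> 0.99, 3x -> 0.91

The unleveraged fund is nearly back to even, while the 3x fund is still about 9% underwater after the same round trip.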