從台灣證交所下載每日交易資料成csv檔
import datetime
import requests
from io import StringIO
import pandas as pd
import numpy as np
import time
#設定爬蟲的開始和結束的日期'%d-%m-%Y'
start = datetime.datetime.strptime("04-06-2019", "%d-%m-%Y")
end = datetime.datetime.strptime("06-07-2019", "%d-%m-%Y")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end-start).days)]
#以for迴圈抓取每日股票資訊,並存入硬碟,休息五秒再讀取下一筆資料以免被遠端伺服器封鎖
for i in date_generated:
print(i.strftime("%Y%m%d"))
datestr=i.strftime("%Y%m%d")
r = requests.post('http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=' + datestr + '&type=ALL')
if len(r.text)>0:
df = pd.read_csv(StringIO("\n".join([i.translate({ord(c): None for c in ' '})
for i in r.text.split('\n')
if len(i.split('",')) == 17 and i[0] != '='])), header=0)
df.to_csv('stock/2019/'+datestr+'.csv')
time.sleep( 5 )
將每日的交易資料csv檔寫入資料庫
os.getcwd() #取得目前路徑
path=os.path.join(os.getcwd(),'stock/2019') #連結路徑
os.chdir(path) #改變工作目錄
os.getcwd() #取得目前工作路徑
All_csv_file = glob.glob('*.csv') #取的目前工作路徑的所有.csv檔案名稱為陣列資料
#連接到資料庫,如果沒有的話會重新建一個
dbname = 'TWStock2019.db'
db = sqlite3.connect(dbname)
for file_name in All_csv_file:
pd.read_csv(file_name).iloc[:,2:].to_sql(file_name.replace('.csv',''),db,if_exists='replace')
將資料庫打開重新整理成個股資料
#建立函數取回con連結的資料庫下的所有表單
def sql_fetch(con):
date_list=[]
cursorObj = con.cursor()
cursorObj.execute('SELECT name from sqlite_master where type= "table"')
date_list.append(cursorObj.fetchall())
return date_list
date_list = sql_fetch(db)
#將資料表一個一個打開並放到d_list裡面
from progressbar import *
d_list=[]
for index,date in enumerate(date_list[0]):
# print(date)
d_list.append(date_list[0][index][0])
total_df = pd.DataFrame()
pbar = ProgressBar().start() #建立進度條
for i,date in enumerate(d_list):
pbar.update(int((i / (len(d_list) - 1)) * 100)) #更新進度條
df = pd.read_sql(con=db,sql='SELECT * FROM' + ' "'+ date +'"')
df['Date'] = date
total_df = total_df.append(df)
pbar.finish() #關閉進度條
將個股groupby合併 並寫入資料庫
dbname_2 = 'TWStock2019_by_index' #寫入資料庫名稱
db2 = sqlite3.connect(dbname_2)
pbar = ProgressBar().start() #建立進度條
i=1
for key in total_dict.keys(): #將個股依序寫路資料庫中
pbar.update(int((i / (len(total_dict.keys()) - 1)) * 100)) #更新進度條
df=total_dict[key]
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(by=['Date'],ascending=True )
df.to_sql(key,db2,if_exists='replace')
i+=1
pbar.finish() #關閉進度條
#安裝DB Browser for SQLite查看資料庫
#安裝DB Browser for SQLite查看資料庫
將資料庫data取出分析, 以2330台積電為例
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
from matplotlib import ticker as mticker
from mpl_finance import candlestick_ohlc
from matplotlib.dates import DateFormatter
import datetime as dt
import matplotlib.dates as mdates
#連結
dbname_2 = 'TWStock2019_by_index' #資料庫名稱
db = sqlite3.connect(dbname_2) #連結資料表
df = pd.read_sql(con=db,sql='SELECT * FROM "2330"') #讀取2330資料表裡的所有資料
df=df.set_index('Date')
df_plot = df[['開盤價','最高價','最低價','收盤價','成交股數']]
df_plot['成交股數']=df_plot['成交股數'].str.replace(',','')
#更改欄位名稱
df_plot.rename(columns={'開盤價':'open',
'最高價':'high',
'最低價':'low',
'收盤價':'close',
'成交股數':'volume'},
inplace=True)
df_plot=df_plot.reset_index()
#建立函數轉換Date欄位的資料型態從字串變成日期
from datetime import datetime
def convDate(date):
datetime_object = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
return datetime_object
df_plot['Date']=df_plot['Date'].apply(convDate)
#將日期轉換成浮點數
df_plot['DateTime'] = mdates.date2num(df_plot['Date'])
#將股價數值轉變為浮點數
df_plot[['open','high','low','close']]=df_plot[['open','high','low','close']].astype('float32')
#畫出股價圖
fig = plt.figure()
ax1 = plt.subplot2grid((1,1), (0,0))
df_2=df_plot[['DateTime','open','high','low','close']].copy()
df_2
df_2['5d']=np.round(df_2['close'].rolling(5).mean(),2)
df_2['10d']=np.round(df_2['close'].rolling(10).mean(),2)
plt.plot(df_2['DateTime'],df_2['5d'])
plt.plot(df_2['DateTime'],df_2['10d'])
df_3=df_plot[['DateTime','open','high','low','close']].copy()
candlestick_ohlc(ax1, df_3.values, width=.1, colorup='#53c156', colordown='#ff1717')
plt.show()