Machine Learning for Trading--1-5 Incomplete Data

  • 使用fillna()函数–Using Fillna()
  • 填充缺失数据–Fill missing values

使用fillna()函数–Using Fillna()

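  • 向前填充 – df.fillna(method="ffill", inplace=True)
  • 向后填充 – df.fillna(method="bfill", inplace=True)
  • 注:inplace=True 时直接修改原对象(返回 None);inplace=False 时原对象不变,返回填充后的副本,必须接收返回值才能生效
  • 注:pandas 2.1 起 fillna 的 method 参数已弃用,等价的新写法是 df.ffill(inplace=True) / df.bfill(inplace=True)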

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

#----------function to get path of the symbol-------------
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol. Any object is accepted; its string form is
            used as the file stem (e.g. "SPY" -> "SPY.csv").
        base_dir: Directory containing the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"`` using the platform's
        path separator.
    """
    # str.format already stringifies its argument, so no explicit str() call.
    return os.path.join(base_dir, "{}.csv".format(symbol))

#--------------------Reads csv----------------------------
def get_data(symbollist, dates):
    """Read adjusted-close prices for the given symbols into one DataFrame.

    Each symbol's CSV (located via ``symbol_to_path``) is read for its
    "Date" and "Adj Close" columns, the price column is renamed to the
    symbol, and the result is joined onto a frame indexed by ``dates``.
    "SPY" is always loaded (prepended when absent), and rows with no SPY
    value are dropped right after SPY is joined.

    Args:
        symbollist: Iterable of ticker symbols. It is not modified.
        dates: Index (e.g. a ``pd.date_range``) for the resulting frame.

    Returns:
        DataFrame with one column per symbol, restricted to the rows of
        ``dates`` for which SPY has a value.
    """
    # Work on a copy so that adding SPY does not mutate the caller's list.
    symbols = list(symbollist)
    if "SPY" not in symbols:
        symbols.insert(0, "SPY")

    df_final = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            parse_dates=True,
            index_col="Date",
            usecols=["Date", "Adj Close"],
            na_values=["nan"],
        )
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df_final = df_final.join(df_temp)
        if symbol == "SPY":
            # Keep only the rows for which SPY has a price.
            df_final = df_final.dropna(subset=["SPY"])
    return df_final

#--------------------plot function-------------------------
def plot(df_data):
    """Plot ``df_data`` with a title and axis labels, then show the figure."""
    # NOTE(review): fontsize=2 is passed through unchanged; it looks very
    # small for tick labels -- confirm this is intended.
    axes = df_data.plot(title="Incomplete Data", fontsize=2)

    # Label the axes before handing control to the GUI event loop.
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()

if __name__ == '__main__':
    # Symbols to load; get_data adds SPY as the reference symbol if absent.
    # Alternative symbol set: ["PSX", "FAKE1", "FAKE2"]
    symbollist = ["FAKE2"]

    # Date range used as the index of the resulting frame.
    start_date = '2005-12-31'
    end_date = '2014-12-07'
    idx = pd.date_range(start_date, end_date)

    # Adjusted close of each symbol.
    df_data = get_data(symbollist, idx)

    # Forward-fill gaps in place. This is equivalent to the older spelling
    # df_data.fillna(method="ffill", inplace=True), whose `method` argument
    # is deprecated in recent pandas releases.
    df_data.ffill(inplace=True)
    plot(df_data)

填充缺失数据–Fill missing values

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

def fill_missing_values(df_data):
    """Fill missing values in data frame, in place.

    Gaps are first forward-filled (each NaN takes the last known value
    above it) and then back-filled, so that NaNs at the start of a column,
    which have no earlier value to copy, take the first known value.

    Args:
        df_data: DataFrame to fill. It is modified in place.

    Returns:
        None.
    """
    # Order matters: forward-fill first so that no value is copied backwards
    # in time except where there is no earlier value at all.
    df_data.ffill(inplace=True)
    # Both passes must be in place; a non-inplace call returns a filled copy
    # and leaves df_data untouched.
    df_data.bfill(inplace=True)

def symbol_to_path(symbol, base_dir="data"):
    """Return CSV file path given ticker symbol.

    Args:
        symbol: Ticker symbol. Any object is accepted; its string form is
            used as the file stem (e.g. "SPY" -> "SPY.csv").
        base_dir: Directory containing the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"`` using the platform's
        path separator.
    """
    # str.format already stringifies its argument, so no explicit str() call.
    return os.path.join(base_dir, "{}.csv".format(symbol))

def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Each symbol's CSV (located via ``symbol_to_path``) is read for its
    "Date" and "Adj Close" columns, the price column is renamed to the
    symbol, and the result is joined onto a frame indexed by ``dates``.

    Args:
        symbols: Iterable of ticker symbols. It is not modified; "SPY" is
            added to an internal copy when absent.
        dates: Index (e.g. a ``pd.date_range``) for the resulting frame.

    Returns:
        DataFrame with one column per symbol, restricted to the rows of
        ``dates`` for which SPY has a value.
    """
    # Copy before inserting so the caller's list is left untouched.
    wanted = list(symbols)
    if "SPY" not in wanted:  # add SPY for reference, if absent
        wanted.insert(0, "SPY")

    df_final = pd.DataFrame(index=dates)
    for symbol in wanted:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            parse_dates=True,
            index_col="Date",
            usecols=["Date", "Adj Close"],
            na_values=["nan"],
        )
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df_final = df_final.join(df_temp)
        if symbol == "SPY":  # drop dates SPY did not trade
            df_final = df_final.dropna(subset=["SPY"])

    return df_final

def plot_data(df_data):
    """Plot ``df_data`` titled "Stock Data" with Date/Price axis labels."""
    # NOTE(review): fontsize=2 is passed through unchanged; it looks very
    # small for tick labels -- confirm this is intended.
    axes = df_data.plot(title="Stock Data", fontsize=2)

    # Label the axes, then display the figure.
    axes.set_ylabel("Price")
    axes.set_xlabel("Date")
    plt.show()

def test_run():
    """Function called by Test Run: load, fill and plot the sample symbols."""
    # Date range used as the index of the loaded frame.
    dates = pd.date_range("2005-12-31", "2014-12-07")

    # Load the data for each symbol.
    df_data = get_data(["JAVA", "FAKE1", "FAKE2"], dates)

    # Fill the gaps in place, then display the result.
    fill_missing_values(df_data)
    plot_data(df_data)

# Script entry point: run the demo only when this file is executed directly.
if __name__ == "__main__":
    test_run()
您的支持将鼓励我继续创作