Machine Learning for Trading--1-6 Histograms and Scatter Plots

  • 生成柱状统计图–Plot a histogram
  • 柱状图统计分析–Computing Histogram Statistics
  • 同时plot两个柱状图–Plot Two Histograms together
  • 散点图–Scatterplots

生成柱状统计图–Plot a histogram

  • 日回报率 – daily_returns[1:] = (df[1:] / df[:-1].values) - 1
  • 生成柱状图 – daily_returns.hist(bins=20)

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data

def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each row after the first becomes ``row[t] / row[t-1] - 1``; the first
    row has no predecessor and is set to 0. The input frame is not
    modified: the result is built on a copy.

    Args:
        df: DataFrame of values, one row per period (rows in time order).

    Returns:
        A DataFrame with the same index and columns as ``df`` holding the
        period-over-period returns.
    """
    daily_returns = df.copy()
    # `.values` drops the index of the lagged slice so the division is done
    # positionally (row t by row t-1) instead of being aligned on labels.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent.
    daily_returns.iloc[0, :] = 0  # set daily returns for row 0 to 0
    return daily_returns

def test_run():
    """Plot SPY prices, their daily returns, and two histograms of the returns.

    Relies on the project helpers ``get_data`` and ``plot_data`` from
    ``util`` (presumably a price loader and a line-plot wrapper; confirm
    against that module).
    """
    # Load the price data for the chosen window and show it.
    date_index = pd.date_range('2009-01-01', '2012-12-31')
    tickers = ['SPY']
    prices = get_data(tickers, date_index)
    plot_data(prices)

    # Convert prices to daily returns and show them as a line plot.
    returns = compute_daily_returns(prices)
    plot_data(returns, title="Daily returns", ylabel="Daily returns")

    # Two histograms of the same data for comparison:
    returns.hist()         # pandas' default bin count (10)
    returns.hist(bins=20)  # finer resolution with 20 bins
    plt.show()


if __name__ == "__main__":
    test_run()

柱状图统计分析–Computing Histogram Statistics

  • 计算均值 – mean = daily_returns['SPY'].mean()
  • 计算标准差 – std = daily_returns['SPY'].std()
  • 添加垂线 – plt.axvline(mean)
  • 计算样本峰度kurtosis – daily_returns.kurtosis() 注:表征概率密度分布曲线在平均值处峰值的高低,数值越大越瘦长。pandas 返回的是超额峰度(Fisher 定义,正态分布为 0):正值表示比正态分布峰更尖、尾部更厚(fat tails),负值则相反。

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data

def compute_daily_returns(df):
    """Compute and return the daily return values.

    For every row after the first the result is ``row[t] / row[t-1] - 1``.
    The first row has nothing to compare against and is set to 0. Works on
    a copy, so ``df`` itself is left untouched.

    Args:
        df: DataFrame of values, one row per period (rows in time order).

    Returns:
        A DataFrame shaped like ``df`` containing the returns.
    """
    daily_returns = df.copy()
    # Divide by the raw ndarray of the lagged rows: without `.values`
    # pandas would align the two slices on index labels, not shift them.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # `.ix` no longer exists (removed in pandas 1.0); use positional `.iloc`.
    daily_returns.iloc[0, :] = 0  # set daily returns for row 0 to 0
    return daily_returns

def test_run():
    """Plot a histogram of SPY daily returns annotated with its statistics.

    Draws the 20-bin histogram, marks the mean and the band one standard
    deviation either side of the mean with dashed vertical lines, prints
    the mean and standard deviation, and finally prints the kurtosis.

    Relies on the project helpers ``get_data`` and ``plot_data`` from
    ``util`` (presumably a price loader and a line-plot wrapper; confirm
    against that module).
    """
    # Read data
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

    # Plot a histogram
    daily_returns.hist(bins=20)  # changing no. of bins to 20

    # Get mean and standard deviation
    mean = daily_returns['SPY'].mean()
    print("mean=", mean)
    std = daily_returns['SPY'].std()
    print("std=", std)

    # The axvline calls draw onto the axes the histogram just created.
    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    # The one-sigma band is centred on the mean, not on zero.
    plt.axvline(mean + std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(mean - std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Compute kurtosis
    print(daily_returns.kurtosis())


if __name__ == "__main__":
    test_run()

同时plot两个柱状图–Plot Two Histograms together

  • 两张图分开左右显示 – daily_returns.hist(bins=20) 注:daily_returns 为包含两列不同数据的表格,每一列各生成一个子图
  • 两张图同时显示 – daily_returns['SPY'].hist(bins=20, label="SPY"); daily_returns['XOM'].hist(bins=20, label="XOM")

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data

def compute_daily_returns(df):
    """Compute and return the daily return values.

    Row ``t`` of the result is ``df[t] / df[t-1] - 1`` for ``t >= 1``;
    row 0 is set to 0 because it has no previous row. ``df`` is copied
    first, so the caller's frame is not changed.

    Args:
        df: DataFrame of values, one row per period (rows in time order).

    Returns:
        A DataFrame with the index and columns of ``df`` holding returns.
    """
    daily_returns = df.copy()
    # Using `.values` on the lagged slice forces element-wise division by
    # position; two DataFrames would otherwise be aligned on their index.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # `.iloc` replaces `.ix`, which was removed in pandas 1.0.
    daily_returns.iloc[0, :] = 0  # set daily returns for row 0 to 0
    return daily_returns

def test_run():
    """Show SPY and XOM return histograms, first separately, then overlaid.

    Relies on the project helpers ``get_data`` and ``plot_data`` from
    ``util`` (presumably a price loader and a line-plot wrapper; confirm
    against that module).
    """
    # Load prices for both tickers and show them.
    date_index = pd.date_range('2009-01-01', '2012-12-31')
    tickers = ['SPY', 'XOM']
    prices = get_data(tickers, date_index)
    plot_data(prices)

    # ---- Two separate histograms ----
    returns = compute_daily_returns(prices)
    plot_data(returns, title="Daily returns", ylabel="Daily returns")

    # DataFrame.hist draws one histogram per column.
    returns.hist(bins=20)
    plt.show()

    # ---- Histograms on the same graph ----
    returns = compute_daily_returns(prices)

    # Calling hist on each column in turn draws both on the same chart;
    # the labels feed the legend below.
    returns['SPY'].hist(bins=20, label="SPY")
    returns['XOM'].hist(bins=20, label="XOM")
    plt.legend(loc='upper right')
    plt.show()


if __name__ == "__main__":
    test_run()

散点图–Scatterplots

  • 生成散点图 – daily_returns.plot(kind='scatter', x='SPY', y='XOM')
  • 数据拟合(线性) – beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)

Example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from util import get_data, plot_data

def compute_daily_returns(df):
    """Compute and return the daily return values.

    Every row except the first is replaced by its ratio to the previous
    row minus 1; the first row is set to 0. The computation is done on a
    copy of ``df``.

    Args:
        df: DataFrame of values, one row per period (rows in time order).

    Returns:
        A DataFrame with the same index and columns as ``df`` holding the
        period-over-period returns.
    """
    daily_returns = df.copy()
    # The lagged slice is converted with `.values` so that pandas divides
    # row t by row t-1 positionally rather than matching index labels.
    daily_returns[1:] = (df[1:] / df[:-1].values) - 1
    # `.ix` was removed in pandas 1.0; `.iloc` does the positional write.
    daily_returns.iloc[0, :] = 0  # set daily returns for row 0 to 0
    return daily_returns

def test_run():
    """Scatter-plot XOM and GLD daily returns against SPY with a fitted line.

    For each of the two symbols, a degree-1 ``np.polyfit`` of its returns
    on SPY's returns gives the slope (printed as beta) and the intercept
    (printed as alpha); the fitted line is drawn in red over the scatter.

    Relies on the project helper ``get_data`` from ``util`` (presumably a
    price loader; confirm against that module).
    """
    # Load prices for the three tickers and convert them to daily returns.
    date_index = pd.date_range('2009-01-01', '2012-12-31')
    tickers = ['SPY', 'XOM', 'GLD']
    returns = compute_daily_returns(get_data(tickers, date_index))

    market = returns['SPY']
    # Same scatter + linear fit for each symbol, SPY always on the x axis.
    for symbol in ('XOM', 'GLD'):
        returns.plot(kind='scatter', x='SPY', y=symbol)
        # polyfit returns coefficients highest degree first: slope, intercept.
        slope, intercept = np.polyfit(market, returns[symbol], 1)
        print("beta_" + symbol + "= ", slope)
        print("alpha_" + symbol + "=", intercept)
        plt.plot(market, slope * market + intercept, '-', color='r')
        plt.show()


if __name__ == "__main__":
    test_run()
您的支持将鼓励我继续创作