在量化投资中,回测是验证策略有效性的重要环节。本文将带你从零开始搭建一个简单的回测框架。
回测框架的核心组件
一个完整的回测系统通常包含以下模块:
- 数据层:获取和管理历史数据
- 策略层:定义交易逻辑
- 执行层:模拟订单执行
- 评估层:计算性能指标
框架设计
1. 数据管理器
import pandas as pd
import numpy as np
from datetime import datetime
class DataHandler:
def __init__(self, symbol, start_date, end_date):
self.symbol = symbol
self.start_date = start_date
self.end_date = end_date
self.data = None
def load_data(self, filepath):
self.data = pd.read_csv(
filepath,
parse_dates=['date'],
index_col='date'
)
self.data = self.data.loc[self.start_date:self.end_date]
return self.data
def get_latest_bar(self, N=1):
return self.data.iloc[-N:]
2. 策略基类
class Strategy:
def __init__(self, data):
self.data = data
self.signals = pd.DataFrame(index=data.index)
self.signals['signal'] = 0
def generate_signals(self):
raise NotImplementedError("需要实现 generate_signals 方法")
3. 双均线策略示例
class MovingAverageCrossStrategy(Strategy):
def __init__(self, data, short_window=5, long_window=20):
super().__init__(data)
self.short_window = short_window
self.long_window = long_window
def generate_signals(self):
self.signals['short_ma'] = self.data['close'].rolling(
window=self.short_window
).mean()
self.signals['long_ma'] = self.data['close'].rolling(
window=self.long_window
).mean()
self.signals['signal'] = 0
self.signals.loc[
self.signals['short_ma'] > self.signals['long_ma'],
'signal'
] = 1
self.signals.loc[
self.signals['short_ma'] < self.signals['long_ma'],
'signal'
] = -1
self.signals['positions'] = self.signals['signal'].diff()
return self.signals
4. 回测引擎
class Backtester:
def __init__(self, symbol, signals, initial_capital=100000.0):
self.symbol = symbol
self.signals = signals
self.initial_capital = float(initial_capital)
self.positions = self.generate_positions()
def generate_positions(self):
positions = pd.DataFrame(index=self.signals.index).fillna(0.0)
positions[self.symbol] = 100 * self.signals['signal']
return positions
def backtest_portfolio(self, data):
portfolio = pd.DataFrame(index=data.index)
portfolio['holdings'] = self.positions[self.symbol] * data['close']
portfolio['cash'] = self.initial_capital - \
(self.positions[self.symbol].diff() * data['close']).cumsum()
portfolio['total'] = portfolio['cash'] + portfolio['holdings']
portfolio['returns'] = portfolio['total'].pct_change()
return portfolio
5. 性能评估
class PerformanceAnalyzer:
def __init__(self, portfolio, data):
self.portfolio = portfolio
self.data = data
def calculate_metrics(self):
returns = self.portfolio['returns'].dropna()
total_return = (
self.portfolio['total'].iloc[-1] /
self.portfolio['total'].iloc[0] - 1
) * 100
days = (self.portfolio.index[-1] - self.portfolio.index[0]).days
annual_return = ((1 + total_return/100) ** (365/days) - 1) * 100
sharpe_ratio = (
returns.mean() / returns.std() * np.sqrt(252)
)
cumulative = (1 + returns).cumprod()
running_max = cumulative.expanding().max()
drawdown = (cumulative - running_max) / running_max
max_drawdown = drawdown.min() * 100
win_rate = len(returns[returns > 0]) / len(returns[returns != 0]) * 100
metrics = {
'总收益率 (%)': round(total_return, 2),
'年化收益率 (%)': round(annual_return, 2),
'夏普比率': round(sharpe_ratio, 2),
'最大回撤 (%)': round(max_drawdown, 2),
'胜率 (%)': round(win_rate, 2),
'交易次数': len(returns[returns != 0])
}
return metrics
def print_metrics(self):
metrics = self.calculate_metrics()
print("=" * 50)
print("策略性能报告")
print("=" * 50)
for key, value in metrics.items():
print(f"{key}: {value}")
print("=" * 50)
完整示例
if __name__ == "__main__":
data_handler = DataHandler(
symbol='AAPL',
start_date='2023-01-01',
end_date='2024-12-31'
)
data = data_handler.load_data('aapl_data.csv')
strategy = MovingAverageCrossStrategy(
data=data,
short_window=5,
long_window=20
)
signals = strategy.generate_signals()
backtester = Backtester(
symbol='AAPL',
signals=signals,
initial_capital=100000
)
portfolio = backtester.backtest_portfolio(data)
analyzer = PerformanceAnalyzer(portfolio, data)
analyzer.print_metrics()
import matplotlib.pyplot as plt
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
ax1.plot(data.index, data['close'], label='价格')
ax1.plot(signals.index, signals['short_ma'], label='短期均线')
ax1.plot(signals.index, signals['long_ma'], label='长期均线')
ax1.set_ylabel('价格')
ax1.legend()
ax2.plot(portfolio.index, portfolio['total'])
ax2.set_ylabel('组合价值')
ax2.set_xlabel('日期')
plt.tight_layout()
plt.show()
框架优化方向
1. 交易成本
def apply_commission(self, trades, commission_rate=0.001):
return trades * (1 - commission_rate)
2. 滑点模拟
def apply_slippage(self, price, slippage_pct=0.001):
return price * (1 + slippage_pct)
3. 仓位管理
def kelly_criterion(self, win_rate, win_loss_ratio):
return (win_rate * win_loss_ratio - (1 - win_rate)) / win_loss_ratio
注意事项
- 避免未来信息泄露:确保策略只使用历史数据
- 数据质量:处理停牌、分红、拆股等公司行为
- 过度拟合:避免针对历史数据过度优化参数
- 现实约束:考虑流动性、交易限制等实际因素
总结
本文介绍了一个基础的回测框架,包含:
- 模块化设计便于扩展
- 清晰的策略接口
- 完整的性能评估
在实际应用中,可以根据需求添加更多功能:
- 多资产组合回测
- 风险管理模块
- 参数优化工具
- 实时交易接口
下一篇文章将介绍如何使用机器学习方法优化策略参数。