Browse AI-generated trading strategies shared by the community. Fork, learn, and build on each other's work.
| Score▼ | Strategy | Author | Win Rate▼ | Return▼ | PF▼ | MDD▼ | Trades▼ | Actions | ||
|---|---|---|---|---|---|---|---|---|---|---|
|
🥇
|
GBP/USD SMA Trend Gradient Boosting Risk-Adj
Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. GradientBoostingClassifier chosen for its strong bias-variance tradeof…
|
R
@ratio_witch
|
GBPUSD | 15min | 43.1% / 71.4% | +6.85% / +22.83% | 1.71 / 2.84 | 2.69% / 2.69% | 7214 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:47:56
# Model : Gradient Boosting
# Feature Eng. : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file with the GBP/USD 15-minute OHLC bars (per the file name).
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
# Date window for the run; how the bounds are applied is decided by the
# runner, which is not visible here.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# NOTE(review): left empty — presumably "no separate validation cut-off";
# confirm against the runner.
VALIDATION_DATE = ""
# NOTE(review): presumably the share of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append SMA-centred technical-indicator features to ``df``.

    Feature columns are written onto ``df`` in a fixed order (SMA and
    distance-to-SMA, SMA crossovers, returns, return volatility, ATR, RSI,
    MACD, Bollinger Bands, stochastic, ROC, candle shape, range, lagged
    indicators, distance to rolling high/low, ADX proxy). Indicator warm-up
    NaNs are then back-filled and forward-filled.

    Args:
        df: Frame that receives the feature columns (columns are assigned
            on the passed object before the final fill).
        close: Close price series.
        open_: Open price series.
        high: High price series.
        low: Low price series.

    Returns:
        The frame produced by ``df.bfill().ffill()``.
    """
    eps = 1e-10  # keeps denominators away from zero

    # ── SMA features (required) ──────────────────────────────────────────────
    # Each SMA is computed once and reused by the crossover and Bollinger
    # sections below.
    sma_by_period = {}
    for period in (20, 50, 200):
        sma = close.rolling(period).mean()
        sma_by_period[period] = sma
        df[f"sma_{period}"] = sma
        df[f"dm_sma_{period}"] = (close - sma) / sma

    # ── SMA crossover signals ────────────────────────────────────────────────
    # +1 when the faster SMA is above the slower one, otherwise -1 (warm-up
    # rows compare against NaN and therefore come out as -1).
    for fast, slow in ((20, 50), (20, 200), (50, 200)):
        df[f"sma_{fast}_{slow}_cross"] = np.where(
            sma_by_period[fast] > sma_by_period[slow], 1.0, -1.0
        )

    # ── Price momentum features ──────────────────────────────────────────────
    ret_1 = close.pct_change(1)
    df["ret_1"] = ret_1
    for lag in (2, 4, 8, 16):
        df[f"ret_{lag}"] = close.pct_change(lag)

    # ── Volatility: rolling std of returns ──────────────────────────────────
    for window in (8, 20, 50):
        df[f"vol_{window}"] = ret_1.rolling(window).std()

    # ── ATR (Average True Range) ─────────────────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    for atr_period in (14, 50):
        atr = tr.rolling(atr_period).mean()
        df[f"atr_{atr_period}"] = atr
        df[f"natr_{atr_period}"] = atr / close

    # ── RSI ──────────────────────────────────────────────────────────────────
    # Simple-moving-average RSI; the gain/loss split does not depend on the
    # period, so it is done once.
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    for rsi_period in (14, 28):
        rs = gains.rolling(rsi_period).mean() / (losses.rolling(rsi_period).mean() + eps)
        df[f"rsi_{rsi_period}"] = 100 - (100 / (1 + rs))

    # ── MACD ─────────────────────────────────────────────────────────────────
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_12 - ema_26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - signal_line
    df["macd"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_hist
    df["macd_hist_norm"] = macd_hist / (close + eps)

    # ── Bollinger Bands ───────────────────────────────────────────────────────
    for bb_period in (20, 50):
        bb_mid = sma_by_period[bb_period]
        bb_std = close.rolling(bb_period).std()
        bb_upper = bb_mid + 2.0 * bb_std
        bb_lower = bb_mid - 2.0 * bb_std
        band = bb_upper - bb_lower
        df[f"bb_width_{bb_period}"] = band / (bb_mid + eps)
        df[f"bb_pos_{bb_period}"] = (close - bb_lower) / (band + eps)

    # ── Stochastic Oscillator ────────────────────────────────────────────────
    for stoch_period in (14, 28):
        lowest_low = low.rolling(stoch_period).min()
        highest_high = high.rolling(stoch_period).max()
        stoch_k = (close - lowest_low) / (highest_high - lowest_low + eps) * 100
        df[f"stoch_k_{stoch_period}"] = stoch_k
        df[f"stoch_d_{stoch_period}"] = stoch_k.rolling(3).mean()

    # ── Rate of Change (ROC) ──────────────────────────────────────────────────
    for roc_period in (5, 10, 20):
        df[f"roc_{roc_period}"] = close.pct_change(roc_period)

    # ── Candle body and shadow features ──────────────────────────────────────
    body = (close - open_).abs()
    candle_range = (high - low).abs()
    open_close = pd.concat([close, open_], axis=1)
    df["body_ratio"] = body / (candle_range + eps)
    df["upper_shadow"] = (high - open_close.max(axis=1)) / (candle_range + eps)
    df["lower_shadow"] = (open_close.min(axis=1) - low) / (candle_range + eps)
    df["bullish_candle"] = np.where(close > open_, 1.0, -1.0)

    # ── Volume-proxy: candle range as volatility proxy ────────────────────────
    df["range_norm"] = candle_range / (close + eps)
    df["range_ma_ratio"] = candle_range / (candle_range.rolling(20).mean() + eps)

    # ── Lag features for return predictors ───────────────────────────────────
    for col_lag in ("rsi_14", "macd_hist", "bb_pos_20"):
        for lag in (1, 2, 3):
            df[f"{col_lag}_lag{lag}"] = df[col_lag].shift(lag)

    # ── Distance of close from recent high/low ────────────────────────────────
    for lookback in (10, 20, 50):
        roll_high = high.rolling(lookback).max()
        roll_low = low.rolling(lookback).min()
        df[f"dist_high_{lookback}"] = (close - roll_high) / (roll_high + eps)
        df[f"dist_low_{lookback}"] = (close - roll_low) / (roll_low + eps)

    # ── Trend strength: ADX proxy ─────────────────────────────────────────────
    # Rolling-mean smoothing throughout (not Wilder smoothing), hence "proxy".
    adx_period = 14
    up_move = high.diff()
    down_move = -low.diff()
    plus_dm = pd.Series(
        np.where((up_move > 0) & (up_move > down_move), up_move, 0.0),
        index=close.index,
    )
    minus_dm = pd.Series(
        np.where((down_move > 0) & (down_move > up_move), down_move, 0.0),
        index=close.index,
    )
    atr_adx = tr.rolling(adx_period).mean()
    plus_di = 100 * plus_dm.rolling(adx_period).mean() / (atr_adx + eps)
    minus_di = 100 * minus_dm.rolling(adx_period).mean() / (atr_adx + eps)
    dx = 100 * (plus_di - minus_di).abs() / (plus_di + minus_di + eps)
    df["adx"] = dx.rolling(adx_period).mean()
    df["plus_di"] = plus_di
    df["minus_di"] = minus_di

    # ── Fill NaN from indicator warm-up ──────────────────────────────────────
    # NOTE(review): bfill copies the first valid value backwards, so warm-up
    # rows carry values computed from later bars — confirm the runner drops
    # or ignores those rows.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for the GBP/USD SMA strategy.

    The dictionary carries the display title, the model type and its
    keyword parameters, the signal/risk settings, and two free-text fields
    (``objective`` and ``notes``) describing the rationale.
    """
    return {
        "title": "GBP/USD SMA Trend Gradient Boosting Risk-Adj",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.75,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "n_iter_no_change": 30,
            "validation_fraction": 0.1,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.57,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 18],
        "min_atr": 0.0002,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on GBP/USD 15-min data. "
            "GradientBoostingClassifier chosen for its strong bias-variance tradeoff "
            "on medium-sized tabular datasets without needing GPU. "
            "Hyperparameters: moderate depth=4 prevents overfitting, learning_rate=0.04 "
            "with 400 estimators balances convergence vs generalisation, subsample=0.75 "
            "adds stochasticity to reduce variance, min_samples_leaf=20 enforces statistical "
            "significance at each leaf. Early stopping via n_iter_no_change guards against "
            "overfit on the training fold. Signal threshold 0.57 filters marginal signals "
            "to improve precision. SL=0.5%, TP=1.0% gives 1:2 RR. Session filter 6-18 UTC "
            "covers London+NY overlap — highest GBP/USD liquidity and tighter spreads. "
            "sma_50 trend filter ensures we only trade in the direction of medium-term trend, "
            "reducing whipsaw losses. target_horizon=4 bars (1 hour) gives the model enough "
            "time for moves to develop while staying relevant for intraday trading."
        ),
        "notes": (
            "Features: SMA 20/50/200 with distance metrics (core requirement), RSI 14/28, "
            "MACD, Bollinger Bands 20/50, Stochastic 14/28, ATR 14/50, NATR, ROC, ADX, "
            "candle body/shadow ratios, lagged RSI/MACD/BB features, distance from rolling "
            "high/low, SMA crossover signals, multi-lag return features. "
            "All features are backward-looking only (no lookahead bias). "
            "on_opposite=reverse for fast trend-following entries without missing reversals."
        ),
    }
|
||||||||||
|
🥈
|
EMA Cross 9/21 + RSI14 Gradient Boost Scalper
Maximize risk-adjusted return (Sharpe/Calmar) using a GradientBoostingClassifier with EMA 9/21 crossover as the primary signal source and RS…
|
P
@pivot_kid
|
EURUSD | 15min | 43.3% / 47.6% | +11.79% / +7.76% | 2.92 / 1.44 | 0.83% / 0.83% | 6721 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:37:57
# Model : Gradient Boosting
# Feature Eng. : EMA (9,21), RSI 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file with the EUR/USD 15-minute OHLC bars (per the file name).
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
# Date window for the run; how the bounds are applied is decided by the
# runner, which is not visible here.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# NOTE(review): left empty — presumably "no separate validation cut-off";
# confirm against the runner.
VALIDATION_DATE = ""
# NOTE(review): presumably the share of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append EMA 9/21 crossover, RSI 14 and supporting features to ``df``.

    Columns are assigned onto ``df`` in a fixed order; indicator warm-up
    NaNs are then back-filled and forward-filled.

    Args:
        df: Frame that receives the feature columns.
        close: Close price series.
        open_: Open price series.
        high: High price series.
        low: Low price series.

    Returns:
        The frame produced by ``df.bfill().ffill()``.
    """
    eps = 1e-10  # keeps denominators away from zero

    # --- EMA 9 and EMA 21 (required) ---
    ema_9 = close.ewm(span=9, adjust=False).mean()
    ema_21 = close.ewm(span=21, adjust=False).mean()
    df["ema_9"] = ema_9
    df["ema_21"] = ema_21
    df["dm_ema_9"] = (close - ema_9) / ema_9
    df["dm_ema_21"] = (close - ema_21) / ema_21

    # EMA spread: positive when fast > slow
    df["ema_cross"] = ema_9 - ema_21
    df["ema_cross_signal"] = np.sign(df["ema_cross"])
    df["ema_cross_prev"] = df["ema_cross_signal"].shift(1)
    # A flip needs a known previous sign: without the notna() guard the first
    # bar compares against NaN, and `x != NaN` is True, which would report a
    # crossover that never happened.
    sign_changed = df["ema_cross_signal"] != df["ema_cross_prev"]
    df["ema_cross_flip"] = (sign_changed & df["ema_cross_prev"].notna()).astype(float)

    # --- RSI 14 (required) ---
    delta = close.diff()
    avg_gain = delta.clip(lower=0).ewm(com=13, adjust=False).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(com=13, adjust=False).mean()
    rs = avg_gain / (avg_loss + eps)
    rsi_14 = 100 - (100 / (1 + rs))
    df["rsi_14"] = rsi_14
    # RSI rescaled so that 50 maps to 0 and 0/100 map to -1/+1
    df["rsi_norm"] = (rsi_14 - 50) / 50
    # RSI overbought/oversold flags
    df["rsi_ob"] = np.where(rsi_14 > 70, 1.0, 0.0)
    df["rsi_os"] = np.where(rsi_14 < 30, 1.0, 0.0)

    # --- Additional momentum and volatility features ---
    # EMA 50 for trend context
    ema_50 = close.ewm(span=50, adjust=False).mean()
    df["ema_50"] = ema_50
    df["dm_ema_50"] = (close - ema_50) / ema_50

    # Price momentum: rate of change over multiple horizons
    for horizon in (4, 8, 16):
        df[f"roc_{horizon}"] = close.pct_change(horizon)

    # ATR (Average True Range), EMA-smoothed, plus its price-normalised form
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr_14 = tr.ewm(span=14, adjust=False).mean()
    df["atr_14"] = atr_14
    df["natr_14"] = atr_14 / close

    # Bollinger Bands (20-period, 2 std)
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    band = bb_upper - bb_lower
    df["bb_pct"] = (close - bb_lower) / (band + eps)
    df["bb_width"] = band / (bb_mid + eps)
    # BB squeeze: band width below its own 50-bar average
    df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).mean(), 1.0, 0.0)

    # MACD (12, 26, 9), each component divided by price
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema_12 - ema_26
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    macd_diff = macd_line - macd_signal
    df["macd_line"] = macd_line / (close + eps)
    df["macd_signal"] = macd_signal / (close + eps)
    df["macd_hist"] = macd_diff / (close + eps)
    df["macd_cross"] = np.sign(macd_diff)

    # Stochastic oscillator (14-period)
    lowest_low = low.rolling(14).min()
    highest_high = high.rolling(14).max()
    stoch_k = 100 * (close - lowest_low) / (highest_high - lowest_low + eps)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d
    df["stoch_diff"] = stoch_k - stoch_d

    # Candle body and shadows, expressed in ATR units
    open_close = pd.concat([close, open_], axis=1)
    df["body"] = (close - open_).abs() / (atr_14 + eps)
    df["upper_shadow"] = (high - open_close.max(axis=1)) / (atr_14 + eps)
    df["lower_shadow"] = (open_close.min(axis=1) - low) / (atr_14 + eps)
    df["candle_dir"] = np.sign(close - open_)

    # Rolling volatility (std of log returns over 20 bars)
    log_ret = np.log(close / close.shift(1))
    df["realvol_20"] = log_ret.rolling(20).std()

    # Close relative to recent high/low channel (20-bar)
    roll_high_20 = high.rolling(20).max()
    roll_low_20 = low.rolling(20).min()
    df["chan_pct_20"] = (close - roll_low_20) / (roll_high_20 - roll_low_20 + eps)

    # Lagged RSI, EMA spread and MACD histogram for temporal context
    df["rsi_14_lag1"] = rsi_14.shift(1)
    df["rsi_14_lag2"] = rsi_14.shift(2)
    df["ema_cross_lag1"] = df["ema_cross"].shift(1)
    df["ema_cross_lag2"] = df["ema_cross"].shift(2)
    df["macd_hist_lag1"] = df["macd_hist"].shift(1)

    # RSI momentum: change in RSI
    df["rsi_delta_1"] = rsi_14.diff(1)
    df["rsi_delta_4"] = rsi_14.diff(4)

    # EMA slopes over 3 bars (relative change)
    df["ema9_slope"] = ema_9.diff(3) / (ema_9.shift(3) + eps)
    df["ema21_slope"] = ema_21.diff(3) / (ema_21.shift(3) + eps)

    # Interaction: RSI * EMA cross direction
    df["rsi_ema_cross_interact"] = df["rsi_norm"] * df["ema_cross_signal"]

    # Fill NaN from indicator warm-up
    # NOTE(review): bfill copies the first valid value backwards, so warm-up
    # rows carry values computed from later bars — confirm the runner drops
    # or ignores those rows.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for the EUR/USD EMA-cross strategy.

    The dictionary carries the display title, the model type and its
    keyword parameters, the signal/risk settings, and two free-text fields
    (``objective`` and ``notes``) describing the rationale.
    """
    return {
        "title": "EMA Cross 9/21 + RSI14 Gradient Boost Scalper",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.8,
            "min_samples_leaf": 20,
            "min_samples_split": 40,
            "max_features": "sqrt",
            "n_iter_no_change": 30,
            "validation_fraction": 0.1,
            "tol": 1e-4,
            # subsample < 1 and max_features="sqrt" make training stochastic;
            # a fixed seed keeps repeated runs comparable.
            "random_state": 42,
        },
        "signal_threshold": 0.56,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 17],
        "min_atr": 0.0002,
        # NOTE(review): the accompanying feature_engineering builds "ema_50"
        # but no "sma_50" column — confirm the runner derives this SMA itself.
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) using a GradientBoostingClassifier "
            "with EMA 9/21 crossover as the primary signal source and RSI 14 as confirmation. "
            "Deep feature set includes MACD, Bollinger Bands, Stochastic, ATR normalisation, "
            "candle structure, lagged features and RSI/EMA interaction terms. "
            "Gradient boosting chosen for its ability to capture non-linear interactions between "
            "trend, momentum and volatility features without overfitting when regularised via "
            "subsample, max_features, and early stopping. Threshold 0.56 filters marginal signals. "
            "Session filter [7,17] focuses on London/NY overlap for highest EUR/USD liquidity. "
            "SL 0.5% / TP 1.0% gives 1:2 risk-reward aligned with scalper momentum targets. "
            "Reverse on opposite signal to stay in sync with fast EMA crossover momentum."
        ),
        "notes": (
            "EMA 9/21 cross captures short-term momentum shifts typical of active EUR/USD sessions. "
            "RSI 14 filters entries in extreme overbought/oversold conditions. "
            "NATR min_atr filter removes flat/low-vol periods. "
            "Trend filter (SMA 50) ensures longs only above and shorts only below the medium-term trend. "
            "n_iter_no_change=30 provides early stopping to prevent overfitting on the training split. "
            "400 estimators with depth 4 and lr 0.04 balance bias-variance tradeoff for intraday data."
        ),
    }
|
||||||||||
|
🥉
|
USD/JPY BB Mean-Reversion + ATR Gradient Boost
Maximise Sharpe ratio via a Gradient Boosting classifier trained on Bollinger Band position (bb_pct), normalised bandwidth (bb_width), ATR/N…
|
R
@ratio_witch
|
USDJPY | 15min | 60.2% / 66.3% | +4.28% / +11.89% | 1.23 / 1.87 | 2.32% / 2.32% | 16692 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:01:39
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file with the USD/JPY 15-minute OHLC bars (per the file name).
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDJPY_15min.parquet"
# Date window for the run; how the bounds are applied is decided by the
# runner, which is not visible here.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# NOTE(review): left empty — presumably "no separate validation cut-off";
# confirm against the runner.
VALIDATION_DATE = ""
# NOTE(review): presumably the share of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append Bollinger-Band / ATR mean-reversion features to ``df``.

    Columns are assigned onto ``df`` in a fixed order; indicator warm-up
    NaNs are then back-filled and forward-filled.

    Args:
        df: Frame that receives the feature columns.
        close: Close price series.
        open_: Open price series.
        high: High price series.
        low: Low price series.

    Returns:
        The frame produced by ``df.bfill().ffill()``.
    """
    eps = 1e-10  # keeps the candle-range denominators away from zero

    # ── Bollinger Bands (period=20, std_dev=2.0) ──────────────────────────────
    bb_period = 20
    bb_std = 2.0  # band half-width as a multiple of sigma
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)  # population std
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    # bb_width: band width relative to the mid band (volatility proxy)
    band_range = bb_upper - bb_lower
    df["bb_width"] = band_range / bb_mid
    # bb_pct: position of close within the band (0 = lower band, 1 = upper
    # band; values outside [0, 1] occur when close is outside the bands).
    # NOTE(review): band_range is 0 on a perfectly flat window, which yields
    # NaN here; the final fill then substitutes a neighbouring value.
    df["bb_pct"] = (close - bb_lower) / band_range
    # Signed distance of close from the mid band, relative to the mid-band price
    df["bb_dist_mid"] = (close - bb_mid) / bb_mid

    # ── ATR (period=14) ───────────────────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
    natr = atr / close
    df["atr"] = atr
    df["natr"] = natr

    # ── Momentum / trend features ─────────────────────────────────────────────
    # Rate of change at multiple horizons
    for n in (1, 4, 8, 16):
        df[f"roc_{n}"] = close.pct_change(n)

    # RSI (14), smoothed with a span-14 EMA
    rsi_period = 14
    delta = close.diff()
    avg_gain = delta.clip(lower=0).ewm(
        span=rsi_period, min_periods=rsi_period, adjust=False
    ).mean()
    avg_loss = (-delta).clip(lower=0).ewm(
        span=rsi_period, min_periods=rsi_period, adjust=False
    ).mean()
    rs = avg_gain / (avg_loss + 1e-10)
    rsi = 100 - (100 / (1 + rs))
    df["rsi_14"] = rsi
    # RSI distance from 50, scaled to [-1, 1]
    df["rsi_dev"] = (rsi - 50) / 50

    # ── MACD (12, 26, 9) ──────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    macd_signal = macd_line.ewm(span=9, adjust=False).mean()
    df["macd_line"] = macd_line
    df["macd_signal"] = macd_signal
    df["macd_hist"] = macd_line - macd_signal

    # ── Trend (SMA 50) ────────────────────────────────────────────────────────
    sma50 = close.rolling(50).mean()
    df["sma_50"] = sma50
    df["close_vs_sma50"] = (close - sma50) / sma50  # relative distance

    # ── Candle structure features ─────────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = high - low
    open_close = pd.concat([close, open_], axis=1)
    df["body_ratio"] = body / (candle_rng + eps)  # body as fraction of range
    df["upper_wick"] = (high - open_close.max(axis=1)) / (candle_rng + eps)
    df["lower_wick"] = (open_close.min(axis=1) - low) / (candle_rng + eps)
    # +1 for a bullish bar; bearish and doji bars both map to -1
    df["candle_dir"] = np.where(close > open_, 1.0, -1.0)

    # ── Lagged bb_pct, rsi and macd_hist (recent history for the model) ──────
    for lag in (1, 2, 3):
        df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
        df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)

    # ── Volatility regime flag ────────────────────────────────────────────────
    natr_ma = natr.rolling(50).mean()
    df["vol_regime"] = np.where(natr > natr_ma, 1.0, 0.0)  # 1 = NATR above its 50-bar mean

    # ── BB squeeze detection ──────────────────────────────────────────────────
    bb_width_ma = df["bb_width"].rolling(50).mean()
    df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_ma, 1.0, 0.0)

    # ── Mean-reversion signal strength ────────────────────────────────────────
    # Centred bb_pct with the sign flipped: +0.5 when close sits on the lower
    # band, -0.5 on the upper band; positive means close is below the mid band.
    df["mr_signal"] = 0.5 - df["bb_pct"]

    # ── Fill NaN from warm-up ─────────────────────────────────────────────────
    # NOTE(review): bfill copies the first valid value backwards, so warm-up
    # rows carry values computed from later bars — confirm the runner drops
    # or ignores those rows.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for the USD/JPY BB strategy.

    The dictionary carries the display title, the model type and its
    keyword parameters, the signal/risk settings, and two free-text fields
    (``objective`` and ``notes``) describing the rationale.
    """
    return {
        "title": "USD/JPY BB Mean-Reversion + ATR Gradient Boost",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 500,
            "max_depth": 4,
            "learning_rate": 0.03,
            "subsample": 0.8,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "validation_fraction": 0.1,
            "n_iter_no_change": 30,
            "tol": 1e-4,
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": None,
        # NOTE(review): "notes" below speaks of an ATR/NATR entry filter, yet
        # no min_atr is set here — ATR/NATR only reach the model as features.
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximise Sharpe ratio via a Gradient Boosting classifier trained on "
            "Bollinger Band position (bb_pct), normalised bandwidth (bb_width), "
            "ATR/NATR, RSI, MACD histogram, candle structure, and lagged features. "
            "GBM chosen for its ability to capture non-linear interactions between "
            "volatility (ATR) and mean-reversion (BB) signals. n_iter_no_change "
            "acts as early stopping to prevent overfitting on the 15-min USDJPY series. "
            "SL=0.5% / TP=1.0% gives a 1:2 risk-reward; threshold=0.55 reduces noise trades."
        ),
        "notes": (
            "Bollinger Bands are the primary mean-reversion anchor; ATR/NATR filter "
            "entries to adequate volatility bars. RSI and MACD provide momentum context "
            "to avoid fading strong trends. Lagged features (up to 3 bars) give the model "
            "short-term regime memory without look-ahead. vol_regime and bb_squeeze flags "
            "allow the model to differentiate trending vs. ranging conditions automatically. "
            "No session filter applied — USDJPY is liquid across Asian and European sessions."
        ),
    }
|
||||||||||
|
5.05
|
AUD/USD Stochastic BB Mean-Reversion (GBM)
Maximize risk-adjusted return (Sharpe/Calmar) on AUD/USD 15-min. GradientBoostingClassifier with moderate depth and learning rate chosen to …
|
P
@pivot_kid
|
AUDUSD | 15min | 64.8% / 68.0% | +7.88% / +22.47% | 1.20 / 1.62 | 4.91% / 4.91% | 35897 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:24:20
# Model : Gradient Boosting
# Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file with the AUD/USD 15-minute OHLC bars (per the file name).
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"
# Date window for the run; how the bounds are applied is decided by the
# runner, which is not visible here.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# NOTE(review): left empty — presumably "no separate validation cut-off";
# confirm against the runner.
VALIDATION_DATE = ""
# NOTE(review): presumably the share of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append Bollinger / RSI / stochastic mean-reversion features to ``df``.

    Columns are assigned onto ``df`` in a fixed order; indicator warm-up
    NaNs are then back-filled and forward-filled.

    Args:
        df: Frame that receives the feature columns. When its index exposes
            an ``hour`` attribute, that value is stored as ``hour_utc``;
            otherwise ``hour_utc`` is 0.
        close: Close price series.
        open_: Open price series.
        high: High price series.
        low: Low price series.

    Returns:
        The frame produced by ``df.bfill().ffill()``.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0  # band half-width as a multiple of sigma
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=1)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    band = bb_upper - bb_lower
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = band / bb_mid
    # Zero-width bands are turned into NaN explicitly, like the other
    # divisions in this function.
    df["bb_pct"] = (close - bb_lower) / band.replace(0, np.nan)

    # ── RSI (14) ─────────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    avg_gain = delta.clip(lower=0).ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    avg_loss = (-delta.clip(upper=0)).ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    rs = avg_gain / avg_loss.replace(0, np.nan)
    rsi = 100.0 - (100.0 / (1.0 + rs))
    # With gains but no losses the ratio is undefined (x / NaN); RSI is 100 in
    # that case. Leaving it NaN zeroed the overbought flags and could leave
    # the whole column NaN after the fill.
    df["rsi"] = rsi.mask((avg_loss == 0) & (avg_gain > 0), 100.0)

    # ── Stochastic Oscillator (K=14, D=3) ────────────────────────────────────
    stoch_k = 14
    stoch_d = 3
    low_min = low.rolling(stoch_k).min()
    high_max = high.rolling(stoch_k).max()
    k_raw = 100.0 * (close - low_min) / (high_max - low_min).replace(0, np.nan)
    df["stoch_k"] = k_raw
    df["stoch_d"] = k_raw.rolling(stoch_d).mean()

    # ── ATR (14) ─────────────────────────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    df["atr"] = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
    df["natr"] = df["atr"] / close

    # ── Trend / Momentum features ─────────────────────────────────────────────
    df["sma_20"] = close.rolling(20).mean()
    df["sma_50"] = close.rolling(50).mean()
    df["sma_100"] = close.rolling(100).mean()
    df["price_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"]
    df["price_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"]
    df["price_vs_sma100"] = (close - df["sma_100"]) / df["sma_100"]
    df["sma20_vs_sma50"] = (df["sma_20"] - df["sma_50"]) / df["sma_50"]

    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, min_periods=12).mean()
    ema26 = close.ewm(span=26, min_periods=26).mean()
    macd_line = ema12 - ema26
    macd_signal = macd_line.ewm(span=9, min_periods=9).mean()
    df["macd"] = macd_line
    df["macd_signal"] = macd_signal
    df["macd_hist"] = macd_line - macd_signal

    # ── Rate-of-Change features ───────────────────────────────────────────────
    for p in (4, 8, 16):
        df[f"roc_{p}"] = close.pct_change(p)

    # ── Volatility regime ────────────────────────────────────────────────────
    bar_returns = close.pct_change()  # shared by both volatility windows
    df["vol_8"] = bar_returns.rolling(8).std()
    df["vol_20"] = bar_returns.rolling(20).std()
    df["vol_ratio"] = df["vol_8"] / df["vol_20"].replace(0, np.nan)

    # ── Candle body / shadow features ────────────────────────────────────────
    open_close = pd.concat([close, open_], axis=1)
    df["body"] = (close - open_).abs()
    df["upper_shadow"] = high - open_close.max(axis=1)
    df["lower_shadow"] = open_close.min(axis=1) - low
    df["body_ratio"] = df["body"] / (high - low).replace(0, np.nan)

    # ── RSI-derived features ──────────────────────────────────────────────────
    df["rsi_above_50"] = np.where(df["rsi"] > 50, 1, 0)
    df["rsi_overbought"] = np.where(df["rsi"] > 70, 1, 0)
    df["rsi_oversold"] = np.where(df["rsi"] < 30, 1, 0)
    df["rsi_lag1"] = df["rsi"].shift(1)
    df["rsi_lag4"] = df["rsi"].shift(4)

    # ── Stochastic-derived features ───────────────────────────────────────────
    # A cross is flagged on the bar where %K moves from at-or-below %D to
    # above it (up), or from at-or-above to below (down).
    k_now, d_now = df["stoch_k"], df["stoch_d"]
    k_prev, d_prev = k_now.shift(1), d_now.shift(1)
    df["stoch_cross_up"] = np.where((k_now > d_now) & (k_prev <= d_prev), 1, 0)
    df["stoch_cross_down"] = np.where((k_now < d_now) & (k_prev >= d_prev), 1, 0)
    df["stoch_oversold"] = np.where(df["stoch_k"] < 20, 1, 0)
    df["stoch_overbought"] = np.where(df["stoch_k"] > 80, 1, 0)

    # ── BB-derived features ───────────────────────────────────────────────────
    # Squeeze: band width below its own rolling 50-bar 20th percentile
    df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.20), 1, 0)
    df["above_bb_upper"] = np.where(close > bb_upper, 1, 0)
    df["below_bb_lower"] = np.where(close < bb_lower, 1, 0)
    df["bb_pct_lag1"] = df["bb_pct"].shift(1)
    df["bb_pct_lag4"] = df["bb_pct"].shift(4)

    # ── Session hour ──────────────────────────────────────────────────────────
    # NOTE(review): the column name assumes the index is in UTC — nothing in
    # this function converts or checks the timezone.
    df["hour_utc"] = df.index.hour if hasattr(df.index, "hour") else 0

    # ── Fill NaN from indicator warm-up ──────────────────────────────────────
    # NOTE(review): bfill copies the first valid value backwards, so warm-up
    # rows carry values computed from later bars — confirm the runner drops
    # or ignores those rows.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dictionary for the AUD/USD stochastic-BB strategy.

    The dictionary carries the display title, the model type and its
    keyword parameters, the signal/risk settings, and two free-text fields
    (``objective`` and ``notes``) describing the rationale.
    """
    return {
        "title": "AUD/USD Stochastic BB Mean-Reversion (GBM)",
        "model_type": "GradientBoostingClassifier",
        "model_params": {
            "n_estimators": 400,
            "max_depth": 4,
            "learning_rate": 0.04,
            "subsample": 0.80,
            "min_samples_leaf": 20,
            "max_features": "sqrt",
            "n_iter_no_change": 30,
            "validation_fraction": 0.10,
            "tol": 1e-4,
            # subsample < 1 and max_features="sqrt" make training stochastic;
            # a fixed seed keeps repeated runs comparable.
            "random_state": 42,
        },
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": None,
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize risk-adjusted return (Sharpe/Calmar) on AUD/USD 15-min. "
            "GradientBoostingClassifier with moderate depth and learning rate chosen "
            "to balance bias-variance. 2:1 reward-to-risk (SL=0.5%, TP=1.0%). "
            "Stochastic crossovers, BB mean-reversion, and RSI regime signals "
            "form the core feature set; MACD, volatility, and candle features add "
            "context. Early stopping (n_iter_no_change=30) prevents overfitting."
        ),
        "notes": (
            "Features: Bollinger Bands (20,2) width/pct, RSI(14) with lag/regime flags, "
            "Stochastic(14,3) K/D with crossover detection, ATR/NATR volatility, MACD "
            "histogram, short/medium SMAs, ROC(4/8/16), volatility ratio, candle body "
            "ratios, and UTC session hour. No session or trend filter to allow full "
            "mean-reversion opportunities across all sessions."
        ),
    }
|
||||||||||
|
3.97
|
NZD/USD RSI-MACD Gradient Boost Risk-Adjusted
Maximize risk-adjusted return (Sharpe/Calmar) using a deep GradientBoostingClassifier with many slow-learning trees and aggressive regularis…
|
S
@silver-bull-130
|
NZDUSD | 15min | 60.9% / 0.0% | +18.36% / +0.00% | 1.35 / — | 3.80% / 3.80% | 7320 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:35:33
# Model : Gradient Boosting
# Feature Eng. : RSI 14, MACD (12,26,9) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file with the NZD/USD 15-minute OHLC bars (per the file name).
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/NZDUSD_15min.parquet"
# Date window for the run; how the bounds are applied is decided by the
# runner, which is not visible here.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# NOTE(review): left empty — presumably "no separate validation cut-off";
# confirm against the runner.
VALIDATION_DATE = ""
# NOTE(review): presumably the share of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add the RSI/MACD-centred feature set to ``df`` and return the filled frame.

    New columns are assigned onto ``df`` itself (the caller's frame is
    mutated); the returned object is the result of ``df.bfill().ffill()``.

    Args:
        df: Frame that receives the feature columns. Time features are only
            added when its index exposes ``hour`` (e.g. a DatetimeIndex).
        close, open_, high, low: Price Series aligned with ``df``.

    Returns:
        DataFrame with all engineered columns, NaNs filled backward then
        forward.
    """
    # ── RSI 14 (ewm with alpha=1/14) ────────────────────────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(alpha=1 / 14, min_periods=14, adjust=False).mean()
    avg_loss = loss.ewm(alpha=1 / 14, min_periods=14, adjust=False).mean()
    rs = avg_gain / avg_loss.replace(0, np.nan)
    df["rsi_14"] = 100 - (100 / (1 + rs))
    # RSI derived signals
    df["rsi_ob"] = np.where(df["rsi_14"] > 70, 1, 0)   # overbought flag
    df["rsi_os"] = np.where(df["rsi_14"] < 30, 1, 0)   # oversold flag
    df["rsi_mid"] = df["rsi_14"] - 50                  # centred
    df["rsi_slope"] = df["rsi_14"].diff(3)             # momentum of RSI
    df["rsi_accel"] = df["rsi_slope"].diff(2)          # acceleration
    # RSI regime: above/below 50
    df["rsi_bull"] = np.where(df["rsi_14"] > 50, 1, -1)

    # ── MACD (12, 26, 9) ────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - signal_line
    df["macd_line"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_hist
    # MACD derived
    df["macd_cross"] = np.where(macd_line > signal_line, 1, -1)
    df["macd_hist_sign"] = np.where(macd_hist > 0, 1, -1)
    df["macd_hist_chg"] = macd_hist.diff(1)                  # histogram change
    df["macd_hist_accel"] = df["macd_hist_chg"].diff(1)      # second derivative
    df["macd_zero_cross"] = np.where(macd_line > 0, 1, -1)

    # ── ATR 14 ──────────────────────────────────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr14 = tr.ewm(alpha=1 / 14, min_periods=14, adjust=False).mean()
    natr14 = atr14 / close
    df["atr_14"] = atr14
    df["natr_14"] = natr14                                   # normalised ATR
    df["atr_ratio"] = atr14 / atr14.rolling(50).mean()       # current vs recent vol

    # ── Volatility regime ───────────────────────────────────────────────────
    df["vol_high"] = np.where(
        df["natr_14"] > df["natr_14"].rolling(100).median(), 1, 0
    )

    # ── Price momentum ──────────────────────────────────────────────────────
    df["ret_1"] = close.pct_change(1)
    df["ret_3"] = close.pct_change(3)
    df["ret_8"] = close.pct_change(8)
    df["ret_16"] = close.pct_change(16)
    # Scaled by normalised ATR; a zero denominator becomes NaN instead of inf.
    natr_nonzero = natr14.replace(0, np.nan)
    df["ret_1_atr"] = df["ret_1"] / natr_nonzero
    df["ret_3_atr"] = df["ret_3"] / natr_nonzero
    df["ret_8_atr"] = df["ret_8"] / natr_nonzero

    # ── EMAs & trend structure ──────────────────────────────────────────────
    ema8 = close.ewm(span=8, adjust=False).mean()
    ema21 = close.ewm(span=21, adjust=False).mean()
    ema50 = close.ewm(span=50, adjust=False).mean()
    ema100 = close.ewm(span=100, adjust=False).mean()
    df["ema8_21_spread"] = (ema8 - ema21) / close
    df["ema21_50_spread"] = (ema21 - ema50) / close
    df["ema50_100_spread"] = (ema50 - ema100) / close
    df["price_vs_ema21"] = (close - ema21) / close
    df["price_vs_ema50"] = (close - ema50) / close
    df["trend_align"] = np.where(
        (ema8 > ema21) & (ema21 > ema50), 1,
        np.where((ema8 < ema21) & (ema21 < ema50), -1, 0)
    )

    # ── Bollinger Bands (20, 2) ─────────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std(ddof=0)
    bb_up = bb_mid + 2 * bb_std
    bb_lo = bb_mid - 2 * bb_std
    bb_bw = (bb_up - bb_lo) / bb_mid                 # bandwidth
    bb_pct = (close - bb_lo) / (bb_up - bb_lo)       # %B
    df["bb_pct"] = bb_pct
    df["bb_bw"] = bb_bw
    df["bb_bw_ratio"] = bb_bw / bb_bw.rolling(50).mean()     # squeeze detector
    df["bb_upper_touch"] = np.where(close >= bb_up, 1, 0)
    df["bb_lower_touch"] = np.where(close <= bb_lo, 1, 0)

    # ── Stochastic %K %D (14, 3) ────────────────────────────────────────────
    lo14 = low.rolling(14).min()
    hi14 = high.rolling(14).max()
    stoch_k = 100 * (close - lo14) / (hi14 - lo14).replace(0, np.nan)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d
    df["stoch_kd_diff"] = stoch_k - stoch_d
    df["stoch_ob"] = np.where(stoch_k > 80, 1, 0)
    df["stoch_os"] = np.where(stoch_k < 20, 1, 0)

    # ── Candle structure ────────────────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["body_ratio"] = body / candle_rng             # body vs full range
    df["upper_wick"] = (high - body_top) / candle_rng
    df["lower_wick"] = (body_bottom - low) / candle_rng
    df["candle_dir"] = np.where(close > open_, 1, -1)
    df["candle_dir_3"] = df["candle_dir"].rolling(3).sum()   # short-term bias

    # ── Volume-less momentum oscillator (Williams %R 14) ────────────────────
    df["williams_r"] = -100 * (hi14 - close) / (hi14 - lo14).replace(0, np.nan)

    # ── RSI x MACD composite signal ─────────────────────────────────────────
    df["rsi_macd_bull"] = np.where(
        (df["rsi_14"] > 50) & (macd_hist > 0), 1,
        np.where((df["rsi_14"] < 50) & (macd_hist < 0), -1, 0)
    )

    # ── Divergence proxy: price vs RSI direction (3-bar) ────────────────────
    price_dir3 = np.sign(close.diff(3))
    rsi_dir3 = np.sign(df["rsi_14"].diff(3))
    # NaN != NaN is True, so without this mask every warm-up row would be
    # reported as a divergence.
    dirs_known = price_dir3.notna() & rsi_dir3.notna()
    df["rsi_div"] = np.where(dirs_known & (price_dir3 != rsi_dir3), 1, 0)

    # ── Mean-reversion signal: distance from 50-bar mean in 50-bar std units ─
    sma50 = close.rolling(50).mean()
    df["zscore_50"] = (close - sma50) / (
        close.rolling(50).std(ddof=0).replace(0, np.nan)
    )
    df["mean_rev_long"] = np.where(df["zscore_50"] < -1.5, 1, 0)
    df["mean_rev_short"] = np.where(df["zscore_50"] > 1.5, 1, 0)

    # ── Interaction features ────────────────────────────────────────────────
    df["rsi_bb_pct"] = df["rsi_14"] * df["bb_pct"]
    df["macd_hist_rsi_mid"] = df["macd_hist"] * df["rsi_mid"]
    df["stoch_rsi"] = df["stoch_k"] * df["rsi_14"] / 1e4     # normalised product

    # ── Lag features (avoid lookahead) ──────────────────────────────────────
    for lag in [1, 2, 4, 8]:
        df[f"rsi_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
        df[f"ret_lag{lag}"] = df["ret_1"].shift(lag)

    # ── Hour-of-day & day-of-week cyclic encoding ───────────────────────────
    if hasattr(df.index, "hour"):
        hour = df.index.hour
        dow = df.index.dayofweek
        df["hour_sin"] = np.sin(2 * np.pi * hour / 24)
        df["hour_cos"] = np.cos(2 * np.pi * hour / 24)
        # dayofweek spans 0..6 (Monday..Sunday); a period of 5 would wrap
        # Saturday/Sunday onto Monday/Tuesday, so encode on a 7-day cycle.
        df["dow_sin"] = np.sin(2 * np.pi * dow / 7)
        df["dow_cos"] = np.cos(2 * np.pi * dow / 7)

    # ── Final fill ──────────────────────────────────────────────────────────
    # NOTE(review): bfill copies the first valid value backwards into the
    # warm-up rows, i.e. those rows see a later bar's value.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Build the settings dictionary for the NZD/USD RSI-MACD strategy.

    Returns:
        dict holding the title, model type and hyper-parameters, the
        signal/risk settings, and two free-text fields ("objective", "notes").
    """
    booster_params = dict(
        n_estimators=600,
        max_depth=4,
        learning_rate=0.03,
        subsample=0.75,
        max_features="sqrt",
        min_samples_leaf=20,
        min_samples_split=40,
        warm_start=False,
    )
    objective_text = "".join([
        "Maximize risk-adjusted return (Sharpe/Calmar) using a deep ",
        "GradientBoostingClassifier with many slow-learning trees and ",
        "aggressive regularisation (min_samples_leaf=20, subsample=0.75). ",
        "Feature set deliberately differs from prior RSI+BB+Stoch attempts ",
        "by adding: ATR-normalised returns, z-score mean-reversion signals, ",
        "RSI divergence proxy, Williams %R, candle structure ratios, cyclic ",
        "time encoding, and interaction/lag features to give the model richer ",
        "multi-timeframe context. SL=0.5%/TP=1% gives 1:2 RR aligned with ",
        "maximising Sharpe.",
    ])
    notes_text = "".join([
        "Prior PF=1.35 / ret=+18.36% used standard RSI+MACD+BB+Stoch without ",
        "ATR normalisation or divergence detection. This version adds z-score ",
        "mean-reversion context, candle structure, and temporal encoding to ",
        "reduce false positives. session_filter=[21,21] is intentionally ",
        "narrow — set to None if you want 24h coverage. min_atr=0.0002 ",
        "avoids dead-market signals.",
    ])
    # Key order mirrors the original literal so consumers that iterate the
    # dict see the same sequence.
    return dict(
        title="NZD/USD RSI-MACD Gradient Boost Risk-Adjusted",
        model_type="GradientBoostingClassifier",
        model_params=booster_params,
        signal_threshold=0.56,
        direction="both",
        stop_loss=0.005,
        take_profit=0.010,
        cooldown=0,
        max_positions=1,
        on_opposite="reverse",
        session_filter=[21, 21],
        min_atr=0.0002,
        trend_filter=None,
        target_horizon=4,
        objective=objective_text,
        notes=notes_text,
    )
|
||||||||||
|
2.01
|
GBP/USD SMA Trend + Multi-Indicator XGBoost Classifier
Maximize risk-adjusted return on GBP/USD 15-min bars. Strategy combines required SMA (20/50/200) distance and cross features with ADX trend …
|
E
@elastic-moose-350
|
GBPUSD | 15min | 43.4%40.0% | +7.34%+7.40% | 1.731.49 | 2.20%2.20% | 7610 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:27:43
# Model : XGBoost
# Feature Eng. : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file holding the GBP/USD 15-minute OHLC bars this strategy targets.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet"
# Requested backtest window, as ISO date strings.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# Left empty here. NOTE(review): presumably an empty value makes the runner
# fall back to the ratio split below — confirm against the caller.
VALIDATION_DATE = ""
# NOTE(review): looks like the fraction of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add the SMA-trend plus multi-indicator feature set to ``df``.

    New columns are assigned onto ``df`` itself (the caller's frame is
    mutated); the returned object is the result of ``df.bfill().ffill()``.

    Args:
        df: Frame that receives the feature columns. Time features are only
            added when its index exposes ``hour`` (e.g. a DatetimeIndex).
        close, open_, high, low: Price Series aligned with ``df``.

    Returns:
        DataFrame with all engineered columns, NaNs filled backward then
        forward.
    """
    # ── Required SMAs and distance metrics ──────────────────────────────────
    # Each SMA is computed once and reused by the slope, cross, z-score and
    # regime features below.
    smas = {p: close.rolling(p).mean() for p in (20, 50, 200)}
    for p, sma in smas.items():
        df[f"sma_{p}"] = sma
        df[f"dm_sma_{p}"] = (close - sma) / sma
    # ── SMA slope (momentum of the moving average itself) ───────────────────
    for p in (20, 50):
        sma = smas[p]
        df[f"sma_{p}_slope"] = sma.diff(5) / sma.shift(5)
    # ── SMA cross signals ───────────────────────────────────────────────────
    sma20, sma50, sma200 = smas[20], smas[50], smas[200]
    df["sma20_50_cross"] = (sma20 - sma50) / sma50
    df["sma50_200_cross"] = (sma50 - sma200) / sma200
    df["sma20_200_cross"] = (sma20 - sma200) / sma200

    # ── Price momentum over multiple horizons ───────────────────────────────
    for lag in [1, 3, 6, 12, 24, 48]:
        df[f"ret_{lag}"] = close.pct_change(lag)

    # ── Volatility: rolling standard deviation of returns ───────────────────
    ret1 = close.pct_change(1)
    for w in [10, 20, 40]:
        df[f"vol_{w}"] = ret1.rolling(w).std()

    # ── ATR (Average True Range, normalised) ────────────────────────────────
    prev_close = close.shift(1)
    tr = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    for w in [14, 28]:
        atr = tr.ewm(span=w, adjust=False).mean()
        df[f"natr_{w}"] = atr / close

    # ── Bollinger Bands (20-period, 2σ) ─────────────────────────────────────
    bb_mid = sma20
    bb_std = close.rolling(20).std()
    bb_up = bb_mid + 2 * bb_std
    bb_lo = bb_mid - 2 * bb_std
    bb_width = (bb_up - bb_lo) / bb_mid
    df["bb_pct_b"] = (close - bb_lo) / (bb_up - bb_lo + 1e-12)
    df["bb_width"] = bb_width
    df["bb_squeeze"] = np.where(bb_width < bb_width.rolling(50).mean(), 1.0, 0.0)

    # ── Keltner Channel (for squeeze confirmation) ──────────────────────────
    kc_mid = close.ewm(span=20, adjust=False).mean()
    kc_atr = tr.ewm(span=20, adjust=False).mean()
    kc_up = kc_mid + 1.5 * kc_atr
    kc_lo = kc_mid - 1.5 * kc_atr
    df["kc_pct"] = (close - kc_lo) / (kc_up - kc_lo + 1e-12)

    # ── RSI (Wilder) ────────────────────────────────────────────────────────
    def wilder_rsi(src, period):
        """RSI from ewm(alpha=1/period) averages of gains and losses."""
        delta = src.diff(1)
        gain = delta.clip(lower=0)
        loss = (-delta).clip(lower=0)
        avg_g = gain.ewm(alpha=1 / period, adjust=False).mean()
        avg_l = loss.ewm(alpha=1 / period, adjust=False).mean()
        rs = avg_g / (avg_l + 1e-12)
        return 100 - 100 / (1 + rs)

    rsi14 = wilder_rsi(close, 14)
    rsi6 = wilder_rsi(close, 6)
    rsi28 = wilder_rsi(close, 28)
    df["rsi14"] = rsi14 / 100.0
    df["rsi6"] = rsi6 / 100.0
    df["rsi28"] = rsi28 / 100.0
    df["rsi14_slope"] = rsi14.diff(3) / 100.0
    # RSI divergence proxy: price new high/low but RSI doesn't confirm
    price_high_12 = close.rolling(12).max()
    price_low_12 = close.rolling(12).min()
    rsi_high_12 = rsi14.rolling(12).max()
    rsi_low_12 = rsi14.rolling(12).min()
    df["rsi_bear_div"] = np.where(
        (close >= price_high_12 * 0.999) & (rsi14 < rsi_high_12 * 0.97), 1.0, 0.0)
    df["rsi_bull_div"] = np.where(
        (close <= price_low_12 * 1.001) & (rsi14 > rsi_low_12 * 1.03), 1.0, 0.0)

    # ── MACD ────────────────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd = ema12 - ema26
    signal = macd.ewm(span=9, adjust=False).mean()
    hist = macd - signal
    df["macd_norm"] = macd / close
    df["macd_sig_norm"] = signal / close
    df["macd_hist_norm"] = hist / close
    df["macd_hist_slope"] = hist.diff(2) / close

    # ── Stochastic Oscillator ───────────────────────────────────────────────
    for k_period in [14, 5]:
        lo_k = low.rolling(k_period).min()
        hi_k = high.rolling(k_period).max()
        stoch_k = (close - lo_k) / (hi_k - lo_k + 1e-12) * 100
        stoch_d = stoch_k.rolling(3).mean()
        df[f"stoch_k_{k_period}"] = stoch_k / 100.0
        df[f"stoch_d_{k_period}"] = stoch_d / 100.0
        df[f"stoch_kd_{k_period}"] = (stoch_k - stoch_d) / 100.0

    # ── Williams %R ─────────────────────────────────────────────────────────
    hi14 = high.rolling(14).max()
    lo14 = low.rolling(14).min()
    df["williams_r"] = (hi14 - close) / (hi14 - lo14 + 1e-12)

    # ── CCI (Commodity Channel Index) ───────────────────────────────────────
    tp = (high + low + close) / 3.0
    tp_sma = tp.rolling(20).mean()
    tp_mad = tp.rolling(20).apply(lambda x: np.mean(np.abs(x - x.mean())), raw=True)
    df["cci"] = (tp - tp_sma) / (0.015 * tp_mad + 1e-12) / 100.0

    # ── Volume-like proxy: candle body and wick ratios ──────────────────────
    candle_range = (high - low).replace(0, np.nan)
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["body_ratio"] = (close - open_).abs() / candle_range
    df["upper_wick"] = (high - body_top) / candle_range
    df["lower_wick"] = (body_bottom - low) / candle_range
    df["bull_candle"] = np.where(close > open_, 1.0, 0.0)

    # ── Mean reversion signal: z-score of close vs SMA20 / SMA50 ────────────
    df["zscore_20"] = (close - sma20) / (bb_std + 1e-12)
    df["zscore_50"] = (close - sma50) / (close.rolling(50).std() + 1e-12)

    # ── Trend strength: ADX proxy ───────────────────────────────────────────
    plus_dm = (high.diff(1)).clip(lower=0)
    minus_dm = (-low.diff(1)).clip(lower=0)
    # Subtract the smaller of the two moves from both, so at most one of
    # +DM / -DM stays positive on any bar.
    overlap = pd.concat([plus_dm, minus_dm], axis=1).min(axis=1)
    plus_dm = plus_dm - overlap
    minus_dm = minus_dm - overlap
    atr14 = tr.ewm(span=14, adjust=False).mean()
    plus_di = 100 * plus_dm.ewm(span=14, adjust=False).mean() / (atr14 + 1e-12)
    minus_di = 100 * minus_dm.ewm(span=14, adjust=False).mean() / (atr14 + 1e-12)
    dx = (plus_di - minus_di).abs() / (plus_di + minus_di + 1e-12) * 100
    adx = dx.ewm(span=14, adjust=False).mean()
    df["adx"] = adx / 100.0
    df["plus_di"] = plus_di / 100.0
    df["minus_di"] = minus_di / 100.0
    df["di_diff"] = (plus_di - minus_di) / 100.0

    # ── Regime detection: above/below long-term SMA ─────────────────────────
    df["bull_regime"] = np.where(close > sma200, 1.0, 0.0)
    df["mid_regime"] = np.where(close > sma50, 1.0, 0.0)

    # ── Lag features (auto-regressive) ──────────────────────────────────────
    for col, lags in [("rsi14", [1, 2, 4]), ("macd_hist_norm", [1, 2]), ("bb_pct_b", [1, 2])]:
        for lag in lags:
            df[f"{col}_lag{lag}"] = df[col].shift(lag)

    # ── Time-of-day features ────────────────────────────────────────────────
    if hasattr(df.index, "hour"):
        df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
        df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
        # dayofweek spans 0..6 (Monday..Sunday); a period of 5 would wrap
        # Saturday/Sunday onto Monday/Tuesday, so encode on a 7-day cycle.
        df["dow_sin"] = np.sin(2 * np.pi * df.index.dayofweek / 7.0)
        df["dow_cos"] = np.cos(2 * np.pi * df.index.dayofweek / 7.0)

    # ── Fill NaN from warm-up ───────────────────────────────────────────────
    # NOTE(review): bfill copies the first valid value backwards into the
    # warm-up rows, i.e. those rows see a later bar's value.
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Build the settings dictionary for the GBP/USD SMA-trend XGBoost strategy.

    Returns:
        dict holding the title, model type and hyper-parameters, the
        signal/risk settings, and two free-text fields ("objective", "notes").
    """
    xgb_params = {}
    xgb_params["n_estimators"] = 600
    xgb_params["max_depth"] = 4
    xgb_params["learning_rate"] = 0.03
    xgb_params["subsample"] = 0.75
    xgb_params["colsample_bytree"] = 0.70
    xgb_params["min_child_weight"] = 5
    xgb_params["gamma"] = 0.10
    xgb_params["reg_alpha"] = 0.10
    xgb_params["reg_lambda"] = 1.50
    xgb_params["objective"] = "binary:logistic"
    xgb_params["tree_method"] = "hist"
    xgb_params["random_state"] = 42
    xgb_params["n_jobs"] = -1

    objective_parts = [
        "Maximize risk-adjusted return on GBP/USD 15-min bars. ",
        "Strategy combines required SMA (20/50/200) distance and cross features ",
        "with ADX trend strength, RSI divergence, Bollinger squeeze, Keltner, ",
        "MACD histogram slope, Stochastic, CCI, Williams %R, and candle-structure ",
        "ratios. XGBoost with strong regularisation and subsampling prevents ",
        "overfitting on the relatively short 1-year window. ",
        "Session filter 06-18 UTC keeps execution in liquid London/NY hours; ",
        "0.5% SL and 1.0% TP yield 1:2 R:R; sma_50 trend filter aligns trades ",
        "with intermediate momentum to improve win rate and Sharpe.",
    ]
    notes_parts = [
        "Differs from prior RSI/MACD/BB/Stoch attempts by: (1) foregrounding ",
        "SMA cross and distance features as primary trend signals; (2) adding ",
        "ADX-based regime and DI differential; (3) including RSI divergence ",
        "proxy flags; (4) z-score mean-reversion features; (5) candle body/wick ",
        "structure ratios as micro-structure proxies; (6) time-of-day cyclical ",
        "encoding; (7) heavier regularisation (gamma, alpha, lambda) and higher ",
        "min_child_weight to reduce variance on the thin dataset.",
    ]

    # Assembled in the same key order as the original literal.
    config = {
        "title": "GBP/USD SMA Trend + Multi-Indicator XGBoost Classifier",
        "model_type": "XGBClassifier",
        "model_params": xgb_params,
    }
    config.update(
        signal_threshold=0.56,
        direction="both",
        stop_loss=0.005,
        take_profit=0.010,
        cooldown=0,
        max_positions=1,
        on_opposite="reverse",
        session_filter=[6, 18],
        min_atr=0.0003,
        trend_filter="sma_50",
        target_horizon=4,
    )
    config["objective"] = "".join(objective_parts)
    config["notes"] = "".join(notes_parts)
    return config
|
||||||||||
|
1.94
|
AUD/USD Stoch+BB+RSI Mean-Reversion XGBoost
Maximize risk-adjusted return (Sharpe / Calmar). XGBoost chosen for its ability to capture non-linear interactions between Stochastic, Bolli…
|
S
@still-lynx-704
|
AUDUSD | 15min | 62.5%67.7% | +10.93%+9.41% | 1.181.22 | 4.00%4.00% | 74299 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:51:31
# Model : XGBoost
# Feature Eng. : BB (20,2.0), RSI 14, Stochastic (14,3) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Parquet file holding the AUD/USD 15-minute OHLC bars this strategy targets.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"
# Requested backtest window, as ISO date strings.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# Left empty here. NOTE(review): presumably an empty value makes the runner
# fall back to the ratio split below — confirm against the caller.
VALIDATION_DATE = ""
# NOTE(review): looks like the fraction of rows used for training — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Attach the Bollinger/RSI/Stochastic feature set to ``df``.

    Columns are written onto ``df`` itself; the value returned is
    ``df.bfill().ffill()``, so warm-up NaNs are filled from the nearest
    valid row.

    Args:
        df: Frame that receives the feature columns. A ``volume`` column is
            used when present; otherwise ``vol_ratio`` is the constant 1.0.
        close, open_, high, low: Price Series aligned with ``df``.

    Returns:
        DataFrame with every engineered column added and NaNs filled.
    """

    def _flag(condition):
        # 1 where the condition holds, 0 elsewhere (NaN comparisons give 0).
        return np.where(condition, 1, 0)

    # ── Bollinger Bands (20, 2) ─────────────────────────────────────────────
    window, num_std = 20, 2.0
    centre = close.rolling(window).mean()
    rolling_sd = close.rolling(window).std(ddof=0)
    upper_band = centre + num_std * rolling_sd
    lower_band = centre - num_std * rolling_sd
    df["bb_mid"] = centre
    df["bb_upper"] = upper_band
    df["bb_lower"] = lower_band
    df["bb_width"] = (upper_band - lower_band) / centre
    df["bb_pct"] = (close - lower_band) / (upper_band - lower_band)

    # ── RSI (14) ────────────────────────────────────────────────────────────
    rsi_len = 14
    change = close.diff()
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)
    mean_up = ups.ewm(com=rsi_len - 1, min_periods=rsi_len).mean()
    mean_down = downs.ewm(com=rsi_len - 1, min_periods=rsi_len).mean()
    strength = mean_up / mean_down.replace(0, np.nan)
    df["rsi"] = 100 - (100 / (1 + strength))

    # ── Stochastic Oscillator (K=14, D=3) ───────────────────────────────────
    k_len, d_len = 14, 3
    floor_price = low.rolling(k_len).min()
    ceiling_price = high.rolling(k_len).max()
    span = (ceiling_price - floor_price).replace(0, np.nan)
    df["stoch_k"] = 100 * (close - floor_price) / span
    df["stoch_d"] = df["stoch_k"].rolling(d_len).mean()
    df["stoch_kd_diff"] = df["stoch_k"] - df["stoch_d"]

    # ── Zone flags and band position ────────────────────────────────────────
    df["rsi_ob"] = _flag(df["rsi"] > 70)
    df["rsi_os"] = _flag(df["rsi"] < 30)
    df["rsi_mid"] = df["rsi"] - 50.0
    df["stoch_ob"] = _flag(df["stoch_k"] > 80)
    df["stoch_os"] = _flag(df["stoch_k"] < 20)
    df["price_above_bb_upper"] = _flag(close > upper_band)
    df["price_below_bb_lower"] = _flag(close < lower_band)
    df["price_vs_bb_mid"] = close - centre

    # ── ATR-based volatility (14-bar) ───────────────────────────────────────
    atr_len = 14
    true_range = pd.concat(
        [
            high - low,
            (high - close.shift(1)).abs(),
            (low - close.shift(1)).abs(),
        ],
        axis=1,
    ).max(axis=1)
    df["atr14"] = true_range.ewm(com=atr_len - 1, min_periods=atr_len).mean()
    df["natr14"] = df["atr14"] / close

    # ── SMA trend context ───────────────────────────────────────────────────
    for n in (20, 50, 200):
        df[f"sma_{n}"] = close.rolling(n).mean()
    df["price_vs_sma20"] = (close - df["sma_20"]) / df["sma_20"]
    df["price_vs_sma50"] = (close - df["sma_50"]) / df["sma_50"]
    df["sma20_vs_sma50"] = (df["sma_20"] - df["sma_50"]) / df["sma_50"]

    # ── Momentum: rate of change ────────────────────────────────────────────
    for n in (5, 10, 20):
        df[f"roc_{n}"] = close.pct_change(n)

    # ── MACD-style (EMA 12 - EMA 26) ────────────────────────────────────────
    fast_ema = close.ewm(span=12, min_periods=12).mean()
    slow_ema = close.ewm(span=26, min_periods=26).mean()
    macd = fast_ema - slow_ema
    macd_sig = macd.ewm(span=9, min_periods=9).mean()
    df["macd"] = macd
    df["macd_signal"] = macd_sig
    df["macd_hist"] = macd - macd_sig

    # ── Candle body / wick ratios ───────────────────────────────────────────
    bar_span = (high - low).replace(0, np.nan)
    df["body_ratio"] = (close - open_).abs() / bar_span
    body_top = pd.concat([close, open_], axis=1).max(axis=1)
    df["upper_wick"] = (high - body_top) / bar_span
    body_bottom = pd.concat([close, open_], axis=1).min(axis=1)
    df["lower_wick"] = (body_bottom - low) / bar_span
    df["bullish_bar"] = _flag(close > open_)

    # ── Lagged RSI / Stoch / %B and RSI slope ───────────────────────────────
    df["rsi_lag1"] = df["rsi"].shift(1)
    df["rsi_lag2"] = df["rsi"].shift(2)
    df["stoch_k_lag1"] = df["stoch_k"].shift(1)
    df["bb_pct_lag1"] = df["bb_pct"].shift(1)
    df["rsi_slope"] = df["rsi"] - df["rsi"].shift(3)

    # ── Stoch K crossing D (momentum signal) ────────────────────────────────
    k_now, d_now = df["stoch_k"], df["stoch_d"]
    k_prev, d_prev = df["stoch_k"].shift(1), df["stoch_d"].shift(1)
    df["stoch_cross_up"] = _flag((k_now > d_now) & (k_prev <= d_prev))
    df["stoch_cross_down"] = _flag((k_now < d_now) & (k_prev >= d_prev))

    # ── Volume (if present) ─────────────────────────────────────────────────
    if "volume" not in df.columns:
        df["vol_ratio"] = 1.0
    else:
        avg_volume = df["volume"].rolling(20).mean()
        df["vol_ratio"] = df["volume"] / avg_volume.replace(0, np.nan)

    # ── Fill NaN from warm-up ───────────────────────────────────────────────
    filled = df.bfill().ffill()
    return filled
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Build the settings dictionary for the AUD/USD mean-reversion XGBoost strategy.

    Returns:
        dict holding the title, model type and hyper-parameters, the
        signal/risk settings, and two free-text fields ("objective", "notes").
    """
    hyper_params = dict(
        n_estimators=400,
        max_depth=4,
        learning_rate=0.04,
        subsample=0.75,
        colsample_bytree=0.70,
        min_child_weight=5,
        gamma=0.15,
        reg_alpha=0.10,
        reg_lambda=1.50,
        objective="binary:logistic",
        random_state=42,
        n_jobs=-1,
    )
    why = (
        "Maximize risk-adjusted return (Sharpe / Calmar). "
        + "XGBoost chosen for its ability to capture non-linear interactions "
        + "between Stochastic, Bollinger Bands, and RSI regimes. "
        + "Shallow trees (max_depth=4) + high regularisation (reg_lambda=1.5, gamma=0.15) "
        + "prevent overfitting on 15-min FX data. "
        + "2:1 TP:SL ratio (1.0% / 0.5%) improves expectancy per trade. "
        + "Reverse on opposite signal minimises flat time and captures regime flips."
    )
    remarks = (
        "Features include BB width/pct, RSI(14) with overbought/oversold flags, "
        + "Stochastic K/D crossovers, MACD histogram, ATR volatility, SMA trend context, "
        + "candle body ratios, lagged indicators, and momentum ROC. "
        + "signal_threshold=0.55 balances precision vs recall on directional calls. "
        + "session_filter covers full 24h to capture Asia + London + NY sessions for AUD/USD."
    )
    # Pairs are listed in the original key order.
    entries = [
        ("title", "AUD/USD Stoch+BB+RSI Mean-Reversion XGBoost"),
        ("model_type", "XGBClassifier"),
        ("model_params", hyper_params),
        ("signal_threshold", 0.55),
        ("direction", "both"),
        ("stop_loss", 0.005),
        ("take_profit", 0.010),
        ("cooldown", 0),
        ("max_positions", 1),
        ("on_opposite", "reverse"),
        ("session_filter", [0, 23]),
        ("min_atr", None),
        ("trend_filter", None),
        ("target_horizon", 4),
        ("objective", why),
        ("notes", remarks),
    ]
    return dict(entries)
|
||||||||||
|
1.93
|
EMA(9/21) trend
|
M
@malcolmtan
|
EMA(9/ | 50.0%— | +0.90%— | 2.15— | 0.39%0.39% | 10— |
|
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:27:36
# Model : XGBoost
# Feature Eng. : go long when EMA(9) crosses above EMA(21), exit when it crosses back below + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
#
# Keys are the timeframe token load_ohlc parses from the parquet filename;
# values are window lengths in days. load_ohlc reads the override from the
# environment variable QM_MAX_DAYS_<TF upper-cased>; a timeframe missing from
# this table is not capped.
_TF_MAX_DAYS = {
    "1min": 30,
    "5min": 90,
    "15min": 365,
    "1h": 730,
}
def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
    """Download OHLC bars as parquet from the /internal/ohlc HTTP endpoint.

    The request is signed with an HMAC-SHA256 over "symbol|tf|start|end",
    keyed by the INTERNAL_WS_SECRET environment variable, and sent to the
    base URL taken from QM_INTERNAL_OHLC_BASE.

    Returns:
        pd.DataFrame decoded from the parquet payload.

    Raises:
        RuntimeError: either environment variable is unset/empty, or the
            endpoint answers with a status other than 200. Errors raised by
            urllib or the parquet reader are not caught here.
    """
    import hashlib as _hashlib
    import hmac as _hmac
    import io as _io
    import os as _os
    import urllib.parse as _urp
    import urllib.request as _ur

    env = _os.environ
    endpoint_root = (env.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
    shared_secret = (env.get("INTERNAL_WS_SECRET") or "").strip()
    if not endpoint_root:
        raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
    if not shared_secret:
        raise RuntimeError("INTERNAL_WS_SECRET not set")

    # Sign the exact request parameters so the server can verify the caller.
    signed_text = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
    signature = _hmac.new(
        shared_secret.encode("utf-8"), signed_text, _hashlib.sha256
    ).hexdigest()
    query = _urp.urlencode([
        ("symbol", symbol),
        ("tf", tf),
        ("start", start),
        ("end", end),
        ("sig", signature),
    ])
    request = _ur.Request(
        f"{endpoint_root}/internal/ohlc?{query}",
        headers={"User-Agent": "qm-worker/1.0"},
    )
    with _ur.urlopen(request, timeout=30) as response:
        if response.status != 200:
            raise RuntimeError(f"/internal/ohlc returned {response.status}")
        payload = response.read()
    print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
    return pd.read_parquet(_io.BytesIO(payload))
def _parse_symbol_tf_from_path(data_path: str):
    """Extract (SYMBOL, TF) from a filename such as ``EURUSD_1min.parquet``.

    Only the basename is inspected. It must be six upper-case letters, an
    underscore, a timeframe of the form ``<digits>min`` or ``<digits>h``,
    and the ``.parquet`` suffix.

    Returns:
        ``(symbol, tf)`` on a match, otherwise ``(None, None)``.
    """
    import os as _os
    import re as _re

    file_name = _os.path.basename(str(data_path))
    found = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", file_name)
    return found.groups() if found else (None, None)
def load_ohlc(data_path, start_date="", end_date=""):
    """Read OHLC bars, order them by time and trim them to the requested window.

    Source selection: when the environment variable QM_USE_INTERNAL_OHLC is
    exactly "1" the bars are fetched through _fetch_ohlc_from_internal (symbol
    and timeframe are parsed from the basename of ``data_path``); otherwise
    ``data_path`` is read with pd.read_parquet.

    Window cap: the timeframe is inferred from the filename and looked up in
    _TF_MAX_DAYS (overridable via QM_MAX_DAYS_<TF>). If the requested start is
    older than "effective end minus cap", the start is moved forward and a
    message is printed. Any error inside that check is printed and the check
    is skipped.

    Returns: (df, close, open_, high, low)

    Raises:
        RuntimeError: internal fetching is enabled but the basename does not
            look like SYMBOL_TF.parquet.
    """
    import os as _os
    import re as _re

    if _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1":
        _sym, _tf = _parse_symbol_tf_from_path(data_path)
        if not (_sym and _tf):
            raise RuntimeError(
                f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
                f"SYMBOL_TF.parquet: {data_path}"
            )
        df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
    else:
        df = pd.read_parquet(data_path)
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Per-timeframe window cap (timeframe inferred from the parquet filename).
    _file_name = _os.path.basename(str(data_path))
    _tf_match = _re.search(r"_(\d+min|\d+h)\.parquet$", _file_name)
    _tf = _tf_match.group(1) if _tf_match else None
    _cap_days = _TF_MAX_DAYS.get(_tf)
    if _cap_days and _cap_days > 0 and len(df):
        _override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
        if _override and _override.isdigit():
            _cap_days = int(_override)
        try:
            _last_bar = df.index.max()
            if end_date:
                _window_end = min(pd.Timestamp(end_date), _last_bar)
            else:
                _window_end = _last_bar
            _earliest_allowed = _window_end - pd.Timedelta(days=_cap_days)
            if start_date:
                _wanted_start = pd.Timestamp(start_date)
            else:
                _wanted_start = df.index.min()
            if _wanted_start < _earliest_allowed:
                print(f"[load_ohlc] {_tf} backtest window capped to {_cap_days}d: "
                      f"start {_wanted_start.date()} -> {_earliest_allowed.date()}", flush=True)
                start_date = _earliest_allowed
        except Exception as _err:
            # Best effort: a failed cap check must not block loading.
            print(f"[load_ohlc] window-cap check skipped ({_err})", flush=True)

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]
    return df, df["close"], df["open"], df["high"], df["low"]
def make_target(close, horizon=4):
    """Label every bar with the sign of the price change ``horizon`` bars later.

    With the default of 4 bars the label looks one hour ahead on 15-minute
    data. The final ``horizon`` rows have no future bar and come out as NaN.

    Returns: target (pd.Series of -1, 0, 1)
    """
    future_close = close.shift(-horizon)
    price_change = future_close - close
    return np.sign(price_change)
def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
    """Split features and labels chronologically into train and test parts.

    Rows whose target is NaN are dropped first. The split point is the number
    of rows at or before ``validation_date`` when that is given, otherwise
    ``int(len(df) * train_split)``; it is clamped so both parts hold at least
    one row. Labels are encoded with a LabelEncoder fitted on [-1, 0, 1].

    Args:
        df: Frame containing a "close" column and every name in feature_cols.
        target: Series aligned with ``df`` (values -1, 0, 1 or NaN).
        feature_cols: Column names that form the feature matrix.
        train_split: Training fraction used when validation_date is empty.
        validation_date: Optional cut-off; rows up to and including it train.

    Returns: dict with keys:
        df, X_train, X_test, y_train, y_test,
        y_train_enc, y_test_enc, enc,
        close, close_train, close_test,
        split_idx, split_dt, n_train, n_test

    Raises:
        ValueError: fewer than two rows have a non-NaN target, so no
            train/test split exists.
    """
    # Drop NaN from target
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    n_rows = len(df)
    if n_rows < 2:
        # Fail with a clear message up front rather than an opaque IndexError
        # at df.index[split_idx] further down.
        raise ValueError(
            f"split_data needs at least 2 rows with a non-NaN target, got {n_rows}"
        )
    close = df["close"]

    # Build feature matrix
    X = df[feature_cols].copy()
    # NOTE(review): bfill runs on the whole matrix before the split, so a NaN
    # is filled from a later row — confirm this is acceptable for the
    # warm-up rows.
    X = X.bfill().ffill()
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Split
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(n_rows * train_split)
    # Keep at least one row on each side of the boundary.
    split_idx = max(1, min(split_idx, n_rows - 1))
    X_train = X.iloc[:split_idx]
    X_test = X.iloc[split_idx:]
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    split_dt = str(df.index[split_idx])

    # Label encoding — always fit on [-1, 0, 1]
    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)
    return {
        "df": df, "X_train": X_train, "X_test": X_test,
        "y_train": y_train, "y_test": y_test,
        "y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close, "close_train": close_train, "close_test": close_test,
        "split_idx": split_idx, "split_dt": split_dt,
        "n_train": len(X_train), "n_test": len(X_test),
    }
def compute_overlays(close, df_index):
    """Build chart overlays (Bollinger bands and moving averages) as plain lists.

    Bands use a 20-bar rolling mean and standard deviation (2 std either
    side); moving averages use 50, 100 and 200 bars. Each series is reindexed
    to ``df_index`` and filled backward then forward; any value that is still
    NaN or infinite is emitted as None.

    Returns: (bb_dict, ma_dict)
    """

    def _as_list(series):
        filled = series.reindex(df_index).bfill().ffill()
        cleaned = []
        for value in filled.values:
            usable = value is not None and np.isfinite(value)
            cleaned.append(float(value) if usable else None)
        return cleaned

    centre = close.rolling(20).mean()
    spread = close.rolling(20).std()
    top = centre + 2 * spread
    bottom = centre - 2 * spread

    bb = {"upper": _as_list(top), "mid": _as_list(centre), "lower": _as_list(bottom)}
    ma = {f"ma{n}": _as_list(close.rolling(n).mean()) for n in (50, 100, 200)}
    return bb, ma
def run_backtest(signal, close, capital=10000, cost=2e-5):
    """Run a signal-reversal backtest with a per-trade transaction cost.

    Position rules: a non-zero signal that differs from the current direction
    closes the open trade at that bar's close and opens a new one in the
    signal's direction. Signal 0 means hold (keep the current position), not
    close. Whatever is still open after the last bar is closed at the final
    close with exit_reason "end".

    Trade returns are price-based:
    ``direction * (exit - entry) / entry - cost``.

    The equity curve is realised-only: it steps when a trade closes. The
    forced end-of-data close is booked on the last bar as well, so the final
    equity value agrees with compounding ``trade_returns`` (and with
    run_backtest_v2).

    Args:
        signal: pd.Series of per-bar signals (non-zero = direction, 0 = hold).
        close: pd.Series of close prices, same length as ``signal``.
        capital: starting equity.
        cost: cost subtracted from every trade return.

    Returns: dict with equity, trade_returns, long_returns, short_returns,
        bar_returns, trade_log
    """
    sig_arr = signal.values
    price_arr = close.values
    idx = signal.index
    n = len(price_arr)

    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []
    equity_vals = np.full(n, float(capital))
    cumret = 1.0

    last_dir = None      # direction of the open trade (None until first entry)
    entry_price = None
    entry_bar = None

    def _close_trade(exit_bar, exit_price, reason):
        """Record the open trade as closed at exit_price; return its return."""
        ret = float(last_dir * (exit_price - entry_price) / entry_price - cost)
        trade_returns.append(ret)
        if last_dir == 1:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        trade_log.append({
            "type": "Buy" if last_dir == 1 else "Sell",
            "entry_time": str(idx[entry_bar]),
            "exit_time": str(idx[exit_bar]),
            "entry_price": round(entry_price, 5),
            "exit_price": round(exit_price, 5),
            "pnl": round(last_dir * (exit_price - entry_price), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })
        return ret

    for i in range(n):
        s = sig_arr[i]
        c = price_arr[i]
        if s != 0.0 and s != last_dir:
            # Direction change — close previous trade (if any), open new.
            # A zero entry price cannot produce a return, so it is not booked.
            if last_dir is not None and entry_price is not None and entry_price != 0:
                cumret *= 1 + _close_trade(i, c, "signal")
            entry_price = c
            entry_bar = i
            last_dir = s
        equity_vals[i] = capital * cumret

    # Close last open trade and reflect it in the final equity point
    if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
        cumret *= 1 + _close_trade(-1, price_arr[-1], "end")
        equity_vals[-1] = capital * cumret

    # Bar returns: previous bar's raw signal times this bar's price change.
    # They stay all-zero when no trade was ever opened.
    bar_returns = np.zeros(n)
    if last_dir is not None:
        for i in range(1, n):
            prev_price = price_arr[i - 1]
            prev_sig = sig_arr[i - 1]
            if prev_price != 0 and prev_sig != 0:
                bar_returns[i] = prev_sig * (price_arr[i] - prev_price) / prev_price

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }
def compute_trade_stats(trades, capital=10000):
    """Summarise a list of per-trade returns into display-ready statistics.

    This is the single place trade statistics are computed; display code is
    expected to read these values rather than recompute them. Every value is
    rounded. An empty ``trades`` list yields all-zero stats.

    Keys: n, wins, losses, wr (win rate), avg, best, worst, ret (compounded
    return), np (net profit on ``capital``), mdd (max drawdown, <= 0),
    pf (profit factor), rr (avg win / avg loss), expect (net profit per trade).
    pf and rr use 9999.0 as a stand-in for "no losses".
    """
    stat_keys = ("n", "wins", "losses", "wr", "avg", "best", "worst",
                 "ret", "np", "mdd", "pf", "rr", "expect")
    if not trades:
        return dict.fromkeys(stat_keys, 0)

    def _capped_ratio(numerator, denominator):
        # numerator / denominator, with a finite sentinel instead of infinity
        if denominator > 0:
            return numerator / denominator
        return 9999.0 if numerator > 0 else 0

    count = len(trades)
    winners = [r for r in trades if r > 0]
    losers = [r for r in trades if r < 0]

    # Compounded growth over all trades
    growth = 1.0
    for r in trades:
        growth *= (1 + r)
    net_profit = capital * (growth - 1)

    # Max drawdown of the trade-by-trade equity curve (starts at 1.0)
    curve = np.cumprod([1.0] + [1 + r for r in trades])
    running_peak = np.maximum.accumulate(curve)
    max_dd = float(((curve - running_peak) / running_peak).min())

    profit_factor = _capped_ratio(
        sum(winners) if winners else 0,
        abs(sum(losers)) if losers else 0,
    )
    risk_reward = _capped_ratio(
        float(np.mean(winners)) if winners else 0,
        abs(float(np.mean(losers))) if losers else 0,
    )

    stats = {}
    stats["n"] = count
    stats["wins"] = len(winners)
    stats["losses"] = len(losers)
    stats["wr"] = round(len(winners) / count, 4)
    stats["avg"] = round(float(np.mean(trades)), 6)
    stats["best"] = round(max(winners), 6) if winners else 0
    stats["worst"] = round(min(losers), 6) if losers else 0
    stats["ret"] = round(growth - 1, 6)
    stats["np"] = round(net_profit, 2)
    stats["mdd"] = round(max_dd, 6)
    stats["pf"] = round(profit_factor, 2)
    stats["rr"] = round(risk_reward, 2)
    stats["expect"] = round(net_profit / count, 2)
    return stats
def compute_metrics(bt_result, close_test, capital=10000):
    """Compute headline metrics from a backtest result.

    Strategy figures are trade-level: total return and max drawdown come from
    compounding ``bt_result["trade_returns"]``, and the strategy Sharpe is
    mean/std of trade returns scaled by sqrt(252 * 26). Buy-and-hold figures
    come from ``close_test`` normalised to its first price, with a per-bar
    Sharpe scaled by sqrt(252 * 24 * 4).

    Every returned number is finite: an empty ``close_test``, a zero or
    non-finite first price, or a degenerate Sharpe yields 0.0 instead of
    raising or emitting inf/NaN.

    Returns: dict with total_ret, bh_ret, sharpe_strat, sharpe_bh, mdd, n_trades
    """
    trade_returns = bt_result["trade_returns"]

    def _finite(value):
        # Coerce to a plain float, mapping NaN/inf to 0.0
        value = float(value)
        return value if np.isfinite(value) else 0.0

    # Total return and max drawdown — trade-level compounding
    if trade_returns:
        curve = np.cumprod([1.0] + [1 + r for r in trade_returns])
        total_ret = curve[-1] - 1
        peak = np.maximum.accumulate(curve)
        mdd = ((curve - peak) / peak).min()
    else:
        total_ret = 0.0
        mdd = 0.0

    # Strategy Sharpe — needs at least two trades and non-zero dispersion
    if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
        sharpe_strat = np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26)
    else:
        sharpe_strat = 0.0

    # Buy and hold — only defined when there is a usable first price
    bh_ret = 0.0
    sharpe_bh = 0.0
    first_price = float(close_test.iloc[0]) if len(close_test) > 0 else 0.0
    if first_price != 0 and np.isfinite(first_price):
        bh_equity = capital * (close_test / first_price)
        if capital != 0:
            bh_ret = (bh_equity.iloc[-1] - capital) / capital
        bh_rets = bh_equity.pct_change().dropna()
        if len(bh_rets) > 1 and bh_rets.std() != 0:
            sharpe_bh = (bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4)

    return {
        "total_ret": _finite(total_ret),
        "bh_ret": _finite(bh_ret),
        "sharpe_strat": _finite(sharpe_strat),
        "sharpe_bh": _finite(sharpe_bh),
        "mdd": _finite(mdd),
        "n_trades": len(trade_returns),
    }
# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000
def _downsample_idx(n, cap=_RESULTS_SERIES_MAX):
"""Evenly-spaced index list spanning [0, n-1] (first+last always kept), or
None when no downsampling is needed (n <= cap)."""
if n <= cap:
return None
return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist()
def _take(arr, idx):
"""Subset a list by an index list (idx may be None → return arr unchanged)."""
if idx is None or not isinstance(arr, list):
return arr
return [arr[i] for i in idx]
# trade_log / train_trade_log are lists of per-trade dicts (display-only — the
# Trade Log tab). They scale with TRADE count, not bar count, so the bar-window
# cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put
# 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model
# keeps every trade) to the most-recent N, recording `*_total` + `*_truncated`
# so the true count is still reported. Real strategies have far fewer than
# _TRADE_LOG_MAX trades, so this only ever bites pathological models.
_TRADE_LOG_MAX = 5000
def _cap_trade_log(tl):
"""Return (capped_list, original_len, was_truncated)."""
if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX:
return tl, (len(tl) if isinstance(tl, list) else 0), False
return tl[-_TRADE_LOG_MAX:], len(tl), True
def build_return_dict(split_result, bt_result, metrics, model, feature_cols,
                      signal_full, p_pos_test, p_neg_test, custom_figs=None,
                      bt_train_result=None, pre_stats=None):
    """Assemble the results dict handed back to the caller, doing the serialization.

    Timestamps are stringified and numeric series are converted to lists of
    floats (NaN/inf -> None for the OHLC, overlay, equity, rolling-accuracy and
    drawdown series). Diagnostics-only series are thinned via _downsample_idx
    and trade logs are capped via _cap_trade_log.

    Args:
        split_result: dict as built by the split step; this function reads
            df, close, close_test, X_test, y_test, split_dt, split_idx,
            n_train and n_test.
        bt_result: test-period backtest dict; reads equity, bar_returns,
            trade_returns, long_returns, short_returns and (optionally) trade_log.
        metrics: included under "metrics" as-is.
        model: must expose predict() and feature_importances_.
        feature_cols: feature names, paired positionally with the importances.
        signal_full: per-bar signal series (exported in full and reindexed to
            the test index for rolling accuracy).
        p_pos_test, p_neg_test: per-row class probabilities for the test set.
        custom_figs: optional list included under "custom_figs".
        bt_train_result: optional train-period backtest dict (same shape as bt_result).
        pre_stats: optional dict merged into the result at top level.

    Returns: dict with all required keys
    """
    df = split_result["df"]
    close = split_result["close"]
    close_test = split_result["close_test"]
    X_test = split_result["X_test"]
    y_test = split_result["y_test"]
    equity = bt_result["equity"]
    bar_returns = bt_result["bar_returns"]  # read but not used further in this function
    # OHLC
    ohlc_dates = [str(x) for x in df.index.tolist()]

    def _safe_list(arr):
        # float per element; None where the value is missing, NaN or infinite
        return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
                for x in arr]
    # Overlays
    bb, ma = compute_overlays(close, df.index)
    # Buy and hold equity — scaled to the first equity value (fallback 10000)
    capital = equity.iloc[0] if len(equity) > 0 else 10000
    bh_equity = capital * (close_test / close_test.iloc[0])
    # Confusion matrix (rows/cols ordered -1, 0, 1)
    from sklearn.metrics import confusion_matrix
    pred_test = model.predict(X_test)
    y_test_arr = np.asarray(y_test)
    cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1])
    # Rolling accuracy — 30-row rolling mean of "prediction == target", counting
    # only rows where the signal is non-zero (all rows if lengths do not match)
    sig_arr = signal_full.reindex(close_test.index).values
    correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index)
    active_test = pd.Series(sig_arr != 0, index=close_test.index) if len(sig_arr) == len(close_test) else pd.Series(True, index=close_test.index)
    correct_active = correct.where(active_test, other=np.nan)
    rolling_acc = correct_active.rolling(30, min_periods=1).mean()
    # Feature importance — the 15 largest, in ascending order
    # NOTE(review): models without feature_importances_ would fail here unless
    # the model wrapper supplies the attribute — confirm.
    importances = model.feature_importances_
    fi_pairs = sorted(zip(feature_cols, importances), key=lambda x: x[1])[-15:]
    # Drawdown relative to the running equity peak (0.0 where undefined)
    rolling_max = equity.cummax()
    drawdown = (equity - rolling_max) / rolling_max.replace(0, np.nan)
    drawdown = drawdown.fillna(0.0)
    # ── Downsample the Diagnostics-only series (see _downsample_idx) ──────────
    _eq_dates = [str(x) for x in close_test.index.tolist()]
    _eq_strat = _safe_list(equity.values)
    _eq_bh = _safe_list(bh_equity.values)
    _eq_idx = _downsample_idx(len(_eq_dates))
    _eq_dates, _eq_strat, _eq_bh = _take(_eq_dates, _eq_idx), _take(_eq_strat, _eq_idx), _take(_eq_bh, _eq_idx)
    _ra_dates = [str(x) for x in rolling_acc.index.tolist()]
    _ra_vals = [float(x) if (not np.isnan(x) and not np.isinf(x)) else None for x in rolling_acc.values]
    _ra_idx = _downsample_idx(len(_ra_dates))
    _ra_dates, _ra_vals = _take(_ra_dates, _ra_idx), _take(_ra_vals, _ra_idx)
    _dd_dates = [str(x) for x in drawdown.index.tolist()]
    _dd_vals = _safe_list(drawdown.values)
    _dd_idx = _downsample_idx(len(_dd_dates))
    _dd_dates, _dd_vals = _take(_dd_dates, _dd_idx), _take(_dd_vals, _dd_idx)
    _cp_pos = [float(x) for x in (p_pos_test.tolist() if hasattr(p_pos_test, 'tolist') else list(p_pos_test))]
    _cp_neg = [float(x) for x in (p_neg_test.tolist() if hasattr(p_neg_test, 'tolist') else list(p_neg_test))]
    _cp_pos = _take(_cp_pos, _downsample_idx(len(_cp_pos)))
    _cp_neg = _take(_cp_neg, _downsample_idx(len(_cp_neg)))
    # ── Trade logs — display-only (Trade Log tab); cap to most-recent N with a
    #    `_total` field so the true count is still reported (see _cap_trade_log).
    #    NB: ret_dist arrays are left FULL — a downstream path in callbacks.py
    #    recomputes n_trades/win-rate from len(ret_dist), so a sample would skew
    #    the displayed counts; they're small anyway and gzip handles them.
    _tl_test, _tl_test_n, _tl_test_tr = _cap_trade_log(bt_result.get("trade_log", []))
    _tl_tr, _tl_tr_n, _tl_tr_tr = _cap_trade_log(bt_train_result.get("trade_log", []) if bt_train_result else [])
    return {
        "ohlc": {
            "dates": ohlc_dates,
            "open": _safe_list(df["open"].values),
            "high": _safe_list(df["high"].values),
            "low": _safe_list(df["low"].values),
            "close": _safe_list(df["close"].values),
        },
        # Signal values are cast with float() only; a NaN signal would pass through as NaN
        "signals": {
            "dates": [str(x) for x in signal_full.index.tolist()],
            "values": [float(x) for x in signal_full.values],
        },
        "bb": bb,
        "ma": ma,
        "equity": {
            "dates": _eq_dates,
            "strategy": _eq_strat,
            "bh": _eq_bh,
        },
        "feature_importance": {
            "names": [p[0] for p in fi_pairs],
            "values": [float(p[1]) for p in fi_pairs],
        },
        "conf_matrix": cm.tolist(),
        "conf_hist": {
            "p_pos": _cp_pos,
            "p_neg": _cp_neg,
        },
        "rolling_acc": {
            "dates": _ra_dates,
            "values": _ra_vals,
        },
        "drawdown": {
            "dates": _dd_dates,
            "values": _dd_vals,
        },
        "ret_dist": [float(x) for x in bt_result["trade_returns"]],
        "ret_dist_long": [float(x) for x in bt_result["long_returns"]],
        "ret_dist_short": [float(x) for x in bt_result["short_returns"]],
        "train_ret_dist": [float(x) for x in bt_train_result["trade_returns"]] if bt_train_result else [],
        "train_ret_dist_long": [float(x) for x in bt_train_result["long_returns"]] if bt_train_result else [],
        "train_ret_dist_short": [float(x) for x in bt_train_result["short_returns"]] if bt_train_result else [],
        "trade_log": _tl_test,
        "train_trade_log": _tl_tr,
        "trade_log_total": _tl_test_n,
        "train_trade_log_total": _tl_tr_n,
        "trade_log_truncated": _tl_test_tr,
        "train_trade_log_truncated": _tl_tr_tr,
        # pre_stats keys override same-named keys above; keys below override pre_stats
        **(pre_stats or {}),
        "metrics": metrics,
        "split_dt": split_result["split_dt"],
        "split_idx": int(split_result["split_idx"]),
        "n_train": int(split_result["n_train"]),
        "n_test": int(split_result["n_test"]),
        "feature_cols": list(feature_cols),
        "custom_figs": custom_figs or [],
    }
# ════════════════════════════════════════════════════════════════════════════
# STRATEGY FRAMEWORK v2 — Config-driven architecture
# Claude writes feature_engineering() + strategy_config(). Framework does rest.
# ════════════════════════════════════════════════════════════════════════════
import importlib
_MODEL_REGISTRY = {
"XGBClassifier": ("xgboost", "XGBClassifier"),
"RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"),
"GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"),
"LogisticRegression": ("sklearn.linear_model", "LogisticRegression"),
"ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"),
"AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"),
}
def _build_model_from_config(config, X_train, y_train_enc):
"""Build, fit, and wrap a model from strategy_config dict."""
model_type = config.get("model_type", "RandomForestClassifier")
model_params = dict(config.get("model_params", {}))
if model_type not in _MODEL_REGISTRY:
raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}")
module_path, class_name = _MODEL_REGISTRY[model_type]
mod = importlib.import_module(module_path)
cls = getattr(mod, class_name)
# XGBoost defaults
if class_name == "XGBClassifier":
model_params.setdefault("use_label_encoder", False)
model_params.setdefault("eval_metric", "mlogloss")
model_params.setdefault("tree_method", "hist")
# Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is
# NON-reproducible even with random_state set — the parallel histogram
# gradient-sum order varies across threads, so the SAME code + data
# gives a slightly different model (and backtest) every run. Forcing
# single-thread makes training bit-reproducible so: (a) a user who
# copies a strategy and reruns it gets identical numbers, (b) the
# community "Live" score matches a redeploy, (c) "same code, different
# result" support reports go away. Cost: single-threaded XGB (a few
# seconds slower on large windows; hist is fast so it's minor). FORCED
# (not setdefault) so the guarantee can't be silently broken by a
# strategy passing n_jobs. Exact reproducibility holds within the
# platform (pinned versions / same Modal image); a user's own machine
# with different xgboost/numpy/CPU can still differ in low-order bits.
model_params["n_jobs"] = 1
# Common defaults
model_params.setdefault("random_state", 42)
from model_wrapper import ModelWrapper
clf = cls(**model_params)
clf.fit(X_train, y_train_enc)
enc = LabelEncoder()
enc.fit([-1, 0, 1])
return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1])
def _generate_signals(model, X, threshold):
"""Framework-owned signal generation. Deterministic threshold logic."""
proba = model.predict_proba(X)
classes = list(model.classes_)
idx_pos = classes.index(1) if 1 in classes else None
idx_neg = classes.index(-1) if -1 in classes else None
p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X))
p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X))
signal_vals = np.zeros(len(X))
signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals)
signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals)
# Both exceed: pick stronger
both = (p_pos >= threshold) & (p_neg >= threshold)
signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0)
return pd.Series(signal_vals, index=X.index), p_pos, p_neg
# ── Filter functions (all no-ops when config value is None) ──────────────
def _apply_direction_filter(signal, direction):
"""Zero out signals that don't match allowed direction."""
if direction is None or direction == "both":
return signal
s = signal.copy()
if direction == "long":
s[s < 0] = 0.0
elif direction == "short":
s[s > 0] = 0.0
return s
def _apply_session_filter(signal, index, session_hours):
"""Zero out signals outside session hours [start, end] UTC."""
if session_hours is None:
return signal
s = signal.copy()
start_h, end_h = session_hours[0], session_hours[1]
hours = index.hour
if start_h <= end_h:
mask = (hours >= start_h) & (hours < end_h)
else: # wrap around midnight, e.g. [22, 6]
mask = (hours >= start_h) | (hours < end_h)
s[~mask] = 0.0
return s
def _apply_atr_filter(signal, close, high, low, min_atr):
"""Zero out signals when NATR(14) is below threshold."""
if min_atr is None:
return signal
hl = high - low
hc = (high - close.shift(1)).abs()
lc = (low - close.shift(1)).abs()
tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
atr14 = tr.ewm(com=13, adjust=False).mean()
natr = atr14 / close.replace(0, np.nan)
s = signal.copy()
s[natr < min_atr] = 0.0
return s
def _apply_trend_filter(signal, close, trend_filter):
"""Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below."""
if trend_filter is None:
return signal
# Parse: "sma_50" → SMA with period 50
parts = trend_filter.lower().replace("-", "_").split("_")
if len(parts) >= 2 and parts[0] in ("sma", "ema"):
period = int(parts[1])
else:
return signal # unknown filter, skip
if parts[0] == "sma":
trend_line = close.rolling(period).mean()
else:
trend_line = close.ewm(span=period, adjust=False).mean()
s = signal.copy()
# Longs only above trend, shorts only below
s[(s > 0) & (close < trend_line)] = 0.0
s[(s < 0) & (close > trend_line)] = 0.0
return s
# ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ──
def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5):
    """Backtest engine that manages exits itself (stop loss, take profit, cooldown).

    Config keys read: stop_loss and take_profit (fractions of the entry
    price, None = disabled), cooldown (bars to sit out after a SL/TP or
    close_only exit, default 0) and on_opposite ("reverse" = flip into the
    opposite signal, anything else = close and go flat; default "reverse").

    Per bar, in order:
      1. If in a trade, test the bar's low/high against the stop and target.
         The stop is tested first; the exit fills at the stop/target price
         and the rest of the bar is skipped.
      2. If cooling down, burn one cooldown bar and skip.
      3. Otherwise act on a non-zero signal: enter at the close when flat, or
         handle an opposite signal according to on_opposite.
    A trade still open after the last bar is closed at the final close.

    Returns: same dict shape as run_backtest()
    """
    sl_frac = config.get("stop_loss")
    tp_frac = config.get("take_profit")
    cooldown_bars = config.get("cooldown", 0)
    opposite_mode = config.get("on_opposite", "reverse")

    signals = signal.values
    closes = close.values
    highs = high.values
    lows = low.values
    stamps = signal.index
    n_bars = len(closes)

    all_rets, long_rets, short_rets, log = [], [], [], []
    curve = np.full(n_bars, float(capital))
    growth = 1.0
    side = 0.0          # 1.0 long, -1.0 short, 0.0 flat
    entry_px = None
    entry_at = None     # bar position of the entry
    wait = 0            # cooldown bars still to sit out

    def _book_exit(bar, px, reason):
        # Record the open trade as closed at px and compound it into growth.
        nonlocal growth
        ret = float(side * (px - entry_px) / entry_px - cost)
        is_long = side == 1.0
        all_rets.append(ret)
        (long_rets if is_long else short_rets).append(ret)
        log.append({
            "type": "Buy" if is_long else "Sell",
            "entry_time": str(stamps[entry_at]),
            "exit_time": str(stamps[bar]),
            "entry_price": round(entry_px, 5),
            "exit_price": round(px, 5),
            "pnl": round(side * (px - entry_px), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })
        growth *= (1 + ret)

    for i in range(n_bars):
        bar_close, bar_high, bar_low, sig = closes[i], highs[i], lows[i], signals[i]

        # 1. Protective exits for an open trade
        if side != 0.0 and entry_px is not None:
            if side == 1.0:
                stop_px = None if sl_frac is None else entry_px * (1 - sl_frac)
                goal_px = None if tp_frac is None else entry_px * (1 + tp_frac)
                stopped = stop_px is not None and bar_low <= stop_px
                banked = (not stopped) and goal_px is not None and bar_high >= goal_px
            else:
                stop_px = None if sl_frac is None else entry_px * (1 + sl_frac)
                goal_px = None if tp_frac is None else entry_px * (1 - tp_frac)
                stopped = stop_px is not None and bar_high >= stop_px
                banked = (not stopped) and goal_px is not None and bar_low <= goal_px
            if stopped or banked:
                _book_exit(i, stop_px if stopped else goal_px, "SL" if stopped else "TP")
                side, entry_px, entry_at = 0.0, None, None
                wait = cooldown_bars
                curve[i] = capital * growth
                continue

        # 2. Cooldown
        if wait > 0:
            wait -= 1
            curve[i] = capital * growth
            continue

        # 3. Signal handling
        if sig != 0.0:
            if side == 0.0:
                side, entry_px, entry_at = sig, bar_close, i
            elif sig != side:
                reversing = opposite_mode == "reverse"
                _book_exit(i, bar_close, "signal" if reversing else "close_only")
                if reversing:
                    side, entry_px, entry_at = sig, bar_close, i
                else:
                    side, entry_px, entry_at = 0.0, None, None
                    wait = cooldown_bars
        curve[i] = capital * growth

    # Whatever is still open is closed at the last close
    if side != 0.0 and entry_px is not None and n_bars > 0 and entry_px != 0:
        _book_exit(n_bars - 1, closes[-1], "end")
        curve[-1] = capital * growth

    # Approximate bar returns: previous raw signal times this bar's price change
    bar_returns = np.zeros(n_bars)
    for i in range(1, n_bars):
        prev_px, prev_sig = closes[i - 1], signals[i - 1]
        if prev_px != 0 and prev_sig != 0:
            bar_returns[i] = prev_sig * (closes[i] - prev_px) / prev_px

    return {
        "equity": pd.Series(curve, index=close.index),
        "trade_returns": all_rets,
        "long_returns": long_rets,
        "short_returns": short_rets,
        "bar_returns": bar_returns,
        "trade_log": log,
    }
# ── run_strategy: the v2 orchestrator ────────────────────────────────────
def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="",
                 validation_date="", train_split=0.7, register_model_fn=None):
    """Config-driven strategy execution: load, featurise, train, backtest, report.

    The strategy author supplies ``feature_fn(df, close, open_, high, low)``
    and ``config_fn()``; the framework does everything else.

    Args:
        feature_fn: adds feature columns to the OHLC frame and returns it.
        config_fn: returns the strategy config dict.
        data_path, start_date, end_date: forwarded to load_ohlc().
        validation_date: if non-empty, rows at or before it form the training
            set; otherwise ``train_split`` is used as a row ratio.
        train_split: training fraction for the ratio split.
        register_model_fn: optional callback invoked with the fitted model.

    Signal filters (direction, session, ATR, trend) are applied identically
    to the train and test signals, each on its own partition's prices, so
    train and test statistics are produced under the same trading rules.

    Returns: results dict (same format as webapp expects)
    """
    config = config_fn()

    # Auto-correct SL/TP given as a percentage instead of a decimal fraction
    for key in ("stop_loss", "take_profit"):
        value = config.get(key)
        if value is not None and value > 0.1:  # >10% is almost certainly a percentage
            config[key] = value / 100.0
            print(f"[strategy] Auto-corrected {key}: {value} -> {config[key]} (was percentage, converted to decimal)")

    # 1. Load data
    df, close, open_, high, low = load_ohlc(data_path, start_date, end_date)

    # 2. Feature engineering (strategy author's function)
    df = feature_fn(df, close, open_, high, low)
    close = df["close"]

    # 3. Warm-up detection: drop leading rows where features are NaN BEFORE any fill
    feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")]
    raw_nans = df[feature_cols].isna().any(axis=1)
    valid_rows = ~raw_nans
    if valid_rows.any():
        first_valid = valid_rows.idxmax()
        if raw_nans.loc[:first_valid].any():
            df = df.loc[first_valid:].copy()
            close = df["close"]

    # 4. Target
    horizon = config.get("target_horizon", 4)
    target = make_target(close, horizon=horizon)

    # 5. Split (ffill only within each partition — no bfill leak)
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]
    high = df["high"]
    low = df["low"]
    X = df[feature_cols].replace([np.inf, -np.inf], np.nan)
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    split_idx = max(1, min(split_idx, len(df) - 1))

    X_train = X.iloc[:split_idx].ffill().fillna(0.0)
    X_test = X.iloc[split_idx:].ffill().fillna(0.0)
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    high_train = high.iloc[:split_idx]
    high_test = high.iloc[split_idx:]
    low_train = low.iloc[:split_idx]
    low_test = low.iloc[split_idx:]

    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)
    split_dt = str(df.index[split_idx])
    sp = {
        "df": df, "X_train": X_train, "X_test": X_test,
        "y_train": y_train, "y_test": y_test,
        "y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close, "close_train": close_train, "close_test": close_test,
        "split_idx": split_idx, "split_dt": split_dt,
        "n_train": len(X_train), "n_test": len(X_test),
    }

    # 6. Build model from config
    model = _build_model_from_config(config, X_train, y_train_enc)

    # 7. Generate signals
    threshold = config.get("signal_threshold", 0.55)
    signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold)
    signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold)

    # 8. Apply filters (order: direction → session → ATR → trend).
    #    Each filter is a no-op when its config value is None.
    direction = config.get("direction", "both")
    session_filter = config.get("session_filter")
    min_atr = config.get("min_atr")
    trend_filter = config.get("trend_filter")

    def _filter_signals(sig, part_close, part_high, part_low):
        sig = _apply_direction_filter(sig, direction)
        sig = _apply_session_filter(sig, sig.index, session_filter)
        sig = _apply_atr_filter(sig, part_close, part_high, part_low, min_atr)
        return _apply_trend_filter(sig, part_close, trend_filter)

    signal_test = _filter_signals(signal_test, close_test, high_test, low_test)
    signal_train = _filter_signals(signal_train, close_train, high_train, low_train)
    signal_full = pd.concat([signal_train, signal_test])

    # 9. Backtest (test + train). The v2 engine is only needed when some
    #    exit-management option is actually configured.
    has_risk = (config.get("stop_loss") is not None or
                config.get("take_profit") is not None or
                config.get("cooldown", 0) > 0 or
                config.get("on_opposite", "reverse") != "reverse")
    if has_risk:
        bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000)
        bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000)
    else:
        bt = run_backtest(signal_test, close_test, capital=10000)
        bt_train = run_backtest(signal_train, close_train, capital=10000)

    # 10. Metrics
    metrics = compute_metrics(bt, close_test, capital=10000)

    # 11. Pre-compute all trade stats (single source of truth)
    pre_stats = {
        "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000),
        "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000),
        "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000),
        "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000),
    }

    # 12. Register model
    if register_model_fn is not None:
        register_model_fn(model)

    # 13. Build return dict
    return build_return_dict(sp, bt, metrics, model, feature_cols,
                             signal_full, p_pos_test, p_neg_test, custom_figs=[],
                             bt_train_result=bt_train, pre_stats=pre_stats)
# ── End strategy_utils ──
# Run settings for this strategy; train_and_backtest() below forwards them to run_strategy().
DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/GBPUSD_15min.parquet'  # OHLC parquet file to load
START_DATE = '2026-04-15'  # start of the data window
END_DATE = '2026-05-25'  # end of the data window
VALIDATION_DATE = ""  # empty: run_strategy splits by TRAIN_SPLIT ratio instead of by date
TRAIN_SPLIT = 0.7  # fraction of rows used for training in the ratio split
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add this strategy's feature columns to the OHLC frame and return it.

    Feature groups: EMA levels/spreads, RSI(14), MACD, Bollinger width and
    %B, ATR(14), Stochastic, rates of change, candle shape, range ratio,
    lagged EMA spread and RSI, rolling volatility, a simplified ADX, and a
    cyclical encoding of the bar hour.

    Gaps are forward-filled only. Leading warm-up rows are deliberately left
    as NaN: back-filling them would copy future values into the earliest
    bars, and run_strategy() relies on those NaNs to detect and drop the
    warm-up period before training.

    Args:
        df: OHLC DataFrame; columns are added to it in place before the fill.
        close, open_, high, low: the price series the features are built from.

    Returns: DataFrame with the original columns plus all feature columns.
    """
    # ── EMA crossover core signals ──────────────────────────────────────────
    ema9 = close.ewm(span=9, adjust=False).mean()
    ema21 = close.ewm(span=21, adjust=False).mean()
    ema50 = close.ewm(span=50, adjust=False).mean()
    ema200 = close.ewm(span=200, adjust=False).mean()
    df["ema9"] = ema9
    df["ema21"] = ema21
    df["ema50"] = ema50
    df["ema200"] = ema200
    # Raw spread and normalised spread
    df["ema_diff"] = ema9 - ema21
    df["ema_diff_norm"] = (ema9 - ema21) / close
    # Cross signal: +1 when ema9 > ema21, -1 otherwise
    df["ema_cross_sign"] = np.where(ema9 > ema21, 1.0, -1.0)
    # Momentum of the spread (change over 1 and 3 bars)
    df["ema_diff_roc1"] = df["ema_diff"].diff(1)
    df["ema_diff_roc3"] = df["ema_diff"].diff(3)
    # Distance of price from ema50 and ema200 (normalised)
    df["dist_ema50"] = (close - ema50) / close
    df["dist_ema200"] = (close - ema200) / close

    # ── RSI (14) ────────────────────────────────────────────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0.0)
    loss = (-delta).clip(lower=0.0)
    avg_g = gain.ewm(com=13, adjust=False).mean()
    avg_l = loss.ewm(com=13, adjust=False).mean()
    rs = avg_g / avg_l.replace(0.0, np.nan)
    rsi14 = 100.0 - 100.0 / (1.0 + rs)
    df["rsi14"] = rsi14
    # RSI normalised and centred
    df["rsi14_norm"] = (rsi14 - 50.0) / 50.0

    # ── MACD ────────────────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    signal_ln = macd_line.ewm(span=9, adjust=False).mean()
    macd_hist = macd_line - signal_ln
    df["macd_line"] = macd_line / close
    df["macd_signal"] = signal_ln / close
    df["macd_hist"] = macd_hist / close
    df["macd_cross"] = np.where(macd_line > signal_ln, 1.0, -1.0)

    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std(ddof=0)
    bb_upper = bb_mid + 2.0 * bb_std
    bb_lower = bb_mid - 2.0 * bb_std
    bb_width = (bb_upper - bb_lower) / bb_mid.replace(0.0, np.nan)
    bb_pct = (close - bb_lower) / (bb_upper - bb_lower).replace(0.0, np.nan)
    df["bb_width"] = bb_width
    df["bb_pct"] = bb_pct

    # ── ATR (14) ─────────────────────────────────────────────────────────────
    tr = pd.concat([
        high - low,
        (high - close.shift(1)).abs(),
        (low - close.shift(1)).abs()
    ], axis=1).max(axis=1)
    atr14 = tr.ewm(com=13, adjust=False).mean()
    df["atr14"] = atr14
    df["natr14"] = atr14 / close  # normalised ATR (volatility proxy)

    # ── Stochastic %K / %D (14, 3) ──────────────────────────────────────────
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    stoch_k = 100.0 * (close - low14) / (high14 - low14).replace(0.0, np.nan)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k / 100.0
    df["stoch_d"] = stoch_d / 100.0
    df["stoch_diff"] = (stoch_k - stoch_d) / 100.0

    # ── Rate of Change ───────────────────────────────────────────────────────
    df["roc1"] = close.pct_change(1)
    df["roc4"] = close.pct_change(4)
    df["roc8"] = close.pct_change(8)
    df["roc16"] = close.pct_change(16)

    # ── Candle features ──────────────────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0.0, np.nan)
    df["body_ratio"] = body / candle_rng
    df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
    df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
    df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)

    # ── Volume-like proxy: range relative to rolling average ────────────────
    df["range_ratio"] = candle_rng / candle_rng.rolling(20).mean()

    # ── Lagged EMA diff features ─────────────────────────────────────────────
    for lag in [1, 2, 3, 4]:
        df[f"ema_diff_lag{lag}"] = df["ema_diff_norm"].shift(lag)

    # ── Lagged RSI ───────────────────────────────────────────────────────────
    for lag in [1, 2, 4]:
        df[f"rsi14_lag{lag}"] = df["rsi14_norm"].shift(lag)

    # ── Rolling volatility (std of returns) ──────────────────────────────────
    ret = close.pct_change()
    df["vol_8"] = ret.rolling(8).std()
    df["vol_16"] = ret.rolling(16).std()
    df["vol_32"] = ret.rolling(32).std()

    # ── Trend strength: ADX-like (simplified) ────────────────────────────────
    plus_dm = (high.diff()).clip(lower=0.0)
    minus_dm = (-low.diff()).clip(lower=0.0)
    overlap = pd.concat([plus_dm, minus_dm], axis=1).min(axis=1)
    plus_dm = plus_dm - overlap
    minus_dm = minus_dm - overlap
    smooth_tr = tr.ewm(com=13, adjust=False).mean()
    plus_di = 100.0 * plus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan)
    minus_di = 100.0 * minus_dm.ewm(com=13, adjust=False).mean() / smooth_tr.replace(0.0, np.nan)
    di_sum = (plus_di + minus_di).replace(0.0, np.nan)
    adx = ((plus_di - minus_di).abs() / di_sum * 100.0).ewm(com=13, adjust=False).mean()
    df["adx"] = adx / 100.0
    df["plus_di"] = plus_di / 100.0
    df["minus_di"] = minus_di / 100.0

    # ── Session hour ─────────────────────────────────────────────────────────
    # Cyclical encoding of the index hour; constant fallback for an index
    # that has no .hour attribute.
    if hasattr(df.index, "hour"):
        df["hour_sin"] = np.sin(2.0 * np.pi * df.index.hour / 24.0)
        df["hour_cos"] = np.cos(2.0 * np.pi * df.index.hour / 24.0)
    else:
        df["hour_sin"] = 0.0
        df["hour_cos"] = 1.0

    # ── Forward-fill interior gaps only ──────────────────────────────────────
    # No bfill: warm-up rows stay NaN so they can be dropped rather than
    # populated with values from the future.
    df = df.ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the EMA 9/21 + MACD XGBoost strategy.

    The dict bundles the model type and hyper-parameters, the signal
    threshold, stop-loss / take-profit levels, session and trend filters,
    the target horizon, and two free-text fields ("objective", "notes")
    describing the rationale.

    Returns:
        dict: a freshly built configuration mapping on every call.
    """
    # Hyper-parameters for the XGBClassifier named in "model_type".
    xgb_params = {
        "n_estimators": 400,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.80,
        "colsample_bytree": 0.75,
        "min_child_weight": 3,
        "gamma": 0.10,
        "reg_alpha": 0.05,
        "reg_lambda": 1.50,
        "objective": "binary:logistic",
        "tree_method": "hist",
        "random_state": 42,
    }

    # Free-text rationale (top-level "objective" key, distinct from the
    # XGBoost "objective" inside model_params).
    rationale = (
        "Maximize Sharpe ratio on EUR/USD 15-min data. "
        "Core signal: EMA(9) vs EMA(21) crossover enriched with MACD, RSI, "
        "Bollinger %B, Stochastic, ATR, ADX, candle structure and rolling "
        "volatility. XGBoost with moderate depth (4) and strong regularisation "
        "(gamma, alpha, lambda) prevents overfitting on ~6 weeks of intraday data. "
        "A 0.55 probability threshold filters low-confidence signals. "
        "A 2:1 TP:SL ratio (30 bp SL / 60 bp TP) improves the reward-risk "
        "balance. Session filter [6,20] UTC keeps the model away from the thin "
        "Asian pre-open. trend_filter sma_50 aligns entries with the prevailing "
        "short-term trend to reduce chop. Cooldown=0 and reverse-on-opposite "
        "allow continuous participation in trending EMA crossover moves."
    )
    remarks = (
        "round-trip cost 2e-5 is accounted for by the framework. "
        "target_horizon=4 bars (1 hour ahead) suits EMA crossover which "
        "generates medium-frequency signals rather than tick-level scalps. "
        "All features are normalised or expressed as ratios to minimise "
        "scale sensitivity for the logistic-objective XGBoost."
    )

    config = {
        "title": "EMA 9/21 Crossover + MACD Momentum (XGBoost)",
        "model_type": "XGBClassifier",
        "model_params": xgb_params,
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.0030,
        "take_profit": 0.0060,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [6, 20],
        "min_atr": None,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        "objective": rationale,
        "notes": remarks,
    }
    return config
# ── Framework v2: auto-generated wrapper ──
def train_and_backtest():
    """Run this strategy through the framework's ``run_strategy`` entry point.

    The feature builder, the config factory and the module-level data
    constants are forwarded as-is. ``VALIDATION_DATE`` and ``TRAIN_SPLIT``
    are optional module globals; when absent they default to ``''`` and
    ``0.7`` respectively.

    Returns:
        Whatever ``run_strategy`` returns (defined outside this block).
    """
    module_vars = globals()
    validation_date = module_vars.get('VALIDATION_DATE', '')
    train_split = module_vars.get('TRAIN_SPLIT', 0.7)
    return run_strategy(
        feature_engineering,
        strategy_config,
        DATA_PATH,
        START_DATE,
        END_DATE,
        validation_date,
        train_split,
        register_model_fn=register_model,
    )
|
||||||||||
|
1.86
|
AUD/USD XGBoost SMA+RSI+MACD+BB Momentum
Maximize risk-adjusted return (Sharpe/Calmar) on AUD/USD 15-min. XGBoost with depth-4 trees and conservative regularization (reg_lambda=1.5,…
|
D
@delta-atlas-858
|
AUDUSD | 15min | 62.9%63.2% | +10.32%+9.48% | 1.171.23 | 3.96%3.96% | 745106 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:32:18
# Model : XGBoost
# Feature Eng. : SMA (20,50,200), BB (20,2.0), RSI 14, MACD (12,26,9), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# AUDUSD 15-min XGBoost Strategy
# SMA + RSI + MACD + Bollinger Bands + ATR Feature Set
# Optimized for Risk-Adjusted Return
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Strategy inputs. Presumably read by the framework wrapper that drives this
# strategy — TODO confirm against the runner.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"  # OHLC parquet (AUDUSD, 15min per the filename)
START_DATE = "2025-04-24"  # requested start of the data window
END_DATE = "2026-04-24"  # requested end of the data window
VALIDATION_DATE = ""  # empty string: no explicit validation date given
TRAIN_SPLIT = 0.7  # presumably the train fraction used when VALIDATION_DATE is empty — verify
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append the SMA / Bollinger / RSI / MACD / ATR feature set to ``df``.

    Feature columns are written into ``df`` in a fixed order, then the frame
    is back-filled and forward-filled so indicator warm-up NaNs disappear.
    NOTE(review): the back-fill copies the next valid value backwards, so
    warm-up rows (and any mid-series NaN) see later data — confirm this is
    acceptable for training.

    Args:
        df: OHLC frame to extend; a ``volume`` column is used when present.
        close, open_, high, low: price series, assumed to share ``df``'s index.

    Returns:
        The filled DataFrame produced by ``df.bfill().ffill()``.
    """
    def as_flag(condition):
        # 1.0 where the condition holds, else 0.0 (NaN comparisons are False).
        return np.where(condition, 1.0, 0.0)

    # Simple moving averages plus relative distance of close from each.
    for window in [20, 50, 200]:
        average = close.rolling(window).mean()
        df[f"sma_{window}"] = average
        df[f"dm_sma_{window}"] = (close - average) / average

    # Bollinger Bands (20, 2.0) with population standard deviation.
    roll_20 = close.rolling(20)
    mid_band = roll_20.mean()
    band_sd = roll_20.std(ddof=0)
    upper_band = mid_band + 2.0 * band_sd
    lower_band = mid_band - 2.0 * band_sd
    band_span = upper_band - lower_band
    df["bb_mid"] = mid_band
    df["bb_upper"] = upper_band
    df["bb_lower"] = lower_band
    df["bb_width"] = band_span / mid_band
    df["bb_pct"] = (close - lower_band) / band_span

    # RSI 14 from exponentially smoothed up/down moves (com=13).
    change = close.diff()
    up_move = change.clip(lower=0)
    down_move = (-change).clip(lower=0)
    smoothed_up = up_move.ewm(com=13, min_periods=14).mean()
    smoothed_down = down_move.ewm(com=13, min_periods=14).mean()
    rel_strength = smoothed_up / smoothed_down.replace(0, np.nan)
    df["rsi_14"] = 100.0 - (100.0 / (1.0 + rel_strength))

    # MACD (12, 26, 9).
    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    macd = fast_ema - slow_ema
    macd_sig = macd.ewm(span=9, adjust=False).mean()
    df["macd_line"] = macd
    df["macd_signal"] = macd_sig
    df["macd_hist"] = macd - macd_sig

    # ATR 14 and ATR normalised by close.
    last_close = close.shift(1)
    true_range = pd.concat(
        [high - low, (high - last_close).abs(), (low - last_close).abs()],
        axis=1,
    ).max(axis=1)
    avg_true_range = true_range.ewm(com=13, min_periods=14).mean()
    df["atr_14"] = avg_true_range
    df["natr"] = avg_true_range / close

    # Rate of change over several horizons.
    for bars in [1, 4, 8, 16]:
        df[f"roc_{bars}"] = close.pct_change(bars)

    # Candle anatomy, scaled by the bar range (zero ranges become NaN).
    bar_span = (high - low).replace(0, np.nan)
    body_pair = pd.concat([close, open_], axis=1)
    df["body_ratio"] = (close - open_).abs() / bar_span
    df["upper_wick"] = (high - body_pair.max(axis=1)) / bar_span
    df["lower_wick"] = (body_pair.min(axis=1) - low) / bar_span
    df["candle_dir"] = np.where(close >= open_, 1.0, -1.0)

    # Volume relative to its 20-bar mean, only when a volume column exists.
    if "volume" in df.columns:
        volume_mean = df["volume"].rolling(20).mean()
        df["vol_ratio"] = df["volume"] / volume_mean.replace(0, np.nan)

    # Lagged RSI and MACD histogram (interleaved per lag).
    for lag in [1, 2, 3]:
        df[f"rsi_14_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)

    # RSI zone indicators.
    rsi = df["rsi_14"]
    df["rsi_ob"] = as_flag(rsi > 70)
    df["rsi_os"] = as_flag(rsi < 30)
    df["rsi_mid_up"] = as_flag((rsi > 50) & (rsi <= 70))
    df["rsi_mid_dn"] = as_flag((rsi >= 30) & (rsi < 50))

    # MACD line crossing its signal line on this bar.
    line_now = df["macd_line"]
    sig_now = df["macd_signal"]
    line_prev = df["macd_line"].shift(1)
    sig_prev = df["macd_signal"].shift(1)
    df["macd_cross_up"] = as_flag((line_now > sig_now) & (line_prev <= sig_prev))
    df["macd_cross_dn"] = as_flag((line_now < sig_now) & (line_prev >= sig_prev))

    # Price and SMA stack fully aligned in one direction.
    df["trend_aligned_bull"] = as_flag(
        (close > df["sma_20"])
        & (df["sma_20"] > df["sma_50"])
        & (df["sma_50"] > df["sma_200"])
    )
    df["trend_aligned_bear"] = as_flag(
        (close < df["sma_20"])
        & (df["sma_20"] < df["sma_50"])
        & (df["sma_50"] < df["sma_200"])
    )

    # Bollinger squeeze: band width below its own 20-bar average.
    width_mean = df["bb_width"].rolling(20).mean()
    df["bb_squeeze"] = as_flag(df["bb_width"] < width_mean)

    # Z-score of close against 20- and 50-bar windows (population std).
    roll_50 = close.rolling(50)
    df["close_zscore_20"] = (close - mid_band) / band_sd
    df["close_zscore_50"] = (close - roll_50.mean()) / roll_50.std(ddof=0)

    # Volatility regime: normalised ATR above its 20-bar average.
    natr_mean = df["natr"].rolling(20).mean()
    df["vol_regime_high"] = as_flag(df["natr"] > natr_mean)

    # Replace warm-up NaNs (back-fill first, then forward-fill).
    return df.bfill().ffill()
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the AUD/USD XGBoost momentum strategy.

    Includes the model type and hyper-parameters, the probability threshold,
    stop-loss / take-profit levels, position handling flags, filters, target
    horizon, and two descriptive text fields ("objective", "notes").

    Returns:
        dict: a freshly built configuration mapping on every call.
    """
    # Hyper-parameters for the XGBClassifier named in "model_type".
    booster_params = {
        "n_estimators": 500,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.75,
        "colsample_bytree": 0.70,
        "min_child_weight": 5,
        "gamma": 0.1,
        "reg_alpha": 0.1,
        "reg_lambda": 1.5,
        "scale_pos_weight": 1.0,
        "objective": "binary:logistic",
        "tree_method": "hist",
        "random_state": 42,
    }

    goal_text = (
        "Maximize risk-adjusted return (Sharpe/Calmar) on AUD/USD 15-min. "
        "XGBoost with depth-4 trees and conservative regularization (reg_lambda=1.5, "
        "min_child_weight=5) to reduce overfitting on FX data. "
        "2:1 RR (SL=0.5%, TP=1.0%) ensures positive expectancy with ~40%+ win rate. "
        "Subsample + colsample add stochastic diversity. 500 estimators with lr=0.04 "
        "balances bias-variance. Threshold 0.55 filters marginal signals."
    )
    notes_text = (
        "Features: SMA(20/50/200) with distances, Bollinger Bands width+pct, RSI-14 "
        "with zone flags, MACD histogram + crosses, ATR-14 + NATR, momentum ROC(1/4/8/16), "
        "candle body/wick ratios, trend alignment flags, BB squeeze, z-scores, vol regime. "
        "Reverse on opposite signal to capture trend reversals. Session filter disabled "
        "to capture AUD/USD Asian + London + NY sessions. Target horizon = 4 bars (1 hour)."
    )

    config = {
        "title": "AUD/USD XGBoost SMA+RSI+MACD+BB Momentum",
        "model_type": "XGBClassifier",
        "model_params": booster_params,
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [0, 23],
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": goal_text,
        "notes": notes_text,
    }
    return config
|
||||||||||
|
1.68
|
AUD/USD EMA Cross RSI Gradient Boost Scalper
Maximize risk-adjusted return (Sharpe) on AUD/USD 15-min bars. GradientBoostingClassifier with shrinkage (lr=0.04), moderate depth (4), subs…
|
R
@rapid-shark-854
|
AUDUSD | 15min | 59.8%63.1% | +1.99%+12.21% | 1.051.31 | 6.00%6.00% | 336111 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:21:53
# Model : Gradient Boosting
# Feature Eng. : EMA (9,21), RSI 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Strategy inputs. Presumably read by the framework wrapper that drives this
# strategy — TODO confirm against the runner.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"  # OHLC parquet (AUDUSD, 15min per the filename)
START_DATE = "2025-04-24"  # requested start of the data window
END_DATE = "2026-04-24"  # requested end of the data window
VALIDATION_DATE = ""  # empty string: no explicit validation date given
TRAIN_SPLIT = 0.7  # presumably the train fraction used when VALIDATION_DATE is empty — verify
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append the EMA-cross / RSI feature set (plus context features) to ``df``.

    Feature columns are written into ``df`` in a fixed order, then the frame
    is back-filled and forward-filled to remove warm-up NaNs.
    NOTE(review): the back-fill copies the next valid value backwards, so
    warm-up rows see later data — confirm this is acceptable for training.

    Args:
        df: OHLC frame to extend.
        close, open_, high, low: price series, assumed to share ``df``'s index.

    Returns:
        The filled DataFrame produced by ``df.bfill().ffill()``.
    """
    def ema(span):
        # Recursive (adjust=False) exponential moving average of close.
        return close.ewm(span=span, adjust=False).mean()

    # Fast/slow EMAs and the relative distance of close from each.
    fast = ema(9)
    slow = ema(21)
    df["ema_9"] = fast
    df["ema_21"] = slow
    df["dm_ema_9"] = (close - fast) / fast
    df["dm_ema_21"] = (close - slow) / slow

    # EMA spread, its previous value, its sign and the change in sign.
    spread = fast - slow
    spread_prev = spread.shift(1)
    spread_sign = np.sign(spread)
    df["ema_cross"] = spread
    df["ema_cross_prev"] = spread_prev
    df["ema_cross_sign"] = spread_sign
    df["ema_cross_change"] = spread_sign - np.sign(spread_prev)

    # RSI 14 from recursively smoothed gains and losses (com=13).
    delta = close.diff()
    gains = delta.clip(lower=0)
    losses = -delta.clip(upper=0)
    mean_gain = gains.ewm(com=13, adjust=False).mean()
    mean_loss = losses.ewm(com=13, adjust=False).mean()
    strength = mean_gain / mean_loss.replace(0, np.nan)
    rsi = 100 - (100 / (1 + strength))
    df["rsi_14"] = rsi

    # RSI-derived features (integer flags, matching the original dtypes).
    df["rsi_14_norm"] = (rsi - 50) / 50
    df["rsi_overbought"] = np.where(rsi > 70, 1, 0)
    df["rsi_oversold"] = np.where(rsi < 30, 1, 0)
    df["rsi_mid_cross"] = np.where(rsi > 50, 1, -1)

    # Longer EMAs for trend context.
    mid_ema = ema(50)
    long_ema = ema(200)
    df["ema_50"] = mid_ema
    df["ema_200"] = long_ema
    df["dm_ema_50"] = (close - mid_ema) / mid_ema
    df["dm_ema_200"] = (close - long_ema) / long_ema
    df["ema_50_200_spread"] = (mid_ema - long_ema) / long_ema

    # ATR 14 (recursive smoothing) and ATR normalised by close.
    prior_close = close.shift(1)
    ranges = pd.concat(
        [high - low, (high - prior_close).abs(), (low - prior_close).abs()],
        axis=1,
    )
    atr = ranges.max(axis=1).ewm(com=13, adjust=False).mean()
    df["atr_14"] = atr
    df["natr_14"] = atr / close

    # Bollinger Bands (20, 2) with the default (sample) standard deviation.
    mean_20 = close.rolling(20).mean()
    sd_20 = close.rolling(20).std()
    band_hi = mean_20 + 2 * sd_20
    band_lo = mean_20 - 2 * sd_20
    df["bb_upper"] = band_hi
    df["bb_lower"] = band_lo
    df["bb_pct_b"] = (close - band_lo) / (band_hi - band_lo).replace(0, np.nan)
    df["bb_width"] = (band_hi - band_lo) / mean_20

    # MACD (12, 26, 9) plus the sign of the histogram and its sign change.
    macd = ema(12) - ema(26)
    macd_sig = macd.ewm(span=9, adjust=False).mean()
    hist = macd - macd_sig
    df["macd_line"] = macd
    df["macd_signal_line"] = macd_sig
    df["macd_hist"] = hist
    df["macd_hist_sign"] = np.sign(hist)
    df["macd_hist_change"] = np.sign(hist) - np.sign(hist.shift(1))

    # Momentum / rate of change.
    pct_5 = close.pct_change(5)
    df["mom_5"] = pct_5
    df["mom_10"] = close.pct_change(10)
    df["mom_20"] = close.pct_change(20)
    df["roc_3"] = close.pct_change(3)

    # Candle anatomy expressed in ATR units.
    body_pair = pd.concat([close, open_], axis=1)
    df["body"] = (close - open_) / atr
    df["upper_wick"] = (high - body_pair.max(axis=1)) / atr
    df["lower_wick"] = (body_pair.min(axis=1) - low) / atr
    df["bar_range"] = (high - low) / atr

    # Bar range relative to price.
    df["high_low_ratio"] = (high - low) / close

    # Stochastic oscillator (14, 3).
    floor_14 = low.rolling(14).min()
    ceil_14 = high.rolling(14).max()
    pct_k = 100 * (close - floor_14) / (ceil_14 - floor_14).replace(0, np.nan)
    pct_d = pct_k.rolling(3).mean()
    df["stoch_k"] = pct_k
    df["stoch_d"] = pct_d
    df["stoch_kd_diff"] = pct_k - pct_d

    # Rolling volatility of one-bar returns and the short/long ratio.
    one_bar_ret = close.pct_change()
    df["vol_10"] = one_bar_ret.rolling(10).std()
    df["vol_20"] = one_bar_ret.rolling(20).std()
    df["vol_ratio"] = df["vol_10"] / df["vol_20"].replace(0, np.nan)

    # Lagged RSI and EMA spread.
    df["rsi_14_lag1"] = rsi.shift(1)
    df["rsi_14_lag2"] = rsi.shift(2)
    df["ema_cross_lag1"] = spread.shift(1)
    df["ema_cross_lag2"] = spread.shift(2)

    # EMA stack ordered in one direction.
    df["trend_aligned_bull"] = np.where((fast > slow) & (slow > mid_ema), 1, 0)
    df["trend_aligned_bear"] = np.where((fast < slow) & (slow < mid_ema), 1, 0)

    # Divergence proxy: price and RSI moving in opposite directions over 5 bars.
    rsi_move = rsi.diff(5)
    bearish_div = (pct_5 > 0) & (rsi_move < 0)
    bullish_div = (pct_5 < 0) & (rsi_move > 0)
    df["rsi_price_div"] = np.where(bearish_div, -1, np.where(bullish_div, 1, 0))

    # SMA 50 column (kept for trend_filter compatibility).
    df["sma_50"] = close.rolling(50).mean()

    return df.bfill().ffill()
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the AUD/USD Gradient Boosting scalper.

    Includes the model type and hyper-parameters, the probability threshold,
    stop-loss / take-profit levels, position handling flags, filters, target
    horizon, and two descriptive text fields ("objective", "notes").

    Returns:
        dict: a freshly built configuration mapping on every call.
    """
    # Hyper-parameters for the GradientBoostingClassifier named in "model_type".
    gbm_params = {
        "n_estimators": 400,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.8,
        "max_features": "sqrt",
        "min_samples_leaf": 20,
        "min_samples_split": 40,
        "validation_fraction": 0.1,
        "n_iter_no_change": 30,
        "tol": 1e-4,
    }

    goal_text = (
        "Maximize risk-adjusted return (Sharpe) on AUD/USD 15-min bars. "
        "GradientBoostingClassifier with shrinkage (lr=0.04), moderate depth (4), "
        "subsampling (0.8) and sqrt feature fraction controls overfitting on a noisy FX "
        "series. Early stopping (n_iter_no_change=30) prevents over-training. "
        "SL=0.5%/TP=1.0% gives 1:2 RR. Threshold=0.55 filters low-confidence signals. "
        "EMA 9/21 crossover with RSI 14 confirmation is the primary signal logic, "
        "reinforced by MACD, Bollinger Bands, Stochastic, and multi-period momentum."
    )
    notes_text = (
        "Features: EMA 9, 21, 50, 200 distances; RSI 14 with overbought/oversold flags; "
        "MACD histogram; Bollinger %B and width; Stochastic K/D; ATR-normalized candle "
        "body/wicks; 5/10/20-bar momentum; rolling volatility ratio; trend alignment flags; "
        "RSI-price divergence proxy. Target horizon 4 bars (1 hour ahead). "
        "All features are lagged or rolling — no lookahead bias."
    )

    config = {
        "title": "AUD/USD EMA Cross RSI Gradient Boost Scalper",
        "model_type": "GradientBoostingClassifier",
        "model_params": gbm_params,
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [0, 23],
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": goal_text,
        "notes": notes_text,
    }
    return config
|
||||||||||
|
1.59
|
AUD/USD EMA Cross (9/21) + RSI14 XGBoost Scalper
Maximise risk-adjusted return on AUD/USD 15-min bars. XGBoost chosen for its ability to capture non-linear interactions between the EMA-cros…
|
E
@echo-quanta-127
|
AUDUSD | 15min | 63.6%62.4% | +13.00%+9.80% | 1.191.23 | 4.73%4.73% | 1063141 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:40:19
# Model : XGBoost
# Feature Eng. : EMA (9,21), RSI 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Strategy inputs. Presumably read by the framework wrapper that drives this
# strategy — TODO confirm against the runner.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"  # OHLC parquet (AUDUSD, 15min per the filename)
START_DATE = "2025-04-24"  # requested start of the data window
END_DATE = "2026-04-24"  # requested end of the data window
VALIDATION_DATE = ""  # empty string: no explicit validation date given
TRAIN_SPLIT = 0.7  # presumably the train fraction used when VALIDATION_DATE is empty — verify
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Append the EMA 9/21 + RSI 14 feature set (plus context features) to ``df``.

    Feature columns are written into ``df`` in a fixed order, then the frame
    is back-filled and forward-filled to remove warm-up NaNs.
    NOTE(review): the back-fill copies the next valid value backwards, so
    warm-up rows see later data — confirm this is acceptable for training.

    The day-of-week encoding uses a 7-day cycle. ``DatetimeIndex.dayofweek``
    ranges over 0..6, so the previous 5-day cycle mapped Sunday onto Tuesday
    and Saturday onto Monday.

    Args:
        df: OHLC frame to extend; its index must expose ``hour`` and
            ``dayofweek`` (i.e. be a DatetimeIndex).
        close, open_, high, low: price series, assumed to share ``df``'s index.

    Returns:
        The filled DataFrame produced by ``df.bfill().ffill()``.
    """
    # ── EMA 9 and EMA 21 ──────────────────────────────────────────────────
    ema_9 = close.ewm(span=9, adjust=False).mean()
    ema_21 = close.ewm(span=21, adjust=False).mean()
    df["ema_9"] = ema_9
    df["ema_21"] = ema_21
    df["dm_ema_9"] = (close - ema_9) / ema_9
    df["dm_ema_21"] = (close - ema_21) / ema_21

    # EMA crossover signal: positive when fast > slow
    df["ema_cross"] = ema_9 - ema_21
    df["ema_cross_prev"] = df["ema_cross"].shift(1)
    # Binary: did a cross just occur on this bar?
    df["ema_cross_up"] = np.where((df["ema_cross"] > 0) & (df["ema_cross_prev"] <= 0), 1, 0)
    df["ema_cross_down"] = np.where((df["ema_cross"] < 0) & (df["ema_cross_prev"] >= 0), 1, 0)
    # Trend direction encoded as -1 / 1
    df["ema_trend"] = np.where(ema_9 > ema_21, 1, -1)

    # ── RSI 14 ────────────────────────────────────────────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(com=13, adjust=False).mean()
    avg_loss = loss.ewm(com=13, adjust=False).mean()
    # A zero average loss becomes NaN here (later filled), not RSI = 100.
    rs = avg_gain / avg_loss.replace(0, np.nan)
    rsi_14 = 100 - (100 / (1 + rs))
    df["rsi_14"] = rsi_14
    # RSI regime flags
    df["rsi_oversold"] = np.where(rsi_14 < 30, 1, 0)
    df["rsi_overbought"] = np.where(rsi_14 > 70, 1, 0)
    df["rsi_mid"] = rsi_14 - 50  # centred
    # RSI momentum: 1-bar change, and 3-bar change (column is named "delta2")
    df["rsi_delta"] = rsi_14.diff(1)
    df["rsi_delta2"] = rsi_14.diff(3)

    # ── Additional momentum / volatility features ─────────────────────────
    # ATR-like smoothed true range (EMA span=14) and its close-normalised form
    prev_close = close.shift(1)
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs(),
    ], axis=1).max(axis=1)
    atr_14 = tr.ewm(span=14, adjust=False).mean()
    df["atr_14"] = atr_14
    df["natr_14"] = atr_14 / close

    # Rate-of-change over various horizons
    for n in [4, 8, 16]:
        df[f"roc_{n}"] = close.pct_change(n)

    # Bollinger Band width and %B (20-period SMA, sample std)
    sma_20 = close.rolling(20).mean()
    std_20 = close.rolling(20).std()
    bb_upper = sma_20 + 2 * std_20
    bb_lower = sma_20 - 2 * std_20
    df["bb_width"] = (bb_upper - bb_lower) / sma_20
    df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower).replace(0, np.nan)

    # Candle body and wick features, relative to close
    df["body"] = (close - open_) / close
    df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / close
    df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / close

    # Bar range relative to ATR
    df["range_vs_atr"] = (high - low) / atr_14.replace(0, np.nan)

    # Lagged EMA cross signal
    df["ema_cross_lag1"] = df["ema_cross"].shift(1)
    df["ema_cross_lag2"] = df["ema_cross"].shift(2)

    # Combined signal: RSI and EMA cross alignment
    df["rsi_ema_bull"] = np.where((rsi_14 > 50) & (ema_9 > ema_21), 1, 0)
    df["rsi_ema_bear"] = np.where((rsi_14 < 50) & (ema_9 < ema_21), 1, 0)

    # Hour-of-day (cyclical encoding over 24 hours)
    hour = df.index.hour
    df["hour_sin"] = np.sin(2 * np.pi * hour / 24)
    df["hour_cos"] = np.cos(2 * np.pi * hour / 24)

    # Day-of-week (cyclical encoding). dayofweek is 0 (Mon) .. 6 (Sun), so the
    # cycle length must be 7 for each weekday to get a distinct (sin, cos) pair.
    dow = df.index.dayofweek
    df["dow_sin"] = np.sin(2 * np.pi * dow / 7)
    df["dow_cos"] = np.cos(2 * np.pi * dow / 7)

    # Fill NaN from warm-up periods
    df = df.bfill().ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
return {
"title": "AUD/USD EMA Cross (9/21) + RSI14 XGBoost Scalper",
"model_type": "XGBClassifier",
"model_params": {
"n_estimators": 400,
"max_depth": 4,
"learning_rate": 0.04,
"subsample": 0.8,
"colsample_bytree": 0.75,
"min_child_weight": 3,
"gamma": 0.1,
"reg_alpha": 0.05,
"reg_lambda": 1.5,
"objective": "binary:logistic",
"tree_method": "hist",
"random_state": 42,
},
"signal_threshold": 0.54,
"direction": "both",
"stop_loss": 0.005,
"take_profit": 0.010,
"cooldown": 0,
"max_positions": 1,
"on_opposite": "reverse",
"session_filter": [0, 23],
"min_atr": None,
"trend_filter": None,
"target_horizon": 4,
"objective": (
"Maximise risk-adjusted return on AUD/USD 15-min bars. "
"XGBoost chosen for its ability to capture non-linear interactions between "
"the EMA-cross regime, RSI momentum, volatility (NATR/BB width), and time-of-day. "
"Shallow trees (max_depth=4) with strong regularisation (reg_lambda=1.5, gamma=0.1) "
"reduce overfitting on the limited 1-year window. "
"2:1 R:R (SL=0.5%, TP=1.0%) improves Sharpe; reverse on opposite signal captures "
"trend momentum without missing transitions."
),
"notes": (
"Features: EMA-9/21 cross and distances, RSI-14 with regime flags and delta, "
"ATR-14, NATR, Bollinger Band width/%B, 4/8/16-bar ROC, candle anatomy, "
"time cyclical encodings. Threshold 0.54 filters marginal signals to raise precision. "
"No session filter applied — AUD/USD has meaningful moves across Asian and London sessions."
),
}
|
||||||||||
|
1.59
|
Bollinger reversion
|
M
@malcolmtan
|
Bollin | 47.9%— | +1.53%— | 1.46— | 0.67%0.67% | 71— |
|
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-25 02:29:29
# Model : XGBoost
# Feature Eng. : buy when price closes below the lower Bollinger Band(20,2) and RSI(14) < 35, exit at the middle band + Auto-add features: ON
# Signal / Entry : —
# Optimization : —
# Risk Mgmt : —
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# Bollinger Band Mean-Reversion + RSI Filter (XGBoost, Sharpe)
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# ── Inlined strategy_utils ──
"""
strategy_utils.py — Standard utility functions for generated strategies.
Claude imports these instead of writing boilerplate from scratch.
This ensures consistent behavior across all generated strategies.
"""
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Max backtest window per timeframe. A finer timeframe over a longer window
# blows up the results dict / parquet load / Modal train time (the 2026-05-12
# OOM was a 1-min × multi-year sweep) — and a 1-min strategy gains nothing from
# 2 years of 1-min bars. Enforced HERE because every training path (UI / API /
# Modal) funnels through run_strategy → load_ohlc. Env-overridable so a future
# "max plan" / dedicated-server tier can lift it.
# Keys are the timeframe token taken from the parquet filename suffix
# (e.g. "15min" in "AUDUSD_15min.parquet"); values are the maximum backtest
# window length in days. Timeframes not listed here are not capped.
_TF_MAX_DAYS = {
    "1min": 30,
    "5min": 90,
    "15min": 365,
    "1h": 730,
}
def _fetch_ohlc_from_internal(symbol: str, tf: str, start: str, end: str):
    """Download OHLC data as parquet from the ``/internal/ohlc`` HTTP endpoint.

    The base URL comes from env ``QM_INTERNAL_OHLC_BASE`` and the request is
    signed with an HMAC-SHA256 of ``"symbol|tf|start|end"`` keyed by env
    ``INTERNAL_WS_SECRET``; the hex digest is sent as the ``sig`` query
    parameter.

    Args:
        symbol: instrument symbol sent as the ``symbol`` query parameter.
        tf: timeframe token sent as the ``tf`` query parameter.
        start: start bound sent as-is (may be an empty string).
        end: end bound sent as-is (may be an empty string).

    Returns:
        pd.DataFrame decoded from the parquet response body.

    Raises:
        RuntimeError: if either env variable is missing/blank, or the
            response status is not 200. Network errors from ``urlopen``
            propagate unchanged so the caller can surface them.
    """
    import hashlib as _hashlib, hmac as _hmac, io as _io, os as _os
    import urllib.request as _ur, urllib.parse as _urp

    environ = _os.environ
    base = (environ.get("QM_INTERNAL_OHLC_BASE") or "").rstrip("/")
    secret = (environ.get("INTERNAL_WS_SECRET") or "").strip()
    if not base:
        raise RuntimeError("QM_INTERNAL_OHLC_BASE not set")
    if not secret:
        raise RuntimeError("INTERNAL_WS_SECRET not set")

    # Sign the request parameters so the server can authenticate the caller.
    signed_payload = f"{symbol}|{tf}|{start}|{end}".encode("utf-8")
    signature = _hmac.new(
        secret.encode("utf-8"), signed_payload, _hashlib.sha256
    ).hexdigest()

    query = {
        "symbol": symbol,
        "tf": tf,
        "start": start,
        "end": end,
        "sig": signature,
    }
    qs = _urp.urlencode(query)
    request = _ur.Request(
        f"{base}/internal/ohlc?{qs}",
        headers={"User-Agent": "qm-worker/1.0"},
    )

    with _ur.urlopen(request, timeout=30) as resp:
        if resp.status != 200:
            raise RuntimeError(f"/internal/ohlc returned {resp.status}")
        payload = resp.read()

    print(f"[load_ohlc:internal] {symbol} {tf} fetched {len(payload)} bytes", flush=True)
    return pd.read_parquet(_io.BytesIO(payload))
def _parse_symbol_tf_from_path(data_path: str):
    """Extract ``(symbol, timeframe)`` from a path like ``.../EURUSD_1min.parquet``.

    Only the basename is inspected. The symbol must be exactly six uppercase
    letters and the timeframe must look like ``<digits>min`` or ``<digits>h``.

    Returns:
        tuple: ``(symbol, timeframe)`` on a match, otherwise ``(None, None)``.
    """
    import os as _os, re as _re

    filename = _os.path.basename(str(data_path))
    found = _re.match(r"^([A-Z]{6})_(\d+min|\d+h)\.parquet$", filename)
    return (found.group(1), found.group(2)) if found else (None, None)
def load_ohlc(data_path, start_date="", end_date=""):
    """Load an OHLC parquet, sort it by time and apply the requested date window.

    Source selection: when env ``QM_USE_INTERNAL_OHLC`` equals ``"1"`` the data
    is fetched through ``_fetch_ohlc_from_internal`` (symbol and timeframe are
    parsed from the basename of ``data_path``); otherwise the parquet at
    ``data_path`` is read directly.

    Window cap: the timeframe token in the filename selects a maximum window
    (``_TF_MAX_DAYS``, overridable via env ``QM_MAX_DAYS_<TF>``). If the
    requested start lies before ``end - cap`` the start is moved forward and a
    message is printed. Any error in that check is printed and the check is
    skipped (best effort).

    Args:
        data_path: path whose basename looks like ``SYMBOL_TF.parquet``.
        start_date: inclusive lower bound; empty means no lower bound.
        end_date: inclusive upper bound; empty means no upper bound.

    Returns:
        tuple: ``(df, close, open_, high, low)`` where the series are the
        ``close`` / ``open`` / ``high`` / ``low`` columns of ``df``.

    Raises:
        RuntimeError: in internal mode when the basename cannot be parsed.
    """
    import os as _os, re as _re

    if _os.environ.get("QM_USE_INTERNAL_OHLC", "0") == "1":
        _sym, _tf = _parse_symbol_tf_from_path(data_path)
        if not (_sym and _tf):
            raise RuntimeError(
                f"QM_USE_INTERNAL_OHLC=1 but DATA_PATH basename does not match "
                f"SYMBOL_TF.parquet: {data_path}"
            )
        df = _fetch_ohlc_from_internal(_sym, _tf, start_date or "", end_date or "")
    else:
        df = pd.read_parquet(data_path)

    df.index = pd.to_datetime(df.index)
    df = df.sort_index()

    # Per-timeframe window cap (timeframe inferred from the parquet filename).
    filename = _os.path.basename(str(data_path))
    suffix_match = _re.search(r"_(\d+min|\d+h)\.parquet$", filename)
    _tf = suffix_match.group(1) if suffix_match else None
    _max_days = _TF_MAX_DAYS.get(_tf)
    cap_applies = bool(_max_days and _max_days > 0 and len(df))
    if cap_applies:
        override = _os.environ.get(f"QM_MAX_DAYS_{_tf.upper()}")
        if override and override.isdigit():
            _max_days = int(override)
        try:
            # Effective end is the requested end, clipped to the last bar.
            _eff_end = pd.Timestamp(end_date) if end_date else df.index.max()
            _eff_end = min(_eff_end, df.index.max())
            _floor = _eff_end - pd.Timedelta(days=_max_days)
            _req_start = pd.Timestamp(start_date) if start_date else df.index.min()
            if _req_start < _floor:
                print(f"[load_ohlc] {_tf} backtest window capped to {_max_days}d: "
                      f"start {_req_start.date()} -> {_floor.date()}", flush=True)
                start_date = _floor
        except Exception as _e:
            # Deliberately best-effort: a failed cap check must not abort the load.
            print(f"[load_ohlc] window-cap check skipped ({_e})", flush=True)

    if start_date:
        df = df[df.index >= start_date]
    if end_date:
        df = df[df.index <= end_date]

    return df, df["close"], df["open"], df["high"], df["low"]
def make_target(close, horizon=4):
    """Build the direction label ``horizon`` bars ahead.

    The label is the sign of ``close[t + horizon] - close[t]``: 1 for up,
    -1 for down, 0 for unchanged. The last ``horizon`` rows have no future
    close and are therefore NaN.

    Args:
        close: pd.Series of closing prices.
        horizon: number of bars to look ahead (default 4).

    Returns:
        pd.Series aligned with ``close``.
    """
    future_close = close.shift(-horizon)
    price_change = future_close - close
    return np.sign(price_change)
def split_data(df, target, feature_cols, train_split=0.7, validation_date=""):
    """Split features and labels into chronological train / test parts.

    Rows whose target is NaN are dropped first. The feature matrix is then
    back/forward filled, infinities are replaced, and remaining gaps become
    0.0. The split point is either the number of rows with index
    ``<= validation_date`` (when given) or ``int(len * train_split)``, and is
    clamped so both sides keep at least one row. Labels are encoded with a
    LabelEncoder fitted on ``[-1, 0, 1]``.

    Args:
        df: frame containing ``feature_cols`` and a ``close`` column.
        target: label series aligned with ``df``.
        feature_cols: names of the feature columns.
        train_split: train fraction used when ``validation_date`` is empty.
        validation_date: optional date bound for the training part.

    Returns:
        dict with keys: df, X_train, X_test, y_train, y_test, y_train_enc,
        y_test_enc, enc, close, close_train, close_test, split_idx, split_dt,
        n_train, n_test.
    """
    # Keep only rows that have a label.
    has_label = target.notna()
    df = df[has_label].copy()
    target = target[has_label]
    close = df["close"]

    # Feature matrix: fill gaps, then neutralise infinities and leftovers.
    X = df[feature_cols].copy()
    X = X.bfill().ffill()
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Choose the split row, then clamp it into [1, len - 1].
    n_rows = len(df)
    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(n_rows * train_split)
    split_idx = max(1, min(split_idx, n_rows - 1))

    train_rows = slice(None, split_idx)
    test_rows = slice(split_idx, None)
    X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
    y_train, y_test = target.iloc[train_rows], target.iloc[test_rows]
    close_train, close_test = close.iloc[train_rows], close.iloc[test_rows]
    split_dt = str(df.index[split_idx])

    # Label encoding — always fit on the full class set so codes are stable.
    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)

    result = {
        "df": df,
        "X_train": X_train,
        "X_test": X_test,
        "y_train": y_train,
        "y_test": y_test,
        "y_train_enc": y_train_enc,
        "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close,
        "close_train": close_train,
        "close_test": close_test,
        "split_idx": split_idx,
        "split_dt": split_dt,
        "n_train": len(X_train),
        "n_test": len(X_test),
    }
    return result
def compute_overlays(close, df_index):
    """Compute Bollinger Band and moving-average chart overlays.

    Bollinger Bands use a 20-bar mean with +/- 2 sample standard deviations;
    moving averages use 50, 100 and 200 bars. Each series is reindexed to
    ``df_index``, back/forward filled, and converted to a plain list in which
    NaN or infinite values become ``None``.

    Args:
        close: pd.Series of closing prices.
        df_index: index the output lists are aligned to.

    Returns:
        tuple: ``(bb, ma)`` where ``bb`` has keys upper/mid/lower and ``ma``
        has keys ma50/ma100/ma200, each mapping to a list.
    """
    def _as_list(series):
        # Align, fill warm-up gaps, then map non-finite values to None.
        filled = series.reindex(df_index).bfill().ffill()
        cleaned = []
        for value in filled.values:
            if value is None or np.isnan(value) or np.isinf(value):
                cleaned.append(None)
            else:
                cleaned.append(float(value))
        return cleaned

    centre = close.rolling(20).mean()
    spread = close.rolling(20).std()

    bb = {
        "upper": _as_list(centre + 2 * spread),
        "mid": _as_list(centre),
        "lower": _as_list(centre - 2 * spread),
    }
    ma = {
        "ma50": _as_list(close.rolling(50).mean()),
        "ma100": _as_list(close.rolling(100).mean()),
        "ma200": _as_list(close.rolling(200).mean()),
    }
    return bb, ma
def run_backtest(signal, close, capital=10000, cost=2e-5):
    """Run a signal-driven backtest with a per-trade transaction cost.

    A position is opened at the close of the first bar with a non-zero
    signal and is reversed whenever a non-zero signal differs from the
    current direction. Signal 0 means "hold the current position", not
    "close". Any position still open at the end is closed at the last bar
    with exit reason "end".

    Trade returns are price based:
    ``direction * (exit - entry) / entry - cost``.

    Bar returns follow the position actually held after the previous bar, so
    held (signal 0) bars contribute their price move. Previously they were
    counted as flat, which contradicted the hold semantics used for trades.
    Bar returns carry no transaction cost.

    NOTE: the equity curve only steps when a trade is closed by a direction
    change; the final "end" close is recorded in ``trade_returns`` but is not
    reflected in ``equity`` (behaviour kept as-is).

    Args:
        signal: pd.Series of position signals (non-zero = direction, 0 = hold).
        close: pd.Series of closing prices aligned with ``signal``.
        capital: starting capital for the equity curve.
        cost: round-trip cost subtracted from every trade return.

    Returns:
        dict with keys equity (pd.Series), trade_returns, long_returns,
        short_returns (lists), bar_returns (np.ndarray) and trade_log
        (list of dicts).
    """
    sig_arr = signal.values
    price_arr = close.values
    idx = signal.index
    n = len(price_arr)

    trade_returns = []
    long_returns = []
    short_returns = []
    trade_log = []

    def _book_trade(direction, entry_px, entry_i, exit_i, reason):
        """Record one closed trade and return its net return."""
        exit_px = price_arr[exit_i]
        ret = float(direction * (exit_px - entry_px) / entry_px - cost)
        trade_returns.append(ret)
        if direction == 1:
            long_returns.append(ret)
        else:
            short_returns.append(ret)
        trade_log.append({
            "type": "Buy" if direction == 1 else "Sell",
            "entry_time": str(idx[entry_i]),
            "exit_time": str(idx[exit_i]),
            "entry_price": round(entry_px, 5),
            "exit_price": round(exit_px, 5),
            "pnl": round(direction * (exit_px - entry_px), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })
        return ret

    last_dir = None
    entry_price = None
    entry_bar = None
    cumret = 1.0
    equity_vals = np.full(n, float(capital))
    position = np.zeros(n)  # direction held after each bar (0 = flat)

    for i in range(n):
        s = sig_arr[i]
        if s != 0.0 and s != last_dir:
            # Direction change — close the previous trade (if any), open new.
            if last_dir is not None and entry_price is not None and entry_price != 0:
                cumret *= (1 + _book_trade(last_dir, entry_price, entry_bar, i, "signal"))
            entry_price = price_arr[i]
            entry_bar = i
            last_dir = s
        equity_vals[i] = capital * cumret
        if last_dir is not None:
            position[i] = last_dir

    # Close the last open trade at the final bar.
    if last_dir is not None and entry_price is not None and n > 0 and entry_price != 0:
        _book_trade(last_dir, entry_price, entry_bar, n - 1, "end")

    # Bar returns for Sharpe: previous bar's held position times the bar move.
    bar_returns = np.zeros(n)
    for i in range(1, n):
        prev_price = price_arr[i - 1]
        held = position[i - 1]
        if prev_price != 0 and held != 0:
            bar_returns[i] = held * (price_arr[i] - prev_price) / prev_price

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }
def compute_trade_stats(trades, capital=10000):
    """Summarise a list of per-trade fractional returns.

    This is the one place trade statistics are computed; display code reads
    the returned dict rather than recomputing anything.

    Args:
        trades: per-trade returns as fractions (0.01 means +1%).
        capital: starting capital used to express net profit in currency.

    Returns:
        dict with keys ``n``, ``wins``, ``losses``, ``wr`` (win rate),
        ``avg`` (mean return), ``best``/``worst`` (largest winner / largest
        loser, 0 when there is none), ``ret`` (compounded return), ``np``
        (net profit), ``mdd`` (max drawdown, <= 0), ``pf`` (profit factor),
        ``rr`` (avg win / avg loss) and ``expect`` (net profit per trade).
        ``pf`` and ``rr`` are capped at 9999.0 when there are no losers.
    """
    stat_keys = ("n", "wins", "losses", "wr", "avg", "best", "worst",
                 "ret", "np", "mdd", "pf", "rr", "expect")
    if not trades:
        return dict.fromkeys(stat_keys, 0)

    n_trades = len(trades)
    winners = [r for r in trades if r > 0]
    losers = [r for r in trades if r < 0]

    # Compounded equity multiple after each trade, starting from 1.0.
    growth = np.cumprod([1.0] + [1 + r for r in trades])
    compounded = float(growth[-1])
    net_profit = capital * (compounded - 1)

    # Max drawdown: deepest relative fall below the running peak.
    running_peak = np.maximum.accumulate(growth)
    max_dd = float(((growth - running_peak) / running_peak).min())

    # Profit factor: gross wins over gross losses.
    gross_win = sum(winners)
    gross_loss = abs(sum(losers))
    if gross_loss > 0:
        profit_factor = gross_win / gross_loss
    elif gross_win > 0:
        profit_factor = 9999.0
    else:
        profit_factor = 0

    # Risk:reward — average winner over average loser.
    mean_win = float(np.mean(winners)) if winners else 0
    mean_loss = abs(float(np.mean(losers))) if losers else 0
    if mean_loss > 0:
        risk_reward = mean_win / mean_loss
    elif mean_win > 0:
        risk_reward = 9999.0
    else:
        risk_reward = 0

    return {
        "n": n_trades,
        "wins": len(winners),
        "losses": len(losers),
        "wr": round(len(winners) / n_trades, 4),
        "avg": round(float(np.mean(trades)), 6),
        "best": round(max(winners), 6) if winners else 0,
        "worst": round(min(losers), 6) if losers else 0,
        "ret": round(compounded - 1, 6),
        "np": round(net_profit, 2),
        "mdd": round(max_dd, 6),
        "pf": round(profit_factor, 2),
        "rr": round(risk_reward, 2),
        "expect": round(net_profit / n_trades, 2),
    }
def compute_metrics(bt_result, close_test, capital=10000):
    """Compute headline metrics from a backtest result.

    Strategy figures are trade-level (each entry of ``trade_returns`` is one
    closed trade, compounded in order). Buy-and-hold figures come from the
    bar-level ``close_test`` series.

    Note the two Sharpe ratios use different annualisation factors
    (``sqrt(252 * 26)`` on per-trade returns vs ``sqrt(252 * 24 * 4)`` on
    per-bar returns), so they are not directly comparable.

    Args:
        bt_result: dict with at least a ``"trade_returns"`` list.
        close_test: pandas Series of test-window closes. May be empty.
        capital: notional starting capital for the buy-and-hold curve.

    Returns:
        dict with ``total_ret``, ``bh_ret``, ``sharpe_strat``, ``sharpe_bh``,
        ``mdd`` and ``n_trades``. Buy-and-hold and Sharpe values that cannot
        be computed (empty series, zero/NaN first price, zero variance,
        non-finite result) are reported as 0.0.
    """
    def _finite_or_zero(value):
        value = float(value)
        return value if np.isfinite(value) else 0.0

    trade_returns = bt_result["trade_returns"]

    # Total return and max drawdown share one compounded equity path.
    if trade_returns:
        eq = np.cumprod([1.0] + [1 + r for r in trade_returns])
        peak = np.maximum.accumulate(eq)
        total_ret = eq[-1] - 1
        mdd = ((eq - peak) / peak).min()
    else:
        total_ret = 0.0
        mdd = 0.0

    # Strategy Sharpe — trade-level, population std.
    if len(trade_returns) >= 2 and float(np.std(trade_returns)) > 0:
        sharpe_strat = np.mean(trade_returns) / np.std(trade_returns) * np.sqrt(252 * 26)
    else:
        sharpe_strat = 0.0

    # Buy and hold — only defined when there is a usable first price.
    bh_ret = 0.0
    sharpe_bh = 0.0
    if len(close_test) > 0 and capital != 0:
        first_px = close_test.iloc[0]
        if first_px != 0 and np.isfinite(first_px):
            bh_equity = capital * (close_test / first_px)
            bh_ret = (bh_equity.iloc[-1] - capital) / capital
            bh_rets = bh_equity.pct_change().dropna()
            if len(bh_rets) > 1 and bh_rets.std() != 0:
                sharpe_bh = (bh_rets.mean() / bh_rets.std()) * np.sqrt(252 * 24 * 4)

    return {
        "total_ret": float(total_ret),
        "bh_ret": _finite_or_zero(bh_ret),
        "sharpe_strat": _finite_or_zero(sharpe_strat),
        "sharpe_bh": _finite_or_zero(sharpe_bh),
        "mdd": float(mdd),
        "n_trades": len(trade_returns),
    }
# Diagnostics line/histogram series (equity / drawdown / rolling_acc / conf_hist)
# only feed the small Diagnostics charts — they're never used by the price chart
# or scroll-back. On a 1-min model trained over the (2.2-capped) window these are
# still ~30k points each; downsample to a visually-identical resolution before the
# dict leaves the trainer so it doesn't carry that into Server-A RAM / Postgres.
_RESULTS_SERIES_MAX = 5000
def _downsample_idx(n, cap=_RESULTS_SERIES_MAX):
"""Evenly-spaced index list spanning [0, n-1] (first+last always kept), or
None when no downsampling is needed (n <= cap)."""
if n <= cap:
return None
return np.unique(np.linspace(0, n - 1, cap).astype(int)).tolist()
def _take(arr, idx):
"""Subset a list by an index list (idx may be None → return arr unchanged)."""
if idx is None or not isinstance(arr, list):
return arr
return [arr[i] for i in idx]
# trade_log / train_trade_log are lists of per-trade dicts (display-only — the
# Trade Log tab). They scale with TRADE count, not bar count, so the bar-window
# cap (Phase 2.2) doesn't bound them — a degenerate near-every-bar model can put
# 10k+ trade dicts in the blob (>3 MB). Cap each (independently — a small-N model
# keeps every trade) to the most-recent N, recording `*_total` + `*_truncated`
# so the true count is still reported. Real strategies have far fewer than
# _TRADE_LOG_MAX trades, so this only ever bites pathological models.
_TRADE_LOG_MAX = 5000
def _cap_trade_log(tl):
"""Return (capped_list, original_len, was_truncated)."""
if not isinstance(tl, list) or len(tl) <= _TRADE_LOG_MAX:
return tl, (len(tl) if isinstance(tl, list) else 0), False
return tl[-_TRADE_LOG_MAX:], len(tl), True
def build_return_dict(split_result, bt_result, metrics, model, feature_cols,
                      signal_full, p_pos_test, p_neg_test, custom_figs=None,
                      bt_train_result=None, pre_stats=None):
    """Assemble the results payload for one trained strategy.

    Converts index values to strings and numeric series to lists of floats
    (NaN/inf become None in the sanitised series) so the dict does not carry
    Timestamps or numpy arrays.

    Args:
        split_result: dict produced by the split step; reads ``df``,
            ``close``, ``close_test``, ``X_test``, ``y_test``, ``split_dt``,
            ``split_idx``, ``n_train`` and ``n_test``.
        bt_result: test backtest dict; reads ``equity``, ``trade_returns``,
            ``long_returns``, ``short_returns`` and optional ``trade_log``.
        metrics: metrics dict, stored as-is under ``"metrics"``.
        model: fitted model exposing ``predict``. ``feature_importances_`` is
            optional; when absent (e.g. a linear model) the importance chart
            is left empty.
        feature_cols: feature column names, in model input order.
        signal_full: signal Series over train + test.
        p_pos_test, p_neg_test: per-bar class probabilities for the test set.
        custom_figs: optional list stored under ``"custom_figs"``.
        bt_train_result: optional train backtest dict (same shape as
            ``bt_result``).
        pre_stats: optional dict merged into the top level of the result.

    Returns:
        dict with the chart series (``ohlc``, ``signals``, ``bb``, ``ma``,
        ``equity``, ``drawdown``, ``rolling_acc``, ``conf_hist``), model
        diagnostics, return distributions, capped trade logs and split
        metadata.
    """
    from sklearn.metrics import confusion_matrix

    df = split_result["df"]
    close = split_result["close"]
    close_test = split_result["close_test"]
    X_test = split_result["X_test"]
    y_test_arr = np.asarray(split_result["y_test"])
    equity = bt_result["equity"]

    def _safe_list(arr):
        # JSON has no NaN/inf, so anything non-finite is emitted as None.
        return [float(x) if (x is not None and not np.isnan(x) and not np.isinf(x)) else None
                for x in arr]

    def _dates(index):
        return [str(x) for x in index.tolist()]

    def _floats(values):
        seq = values.tolist() if hasattr(values, 'tolist') else list(values)
        return [float(x) for x in seq]

    def _train_floats(key):
        return [float(x) for x in bt_train_result[key]] if bt_train_result else []

    # Overlays
    bb, ma = compute_overlays(close, df.index)

    # Buy and hold equity, scaled to the strategy's starting equity
    capital = equity.iloc[0] if len(equity) > 0 else 10000
    bh_equity = capital * (close_test / close_test.iloc[0])

    # Confusion matrix
    pred_test = model.predict(X_test)
    cm = confusion_matrix(y_test_arr, pred_test, labels=[-1, 0, 1])

    # Rolling accuracy over bars where a signal is active
    sig_arr = signal_full.reindex(close_test.index).values
    correct = pd.Series((pred_test == y_test_arr).astype(float), index=X_test.index)
    if len(sig_arr) == len(close_test):
        active_test = pd.Series(sig_arr != 0, index=close_test.index)
    else:
        active_test = pd.Series(True, index=close_test.index)
    correct_active = correct.where(active_test, other=np.nan)
    rolling_acc = correct_active.rolling(30, min_periods=1).mean()

    # Feature importance — top 15, ascending. Not every registered estimator
    # exposes feature_importances_, so fall back to an empty chart.
    importances = getattr(model, "feature_importances_", None)
    if importances is None:
        fi_pairs = []
    else:
        fi_pairs = sorted(zip(feature_cols, importances), key=lambda p: p[1])[-15:]

    # Drawdown
    rolling_max = equity.cummax()
    drawdown = ((equity - rolling_max) / rolling_max.replace(0, np.nan)).fillna(0.0)

    # ── Downsample the Diagnostics-only series ───────────────────────────────
    eq_dates = _dates(close_test.index)
    eq_idx = _downsample_idx(len(eq_dates))
    eq_dates = _take(eq_dates, eq_idx)
    eq_strat = _take(_safe_list(equity.values), eq_idx)
    eq_bh = _take(_safe_list(bh_equity.values), eq_idx)

    ra_dates = _dates(rolling_acc.index)
    ra_idx = _downsample_idx(len(ra_dates))
    ra_dates = _take(ra_dates, ra_idx)
    ra_vals = _take(_safe_list(rolling_acc.values), ra_idx)

    dd_dates = _dates(drawdown.index)
    dd_idx = _downsample_idx(len(dd_dates))
    dd_dates = _take(dd_dates, dd_idx)
    dd_vals = _take(_safe_list(drawdown.values), dd_idx)

    cp_pos = _floats(p_pos_test)
    cp_neg = _floats(p_neg_test)
    cp_pos = _take(cp_pos, _downsample_idx(len(cp_pos)))
    cp_neg = _take(cp_neg, _downsample_idx(len(cp_neg)))

    # ── Trade logs — display-only; capped to the most recent entries with the
    # true count reported alongside. The ret_dist arrays stay FULL because a
    # downstream consumer derives trade counts / win rate from their length.
    tl_test, tl_test_n, tl_test_tr = _cap_trade_log(bt_result.get("trade_log", []))
    tl_tr, tl_tr_n, tl_tr_tr = _cap_trade_log(
        bt_train_result.get("trade_log", []) if bt_train_result else [])

    return {
        "ohlc": {
            "dates": _dates(df.index),
            "open": _safe_list(df["open"].values),
            "high": _safe_list(df["high"].values),
            "low": _safe_list(df["low"].values),
            "close": _safe_list(df["close"].values),
        },
        "signals": {
            "dates": _dates(signal_full.index),
            "values": [float(x) for x in signal_full.values],
        },
        "bb": bb,
        "ma": ma,
        "equity": {
            "dates": eq_dates,
            "strategy": eq_strat,
            "bh": eq_bh,
        },
        "feature_importance": {
            "names": [p[0] for p in fi_pairs],
            "values": [float(p[1]) for p in fi_pairs],
        },
        "conf_matrix": cm.tolist(),
        "conf_hist": {
            "p_pos": cp_pos,
            "p_neg": cp_neg,
        },
        "rolling_acc": {
            "dates": ra_dates,
            "values": ra_vals,
        },
        "drawdown": {
            "dates": dd_dates,
            "values": dd_vals,
        },
        "ret_dist": [float(x) for x in bt_result["trade_returns"]],
        "ret_dist_long": [float(x) for x in bt_result["long_returns"]],
        "ret_dist_short": [float(x) for x in bt_result["short_returns"]],
        "train_ret_dist": _train_floats("trade_returns"),
        "train_ret_dist_long": _train_floats("long_returns"),
        "train_ret_dist_short": _train_floats("short_returns"),
        "trade_log": tl_test,
        "train_trade_log": tl_tr,
        "trade_log_total": tl_test_n,
        "train_trade_log_total": tl_tr_n,
        "trade_log_truncated": tl_test_tr,
        "train_trade_log_truncated": tl_tr_tr,
        **(pre_stats or {}),
        "metrics": metrics,
        "split_dt": split_result["split_dt"],
        "split_idx": int(split_result["split_idx"]),
        "n_train": int(split_result["n_train"]),
        "n_test": int(split_result["n_test"]),
        "feature_cols": list(feature_cols),
        "custom_figs": custom_figs or [],
    }
# ════════════════════════════════════════════════════════════════════════════
# STRATEGY FRAMEWORK v2 — Config-driven architecture
# Claude writes feature_engineering() + strategy_config(). Framework does rest.
# ════════════════════════════════════════════════════════════════════════════
import importlib
_MODEL_REGISTRY = {
"XGBClassifier": ("xgboost", "XGBClassifier"),
"RandomForestClassifier": ("sklearn.ensemble", "RandomForestClassifier"),
"GradientBoostingClassifier": ("sklearn.ensemble", "GradientBoostingClassifier"),
"LogisticRegression": ("sklearn.linear_model", "LogisticRegression"),
"ExtraTreesClassifier": ("sklearn.ensemble", "ExtraTreesClassifier"),
"AdaBoostClassifier": ("sklearn.ensemble", "AdaBoostClassifier"),
}
def _build_model_from_config(config, X_train, y_train_enc):
"""Build, fit, and wrap a model from strategy_config dict."""
model_type = config.get("model_type", "RandomForestClassifier")
model_params = dict(config.get("model_params", {}))
if model_type not in _MODEL_REGISTRY:
raise ValueError(f"Unknown model_type '{model_type}'. Valid: {list(_MODEL_REGISTRY.keys())}")
module_path, class_name = _MODEL_REGISTRY[model_type]
mod = importlib.import_module(module_path)
cls = getattr(mod, class_name)
# XGBoost defaults
if class_name == "XGBClassifier":
model_params.setdefault("use_label_encoder", False)
model_params.setdefault("eval_metric", "mlogloss")
model_params.setdefault("tree_method", "hist")
# Determinism > speed (2026-05-25). XGBoost hist with n_jobs=-1 is
# NON-reproducible even with random_state set — the parallel histogram
# gradient-sum order varies across threads, so the SAME code + data
# gives a slightly different model (and backtest) every run. Forcing
# single-thread makes training bit-reproducible so: (a) a user who
# copies a strategy and reruns it gets identical numbers, (b) the
# community "Live" score matches a redeploy, (c) "same code, different
# result" support reports go away. Cost: single-threaded XGB (a few
# seconds slower on large windows; hist is fast so it's minor). FORCED
# (not setdefault) so the guarantee can't be silently broken by a
# strategy passing n_jobs. Exact reproducibility holds within the
# platform (pinned versions / same Modal image); a user's own machine
# with different xgboost/numpy/CPU can still differ in low-order bits.
model_params["n_jobs"] = 1
# Common defaults
model_params.setdefault("random_state", 42)
from model_wrapper import ModelWrapper
clf = cls(**model_params)
clf.fit(X_train, y_train_enc)
enc = LabelEncoder()
enc.fit([-1, 0, 1])
return ModelWrapper(clf, original_classes=enc.classes_, n_features=X_train.shape[1])
def _generate_signals(model, X, threshold):
"""Framework-owned signal generation. Deterministic threshold logic."""
proba = model.predict_proba(X)
classes = list(model.classes_)
idx_pos = classes.index(1) if 1 in classes else None
idx_neg = classes.index(-1) if -1 in classes else None
p_pos = proba[:, idx_pos] if idx_pos is not None else np.zeros(len(X))
p_neg = proba[:, idx_neg] if idx_neg is not None else np.zeros(len(X))
signal_vals = np.zeros(len(X))
signal_vals = np.where(p_pos >= threshold, 1.0, signal_vals)
signal_vals = np.where(p_neg >= threshold, -1.0, signal_vals)
# Both exceed: pick stronger
both = (p_pos >= threshold) & (p_neg >= threshold)
signal_vals[both] = np.where(p_pos[both] >= p_neg[both], 1.0, -1.0)
return pd.Series(signal_vals, index=X.index), p_pos, p_neg
# ── Filter functions (all no-ops when config value is None) ──────────────
def _apply_direction_filter(signal, direction):
"""Zero out signals that don't match allowed direction."""
if direction is None or direction == "both":
return signal
s = signal.copy()
if direction == "long":
s[s < 0] = 0.0
elif direction == "short":
s[s > 0] = 0.0
return s
def _apply_session_filter(signal, index, session_hours):
"""Zero out signals outside session hours [start, end] UTC."""
if session_hours is None:
return signal
s = signal.copy()
start_h, end_h = session_hours[0], session_hours[1]
hours = index.hour
if start_h <= end_h:
mask = (hours >= start_h) & (hours < end_h)
else: # wrap around midnight, e.g. [22, 6]
mask = (hours >= start_h) | (hours < end_h)
s[~mask] = 0.0
return s
def _apply_atr_filter(signal, close, high, low, min_atr):
"""Zero out signals when NATR(14) is below threshold."""
if min_atr is None:
return signal
hl = high - low
hc = (high - close.shift(1)).abs()
lc = (low - close.shift(1)).abs()
tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
atr14 = tr.ewm(com=13, adjust=False).mean()
natr = atr14 / close.replace(0, np.nan)
s = signal.copy()
s[natr < min_atr] = 0.0
return s
def _apply_trend_filter(signal, close, trend_filter):
"""Only allow signals aligned with trend. e.g. 'sma_50': longs above SMA, shorts below."""
if trend_filter is None:
return signal
# Parse: "sma_50" → SMA with period 50
parts = trend_filter.lower().replace("-", "_").split("_")
if len(parts) >= 2 and parts[0] in ("sma", "ema"):
period = int(parts[1])
else:
return signal # unknown filter, skip
if parts[0] == "sma":
trend_line = close.rolling(period).mean()
else:
trend_line = close.ewm(span=period, adjust=False).mean()
s = signal.copy()
# Longs only above trend, shorts only below
s[(s > 0) & (close < trend_line)] = 0.0
s[(s < 0) & (close > trend_line)] = 0.0
return s
# ── run_backtest_v2: framework-owned SL/TP/cooldown/position management ──
def run_backtest_v2(signal, close, high, low, config, capital=10000, cost=2e-5):
    """Single-position backtest with engine-managed SL / TP / cooldown.

    Per bar, in order:
      1. If a position is open, test the bar's low/high against the stop-loss
         and take-profit levels (stop-loss is tested first). A hit exits at
         the level price, starts the cooldown and ends the bar.
      2. If a cooldown is running, consume one bar of it and end the bar.
      3. Otherwise act on the signal: open when flat; on an opposite signal
         either reverse at the close (``on_opposite == "reverse"``) or close
         and go flat with a cooldown (any other value).
    A position still open after the last bar is closed at the final close.

    Args:
        signal: Series of -1 / 0 / +1 per bar.
        close, high, low: price Series aligned with ``signal``.
        config: reads ``stop_loss``, ``take_profit`` (fractions, optional),
            ``cooldown`` (bars, default 0) and ``on_opposite`` (default
            "reverse").
        capital: starting equity.
        cost: fractional cost subtracted from every closed trade's return.

    Returns:
        dict with ``equity`` (realised equity per bar), ``trade_returns``,
        ``long_returns``, ``short_returns``, ``bar_returns`` (approximate:
        previous bar's signal times the bar's close-to-close return) and
        ``trade_log``.
    """
    stop_loss = config.get("stop_loss")
    take_profit = config.get("take_profit")
    cooldown = config.get("cooldown", 0)
    reverse_on_opposite = config.get("on_opposite", "reverse") == "reverse"

    sig_arr = signal.values
    close_arr = close.values
    high_arr = high.values
    low_arr = low.values
    timestamps = signal.index
    n = len(close_arr)

    trade_returns, long_returns, short_returns, trade_log = [], [], [], []
    equity_vals = np.full(n, float(capital))

    cumret = 1.0
    position = 0.0       # 1.0 long, -1.0 short, 0.0 flat
    entry_price = None
    entry_bar = None     # array position of the entry bar
    cooldown_remaining = 0

    def _book_exit(exit_bar, exit_px, reason):
        # Record the open trade as closed at exit_px and compound its return.
        # Reads the current position / entry state; the caller resets it.
        nonlocal cumret
        ret = float(position * (exit_px - entry_price) / entry_price - cost)
        trade_returns.append(ret)
        (long_returns if position == 1.0 else short_returns).append(ret)
        trade_log.append({
            "type": "Buy" if position == 1.0 else "Sell",
            "entry_time": str(timestamps[entry_bar]),
            "exit_time": str(timestamps[exit_bar]),
            "entry_price": round(entry_price, 5),
            "exit_price": round(exit_px, 5),
            "pnl": round(position * (exit_px - entry_price), 5),
            "pnl_pct": round(ret * 100, 3),
            "exit_reason": reason,
        })
        cumret *= (1 + ret)

    for i in range(n):
        c = close_arr[i]
        h = high_arr[i]
        lo = low_arr[i]
        s = sig_arr[i]

        # 1. Protective exits for an open position
        if position != 0.0 and entry_price is not None:
            exit_px = None
            reason = None
            if position == 1.0:
                if stop_loss is not None and lo <= entry_price * (1 - stop_loss):
                    exit_px, reason = entry_price * (1 - stop_loss), "SL"
                elif take_profit is not None and h >= entry_price * (1 + take_profit):
                    exit_px, reason = entry_price * (1 + take_profit), "TP"
            else:
                if stop_loss is not None and h >= entry_price * (1 + stop_loss):
                    exit_px, reason = entry_price * (1 + stop_loss), "SL"
                elif take_profit is not None and lo <= entry_price * (1 - take_profit):
                    exit_px, reason = entry_price * (1 - take_profit), "TP"

            if reason is not None:
                _book_exit(i, exit_px, reason)
                position, entry_price, entry_bar = 0.0, None, None
                cooldown_remaining = cooldown
                equity_vals[i] = capital * cumret
                continue

        # 2. Cooldown — signals are ignored while it runs
        if cooldown_remaining > 0:
            cooldown_remaining -= 1
            equity_vals[i] = capital * cumret
            continue

        # 3. Signal handling
        if s != 0.0:
            if position == 0.0:
                position, entry_price, entry_bar = s, c, i
            elif s != position:
                if reverse_on_opposite:
                    _book_exit(i, c, "signal")
                    position, entry_price, entry_bar = s, c, i
                else:
                    _book_exit(i, c, "close_only")
                    position, entry_price, entry_bar = 0.0, None, None
                    cooldown_remaining = cooldown

        equity_vals[i] = capital * cumret

    # Close whatever is still open at the final close
    if position != 0.0 and entry_price is not None and n > 0 and entry_price != 0:
        _book_exit(n - 1, close_arr[-1], "end")
        equity_vals[-1] = capital * cumret

    # Bar returns for Sharpe (approximate): previous signal x bar return
    bar_returns = np.zeros(n)
    prev_px = close_arr[:-1]
    prev_sig = sig_arr[:-1]
    active = (prev_px != 0) & (prev_sig != 0)
    tail = bar_returns[1:]
    tail[active] = prev_sig[active] * (close_arr[1:][active] - prev_px[active]) / prev_px[active]

    return {
        "equity": pd.Series(equity_vals, index=close.index),
        "trade_returns": trade_returns,
        "long_returns": long_returns,
        "short_returns": short_returns,
        "bar_returns": bar_returns,
        "trade_log": trade_log,
    }
# ── run_strategy: the v2 orchestrator ────────────────────────────────────
def run_strategy(feature_fn, config_fn, data_path, start_date="", end_date="",
                 validation_date="", train_split=0.7, register_model_fn=None):
    """Run one config-driven strategy end to end.

    The strategy supplies ``feature_fn`` (adds feature columns to the OHLC
    frame) and ``config_fn`` (returns the config dict); everything else —
    warm-up trimming, target, split, model, signals, filters, backtests and
    the results payload — is done here.

    Args:
        feature_fn: ``f(df, close, open_, high, low) -> df`` with features.
        config_fn: zero-argument callable returning the strategy config.
        data_path, start_date, end_date: forwarded to ``load_ohlc``.
        validation_date: if non-empty, rows up to and including this date
            form the training set; otherwise ``train_split`` is used.
        train_split: training fraction used when no validation date is set.
        register_model_fn: optional callback invoked with the fitted model.

    Returns:
        The results dict produced by ``build_return_dict``.
    """
    config = config_fn()

    # Auto-correct SL/TP given as a percentage instead of a fraction:
    # anything above 10% is almost certainly meant as a percentage.
    for _key in ("stop_loss", "take_profit"):
        _val = config.get(_key)
        if _val is not None and _val > 0.1:
            config[_key] = _val / 100.0
            print(f"[strategy] Auto-corrected {_key}: {_val} -> {config[_key]} (was percentage, converted to decimal)")

    # 1. Load data
    df, close, open_, high, low = load_ohlc(data_path, start_date, end_date)

    # 2. Feature engineering (strategy-supplied)
    df = feature_fn(df, close, open_, high, low)

    # 3. Warm-up detection: drop leading rows where any feature is still NaN,
    #    BEFORE any fill is applied.
    feature_cols = [c for c in df.columns if c not in ("open", "high", "low", "close")]
    raw_nans = df[feature_cols].isna().any(axis=1)
    valid_rows = ~raw_nans
    if valid_rows.any():
        first_valid = valid_rows.idxmax()
        if raw_nans.loc[:first_valid].any():
            df = df.loc[first_valid:].copy()

    # 4. Target
    horizon = config.get("target_horizon", 4)
    target = make_target(df["close"], horizon=horizon)

    # 5. Split (ffill only within each partition — no bfill leak)
    mask = target.notna()
    df = df[mask].copy()
    target = target[mask]
    close = df["close"]
    high = df["high"]
    low = df["low"]
    X = df[feature_cols].replace([np.inf, -np.inf], np.nan)

    if validation_date:
        split_idx = len(df[df.index <= validation_date])
    else:
        split_idx = int(len(df) * train_split)
    # Keep at least one row on each side of the split.
    split_idx = max(1, min(split_idx, len(df) - 1))

    X_train = X.iloc[:split_idx].ffill().fillna(0.0)
    X_test = X.iloc[split_idx:].ffill().fillna(0.0)
    y_train = target.iloc[:split_idx]
    y_test = target.iloc[split_idx:]
    close_train = close.iloc[:split_idx]
    close_test = close.iloc[split_idx:]
    high_train = high.iloc[:split_idx]
    high_test = high.iloc[split_idx:]
    low_train = low.iloc[:split_idx]
    low_test = low.iloc[split_idx:]

    enc = LabelEncoder()
    enc.fit([-1, 0, 1])
    y_train_enc = enc.transform(y_train)
    y_test_enc = enc.transform(y_test)
    split_dt = str(df.index[split_idx])

    sp = {
        "df": df, "X_train": X_train, "X_test": X_test,
        "y_train": y_train, "y_test": y_test,
        "y_train_enc": y_train_enc, "y_test_enc": y_test_enc,
        "enc": enc,
        "close": close, "close_train": close_train, "close_test": close_test,
        "split_idx": split_idx, "split_dt": split_dt,
        "n_train": len(X_train), "n_test": len(X_test),
    }

    # 6. Build model from config
    model = _build_model_from_config(config, X_train, y_train_enc)

    # 7. Generate signals
    threshold = config.get("signal_threshold", 0.55)
    signal_train, p_pos_train, p_neg_train = _generate_signals(model, X_train, threshold)
    signal_test, p_pos_test, p_neg_test = _generate_signals(model, X_test, threshold)

    # 8. Apply filters (order: direction → session → ATR → trend).
    #    Every filter is applied to BOTH partitions so the train and test
    #    backtests trade under the same rules and their stats are comparable.
    #    Each filter is a no-op when its config value is None; ATR and trend
    #    lines are computed per partition, from that partition's prices only.
    direction = config.get("direction", "both")
    signal_test = _apply_direction_filter(signal_test, direction)
    signal_train = _apply_direction_filter(signal_train, direction)

    session_filter = config.get("session_filter")
    signal_test = _apply_session_filter(signal_test, signal_test.index, session_filter)
    signal_train = _apply_session_filter(signal_train, signal_train.index, session_filter)

    min_atr = config.get("min_atr")
    signal_test = _apply_atr_filter(signal_test, close_test, high_test, low_test, min_atr)
    signal_train = _apply_atr_filter(signal_train, close_train, high_train, low_train, min_atr)

    trend_filter = config.get("trend_filter")
    signal_test = _apply_trend_filter(signal_test, close_test, trend_filter)
    signal_train = _apply_trend_filter(signal_train, close_train, trend_filter)

    signal_full = pd.concat([signal_train, signal_test])

    # 9. Backtest (test + train). The v2 engine is only needed when some
    #    risk-management option is actually configured.
    has_risk = (config.get("stop_loss") is not None or
                config.get("take_profit") is not None or
                config.get("cooldown", 0) > 0 or
                config.get("on_opposite", "reverse") != "reverse")
    if has_risk:
        bt = run_backtest_v2(signal_test, close_test, high_test, low_test, config, capital=10000)
        bt_train = run_backtest_v2(signal_train, close_train, high_train, low_train, config, capital=10000)
    else:
        bt = run_backtest(signal_test, close_test, capital=10000)
        bt_train = run_backtest(signal_train, close_train, capital=10000)

    # 10. Metrics
    metrics = compute_metrics(bt, close_test, capital=10000)

    # 11. Pre-compute all trade stats (single source of truth)
    pre_stats = {
        "train_stats": compute_trade_stats(bt_train.get("trade_returns", []), capital=10000),
        "test_stats": compute_trade_stats(bt.get("trade_returns", []), capital=10000),
        "long_stats": compute_trade_stats(bt.get("long_returns", []), capital=10000),
        "short_stats": compute_trade_stats(bt.get("short_returns", []), capital=10000),
    }

    # 12. Register model
    if register_model_fn is not None:
        register_model_fn(model)

    # 13. Build return dict
    return build_return_dict(sp, bt, metrics, model, feature_cols,
                             signal_full, p_pos_test, p_neg_test, custom_figs=[],
                             bt_train_result=bt_train, pre_stats=pre_stats)
# ── End strategy_utils ──
# Run parameters for this strategy; train_and_backtest() below passes them to
# run_strategy().
# OHLC parquet file (the file name indicates AUD/USD 15-minute bars).
DATA_PATH = '/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet'
# Date window, passed to run_strategy() as start_date / end_date.
START_DATE = '2026-04-15'
END_DATE = '2026-05-25'
# Empty string: run_strategy() falls back to the TRAIN_SPLIT fraction.
VALIDATION_DATE = ""
# Fraction of rows used for training when no validation date is set.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add Bollinger / RSI mean-reversion features to the OHLC frame.

    Columns are added to ``df`` in a fixed order (Bollinger Bands, RSI,
    setup flags, ATR, rate of change, EMA/SMA trend context, candle shape,
    true-range z-score, lags, RSI slope, distance to mid band).

    Warm-up rows are deliberately left as NaN where the underlying rolling
    window is not yet full: the caller trims leading NaN rows itself, and
    back-filling them would copy later values into earlier bars (look-ahead)
    and hide the warm-up from that trimming. Only a causal forward fill is
    applied.

    Args:
        df: OHLC DataFrame; mutated in place with the new columns.
        close, open_, high, low: the price Series of ``df``.

    Returns:
        DataFrame with the original OHLC columns plus all features.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    # %B — position of close within the band (0 = lower, 1 = upper)
    bb_range = bb_upper - bb_lower
    df["bb_pct_b"] = np.where(bb_range > 0, (close - bb_lower) / bb_range, 0.5)
    # Bandwidth — normalised band width (regime filter)
    df["bb_bandwidth"] = np.where(bb_mid > 0, bb_range / bb_mid, 0.0)
    # Distance from each band (signed, normalised by sigma)
    df["dist_lower"] = np.where(bb_sigma > 0, (close - bb_lower) / bb_sigma, 0.0)
    df["dist_upper"] = np.where(bb_sigma > 0, (bb_upper - close) / bb_sigma, 0.0)
    df["dist_mid"] = np.where(bb_sigma > 0, (close - bb_mid) / bb_sigma, 0.0)
    # Below lower band flag
    df["below_lower"] = np.where(close < bb_lower, 1, 0)
    # Above upper band flag
    df["above_upper"] = np.where(close > bb_upper, 1, 0)

    # ── RSI (14) ─────────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    avg_loss = loss.ewm(com=rsi_period - 1, min_periods=rsi_period).mean()
    # Where avg_loss is zero or not yet defined, rs falls back to 100
    # (RSI ≈ 99), so this column is never NaN.
    rs = np.where(avg_loss > 0, avg_gain / avg_loss, 100.0)
    rsi = 100.0 - 100.0 / (1.0 + rs)
    df["rsi"] = rsi
    # RSI-derived flags and distances
    df["rsi_oversold"] = np.where(rsi < 35, 1, 0)
    df["rsi_overbought"] = np.where(rsi > 65, 1, 0)
    df["rsi_dist_35"] = rsi - 35.0          # negative when oversold
    df["rsi_dist_65"] = rsi - 65.0          # positive when overbought
    df["rsi_norm"] = (rsi - 50.0) / 50.0    # centred, ±1 range

    # ── Core entry condition features ────────────────────────────────────────
    # Buy setup: close < lower BB AND RSI < 35
    df["long_setup"] = np.where((close < bb_lower) & (rsi < 35), 1, 0)
    # Sell setup: close > upper BB AND RSI > 65
    df["short_setup"] = np.where((close > bb_upper) & (rsi > 65), 1, 0)

    # ── ATR (14) — volatility context ────────────────────────────────────────
    atr_period = 14
    hl = high - low
    hc = (high - close.shift(1)).abs()
    lc = (low - close.shift(1)).abs()
    tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
    atr = tr.ewm(com=atr_period - 1, min_periods=atr_period).mean()
    df["atr"] = atr
    df["natr"] = np.where(close > 0, atr / close, 0.0)

    # ── Momentum / Rate-of-Change ─────────────────────────────────────────────
    for n in [1, 3, 5, 10]:
        df[f"roc_{n}"] = np.where(
            close.shift(n) > 0,
            (close - close.shift(n)) / close.shift(n),
            0.0
        )

    # ── EMA trend context (fast / slow) ──────────────────────────────────────
    ema_fast = close.ewm(span=9, min_periods=9).mean()
    ema_slow = close.ewm(span=21, min_periods=21).mean()
    df["ema_fast"] = ema_fast
    df["ema_slow"] = ema_slow
    df["ema_diff"] = np.where(ema_slow > 0, (ema_fast - ema_slow) / ema_slow, 0.0)
    df["ema_bull"] = np.where(ema_fast > ema_slow, 1, 0)
    # SMA-50 trend context
    df["sma_50"] = close.rolling(50).mean()

    # ── Candle body & wick features ───────────────────────────────────────────
    body = (close - open_).abs()
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = (body / candle_rng).fillna(0.0)
    df["upper_wick"] = np.where(candle_rng.notna(), (high - close.clip(lower=open_)) / candle_rng.fillna(1), 0.0)
    df["lower_wick"] = np.where(candle_rng.notna(), (close.clip(upper=open_) - low) / candle_rng.fillna(1), 0.0)
    df["bull_candle"] = np.where(close > open_, 1, 0)

    # ── Volume-like proxy — true range z-score ────────────────────────────────
    tr_mean = tr.rolling(20).mean()
    tr_std = tr.rolling(20).std(ddof=0).replace(0, np.nan)
    df["tr_zscore"] = ((tr - tr_mean) / tr_std).fillna(0.0)

    # ── Lagged RSI and %B (1, 2, 3 bars back) ────────────────────────────────
    for lag in [1, 2, 3]:
        df[f"rsi_lag{lag}"] = df["rsi"].shift(lag)
        df[f"bb_pct_b_lag{lag}"] = df["bb_pct_b"].shift(lag)

    # ── RSI slope ────────────────────────────────────────────────────────────
    df["rsi_slope3"] = df["rsi"] - df["rsi"].shift(3)

    # ── Mean-reversion proximity: how far price is from middle band ───────────
    df["pct_to_mid"] = np.where(close > 0, (bb_mid - close) / close, 0.0)

    # ── Gaps: forward fill only ───────────────────────────────────────────────
    # No bfill: leading warm-up NaNs must stay NaN so they can be trimmed, and
    # back-filling would leak later values into earlier bars.
    df = df.ffill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for this strategy.

    ``model_params`` states the values that actually take effect at training
    time: the target has three classes (-1, 0, 1), for which XGBoost's
    scikit-learn wrapper trains a multi-class soft-probability objective
    regardless of the configured one, and the framework forces ``n_jobs`` to
    1 for reproducible XGBoost training.

    A fresh dict is built on every call.
    """
    model_params = {
        "n_estimators": 500,
        "max_depth": 4,
        "learning_rate": 0.03,
        "subsample": 0.75,
        "colsample_bytree": 0.70,
        "min_child_weight": 5,
        "gamma": 0.1,
        "reg_alpha": 0.05,
        "reg_lambda": 1.5,
        # Three-class target, so a multi-class objective (was the misleading
        # "binary:logistic", which XGBClassifier overrides for >2 classes).
        "objective": "multi:softprob",
        "random_state": 42,
        # The framework forces single-threaded XGBoost; say so here.
        "n_jobs": 1,
    }
    return {
        "title": "BB Mean-Reversion + RSI Oversold/Overbought (XGBoost)",
        "model_type": "XGBClassifier",
        "model_params": model_params,
        "signal_threshold": 0.55,
        "direction": "both",
        # SL / TP are fractions of the entry price (0.10% and 0.20%).
        "stop_loss": 0.0010,
        "take_profit": 0.0020,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        "session_filter": [7, 17],
        "min_atr": 0.00005,
        "trend_filter": None,
        "target_horizon": 4,
        "objective": (
            "Maximize Sharpe ratio by exploiting Bollinger Band mean-reversion "
            "with RSI confirmation. Entry conditions (close < lower BB, RSI < 35 "
            "for longs; close > upper BB, RSI > 65 for shorts) are encoded as "
            "features together with momentum, ATR volatility, candle structure, "
            "and lagged indicators. XGBoost with strong regularisation "
            "(reg_lambda=1.5, gamma=0.1, min_child_weight=5) and a low learning "
            "rate avoids overfitting on the 6-week window. Session filter "
            "[7,17] UTC targets liquid London/NY overlap, reducing noise. "
            "TP:SL ratio of 2:1 supports positive expected value even at "
            "moderate win rates, pushing Sharpe higher."
        ),
        "notes": (
            "Features: %B position, RSI (raw + flags + slope + lags), "
            "EMA cross, ATR/NATR, ROC(1/3/5/10), candle body/wick ratios, "
            "TR z-score, distance-to-midband, long/short setup flags. "
            "Round-trip cost ~2e-5 is implicitly absorbed by the 10-pip TP target. "
            "Cooldown=0 allows immediate re-entry after mean-reversion completes."
        ),
    }
# ── Framework v2: auto-generated wrapper ──
def train_and_backtest():
    """Entry point: run this strategy through the v2 framework.

    VALIDATION_DATE and TRAIN_SPLIT are optional module globals; when one is
    not defined, '' and 0.7 are used respectively.
    """
    module_globals = globals()
    return run_strategy(
        feature_engineering,
        strategy_config,
        DATA_PATH,
        START_DATE,
        END_DATE,
        module_globals.get('VALIDATION_DATE', ''),
        module_globals.get('TRAIN_SPLIT', 0.7),
        register_model_fn=register_model,
    )
|
||||||||||
|
1.31
|
EUR/USD SMA Trend + Multi-Indicator GBM Scalper
Maximise risk-adjusted return (Sharpe / Calmar). GradientBoostingClassifier chosen for its strong performance on tabular financial data with…
|
S
@still-lynx-704
|
EURUSD | 15min | 39.7%43.5% | +5.29%+4.25% | 1.621.29 | 1.82%1.82% | 7323 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:35:43
# Model : Gradient Boosting
# Feature Eng. : SMA (20,50,200) + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Location of the EUR/USD 15-minute OHLC parquet file.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/EURUSD_15min.parquet"
# Date window as ISO strings.
# NOTE(review): inclusive/exclusive bounds are decided by the framework — confirm.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# Left empty in this script.
# NOTE(review): presumably the framework then relies on TRAIN_SPLIT — confirm.
VALIDATION_DATE = ""
# NOTE(review): looks like the training fraction of the window — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add the EUR/USD feature columns to ``df`` and return the NaN-free frame.

    Args:
        df: DataFrame that receives the feature columns (mutated in place
            before the final fill). Its index must expose ``.hour``.
        close, open_, high, low: price Series aligned with ``df``.

    Returns:
        The frame with all feature columns added and gaps filled.

    Gaps are forward-filled first, so an interior NaN (e.g. the candle
    ratios on a zero-range bar) takes the last known value and never a
    future one. Only the leading warm-up rows, which have no history, are
    back-filled from the first valid observation.
    """
    # ── SMA core features (required) ──────────────────────────────────────
    for period in [20, 50, 200]:
        sma = close.rolling(period).mean()
        df[f"sma_{period}"] = sma
        df[f"dm_sma_{period}"] = (close - sma) / sma
    # ── SMA slope (momentum of the moving average itself) ──────────────────
    for period in [20, 50, 200]:
        df[f"sma_{period}_slope"] = df[f"sma_{period}"].diff(5) / df[f"sma_{period}"].shift(5)
    # ── SMA crossover signals ──────────────────────────────────────────────
    df["sma_20_50_cross"] = df["sma_20"] - df["sma_50"]
    df["sma_50_200_cross"] = df["sma_50"] - df["sma_200"]
    df["sma_20_200_cross"] = df["sma_20"] - df["sma_200"]
    # Sign of crossover difference (trend direction)
    df["trend_20_50"] = np.where(df["sma_20_50_cross"] > 0, 1, -1)
    df["trend_50_200"] = np.where(df["sma_50_200_cross"] > 0, 1, -1)
    # ── Price momentum ────────────────────────────────────────────────────
    for lag in [1, 4, 8, 16, 32]:
        df[f"return_{lag}"] = close.pct_change(lag)
    # ── Volatility features ───────────────────────────────────────────────
    # True Range
    prev_close = close.shift(1)
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)
    for atr_period in [14, 50]:
        atr = tr.rolling(atr_period).mean()
        df[f"atr_{atr_period}"] = atr
        df[f"natr_{atr_period}"] = atr / close
    # Rolling realised volatility
    log_ret = np.log(close / close.shift(1))
    for vol_period in [20, 50]:
        df[f"realvol_{vol_period}"] = log_ret.rolling(vol_period).std()
    # ── Bollinger Bands (20, 2σ) ──────────────────────────────────────────
    bb_mid = close.rolling(20).mean()
    bb_std = close.rolling(20).std()
    bb_upper = bb_mid + 2 * bb_std
    bb_lower = bb_mid - 2 * bb_std
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    # The 1e-12 terms keep the ratios finite when the band collapses.
    df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower + 1e-12)
    df["bb_position"] = (close - bb_mid) / (bb_std + 1e-12)
    # ── RSI (14) ──────────────────────────────────────────────────────────
    delta = close.diff()
    gain = delta.clip(lower=0).rolling(14).mean()
    loss = (-delta.clip(upper=0)).rolling(14).mean()
    rs = gain / (loss + 1e-12)
    df["rsi_14"] = 100 - (100 / (1 + rs))
    # RSI normalised to [-1, 1]
    df["rsi_14_norm"] = (df["rsi_14"] - 50) / 50
    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_line - signal_line
    df["macd_hist_chg"] = df["macd_hist"].diff()
    # Normalise MACD by price
    df["macd_norm"] = df["macd"] / close
    df["macd_hist_norm"] = df["macd_hist"] / close
    # ── Stochastic Oscillator (14, 3) ─────────────────────────────────────
    low14 = low.rolling(14).min()
    high14 = high.rolling(14).max()
    stoch_k = 100 * (close - low14) / (high14 - low14 + 1e-12)
    stoch_d = stoch_k.rolling(3).mean()
    df["stoch_k"] = stoch_k
    df["stoch_d"] = stoch_d
    df["stoch_diff"] = stoch_k - stoch_d
    # ── Rate-of-change ────────────────────────────────────────────────────
    for roc_period in [5, 10, 20]:
        df[f"roc_{roc_period}"] = (close - close.shift(roc_period)) / (close.shift(roc_period) + 1e-12)
    # ── Candle body and wick features ─────────────────────────────────────
    body = (close - open_).abs()
    # A zero-range candle becomes NaN here so the ratios below are NaN
    # (not inf); the final forward-fill then carries the previous bar's value.
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = body / candle_rng
    df["upper_wick_ratio"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
    df["lower_wick_ratio"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
    df["candle_direction"] = np.where(close >= open_, 1, -1)
    # ── Time-of-day features (hour) ───────────────────────────────────────
    hour = df.index.hour
    df["hour_sin"] = np.sin(2 * np.pi * hour / 24)
    df["hour_cos"] = np.cos(2 * np.pi * hour / 24)
    # ── Lagged returns as features ────────────────────────────────────────
    for lag in [1, 2, 3, 4]:
        df[f"close_lag_{lag}"] = close.shift(lag)
        df[f"ret_lag_{lag}"] = log_ret.shift(lag)
    # ── Volume proxy: range-based ─────────────────────────────────────────
    df["range_abs"] = high - low
    df["range_norm"] = (high - low) / close
    # ── High-Low channel position ─────────────────────────────────────────
    for ch_period in [20, 50]:
        ch_high = high.rolling(ch_period).max()
        ch_low = low.rolling(ch_period).min()
        df[f"channel_pos_{ch_period}"] = (close - ch_low) / (ch_high - ch_low + 1e-12)
    # ── Fill NaN ──────────────────────────────────────────────────────────
    # Forward-fill first: back-filling first would copy the NEXT bar's value
    # into interior gaps and leak future information into the features.
    # The trailing bfill only covers the leading warm-up rows.
    df = df.ffill().bfill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the EUR/USD GBM scalper.

    The dict carries the model type and hyper-parameters, the
    entry/exit settings, and two free-text fields ("objective", "notes").
    """
    gbm_params = {
        "n_estimators": 400,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.75,
        "max_features": "sqrt",
        "min_samples_leaf": 20,
        "min_samples_split": 40,
        "validation_fraction": 0.1,
        "n_iter_no_change": 30,
        "tol": 1e-4,
        "random_state": 42,
    }
    objective_text = (
        "Maximise risk-adjusted return (Sharpe / Calmar). "
        "GradientBoostingClassifier chosen for its strong performance on "
        "tabular financial data with moderate feature sets. "
        "Shallow trees (max_depth=4) with high n_estimators and a low "
        "learning_rate reduce overfitting. subsample=0.75 adds stochastic "
        "regularisation. Early stopping via n_iter_no_change prevents "
        "over-training on the validation split. "
        "SL=0.5%, TP=1.0% gives a 1:2 risk-reward ratio, "
        "targeting positive expectancy even at sub-60% accuracy. "
        "Session filter (06-18 UTC) restricts trading to liquid hours "
        "covering London and New York overlap for EUR/USD. "
        "SMA-50 trend filter ensures trades align with the medium-term "
        "trend, reducing counter-trend noise."
    )
    notes_text = (
        "Features include required SMA(20,50,200) distances and crossovers, "
        "RSI-14, MACD histogram, Bollinger Band position, Stochastic, ATR, "
        "realised volatility, rate-of-change, candle structure ratios, "
        "channel position, lagged returns, and cyclical time encoding. "
        "Target horizon of 4 bars (1 hour) on 15-min data balances "
        "signal frequency with meaningful directional moves."
    )
    config = {
        "title": "EUR/USD SMA Trend + Multi-Indicator GBM Scalper",
        "model_type": "GradientBoostingClassifier",
        "model_params": gbm_params,
        # Entry / exit settings
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        # Filters and label horizon
        "session_filter": [6, 18],
        "min_atr": None,
        "trend_filter": "sma_50",
        "target_horizon": 4,
        # Free-text rationale
        "objective": objective_text,
        "notes": notes_text,
    }
    return config
|
||||||||||
|
1.24
|
USD/CAD BB Mean-Reversion + ATR XGBoost
Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min using Bollinger Band mean-reversion signals augmented by ATR, RSI, MACD, and…
|
C
@candle_owl
|
USDCAD | 15min | 59.1%59.1% | +4.84%+2.54% | 1.311.11 | 1.34%1.34% | 36266 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 02:36:58
# Model : XGBoost
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Location of the USD/CAD 15-minute OHLC parquet file.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/USDCAD_15min.parquet"
# Date window as ISO strings.
# NOTE(review): inclusive/exclusive bounds are decided by the framework — confirm.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# Left empty in this script.
# NOTE(review): presumably the framework then relies on TRAIN_SPLIT — confirm.
VALIDATION_DATE = ""
# NOTE(review): looks like the training fraction of the window — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add the USD/CAD Bollinger/ATR feature columns to ``df`` and return it.

    Args:
        df: DataFrame that receives the feature columns (mutated in place
            before the final fill). If its index exposes ``.hour`` the
            cyclical hour features are derived from it; otherwise they are
            set to the constants 0.0 / 1.0.
        close, open_, high, low: price Series aligned with ``df``.

    Returns:
        The frame with all feature columns added and gaps filled.

    Gaps are forward-filled first, so an interior NaN (zero-range candle
    ratios, a 0/0 ``bb_pct`` on a collapsed band) takes the last known value
    and never a future one. Only the leading warm-up rows are back-filled.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    df["bb_pct"] = (close - bb_lower) / (bb_upper - bb_lower)
    # ── ATR (14) & Normalised ATR ────────────────────────────────────────────
    atr_period = 14
    tr = pd.concat([
        high - low,
        (high - close.shift(1)).abs(),
        (low - close.shift(1)).abs()
    ], axis=1).max(axis=1)
    # alpha = 1/period smoothing
    atr = tr.ewm(alpha=1.0 / atr_period, min_periods=atr_period, adjust=False).mean()
    natr = atr / close
    df["atr"] = atr
    df["natr"] = natr
    # ── Price momentum / returns ─────────────────────────────────────────────
    df["ret_1"] = close.pct_change(1)
    df["ret_4"] = close.pct_change(4)
    df["ret_8"] = close.pct_change(8)
    df["ret_16"] = close.pct_change(16)
    # ── Distance from Bollinger mid / bands ──────────────────────────────────
    df["close_minus_mid"] = (close - bb_mid) / bb_mid
    df["close_minus_upper"] = (close - bb_upper) / bb_mid
    df["close_minus_lower"] = (close - bb_lower) / bb_mid
    # ── BB squeeze flag: width below rolling median ───────────────────────────
    bb_width_med = df["bb_width"].rolling(50).median()
    df["bb_squeeze"] = np.where(df["bb_width"] < bb_width_med, 1.0, 0.0)
    # ── BB mean-reversion z-score ────────────────────────────────────────────
    df["bb_z"] = (close - bb_mid) / (bb_sigma + 1e-12)
    # ── Candle body / wick features ──────────────────────────────────────────
    body = (close - open_).abs()
    # A zero-range candle becomes NaN here so the ratios below are NaN
    # (not inf); the final forward-fill then carries the previous bar's value.
    candle_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = body / candle_rng
    df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / candle_rng
    df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / candle_rng
    df["bull_candle"] = np.where(close > open_, 1.0, 0.0)
    # ── RSI (14) built from scratch ──────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_g = gain.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
    avg_l = loss.ewm(alpha=1.0 / rsi_period, min_periods=rsi_period, adjust=False).mean()
    rs = avg_g / (avg_l + 1e-12)
    rsi = 100.0 - (100.0 / (1.0 + rs))
    df["rsi_14"] = rsi
    # RSI deviation from neutral 50
    df["rsi_dev"] = (rsi - 50.0) / 50.0
    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    macd_sig = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line / close
    df["macd_hist"] = (macd_line - macd_sig) / close
    # ── Rolling volatility (realised over 20 bars) ───────────────────────────
    df["vol_20"] = df["ret_1"].rolling(20).std()
    # ── ATR z-score vs 50-bar rolling mean ───────────────────────────────────
    atr_mean = atr.rolling(50).mean()
    atr_std = atr.rolling(50).std(ddof=0)
    df["atr_z"] = (atr - atr_mean) / (atr_std + 1e-12)
    # ── Count of BB touches over last 10 bars ────────────────────────────────
    near_upper = (close >= bb_upper * 0.998).astype(float)
    near_lower = (close <= bb_lower * 1.002).astype(float)
    df["touch_upper_10"] = near_upper.rolling(10).sum()
    df["touch_lower_10"] = near_lower.rolling(10).sum()
    # ── SMA 50 (trend filter helper) ─────────────────────────────────────────
    df["sma_50"] = close.rolling(50).mean()
    df["close_vs_sma"] = (close - df["sma_50"]) / df["sma_50"]
    # ── EMA cross (9 / 21) ───────────────────────────────────────────────────
    ema9 = close.ewm(span=9, adjust=False).mean()
    ema21 = close.ewm(span=21, adjust=False).mean()
    df["ema_cross"] = (ema9 - ema21) / close
    # ── Bar-of-day / session ─────────────────────────────────────────────────
    if hasattr(df.index, "hour"):
        df["hour_sin"] = np.sin(2 * np.pi * df.index.hour / 24.0)
        df["hour_cos"] = np.cos(2 * np.pi * df.index.hour / 24.0)
    else:
        # No time information on the index: use constant placeholders.
        df["hour_sin"] = 0.0
        df["hour_cos"] = 1.0
    # ── Lag features on bb_pct and rsi ───────────────────────────────────────
    for lag in [1, 2, 4]:
        df[f"bb_pct_lag{lag}"] = df["bb_pct"].shift(lag)
        df[f"rsi_14_lag{lag}"] = df["rsi_14"].shift(lag)
        df[f"macd_hist_lag{lag}"] = df["macd_hist"].shift(lag)
    # ── Fill NaN ──────────────────────────────────────────────────────────────
    # Forward-fill first: back-filling first would copy the NEXT bar's value
    # into interior gaps and leak future information into the features.
    # The trailing bfill only covers the leading warm-up rows.
    df = df.ffill().bfill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the USD/CAD BB mean-reversion strategy.

    The dict carries the XGBoost hyper-parameters, the entry/exit
    settings, and two free-text fields ("objective", "notes").
    """
    xgb_params = {
        "n_estimators": 400,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.75,
        "colsample_bytree": 0.70,
        "min_child_weight": 3,
        "gamma": 0.15,
        "reg_alpha": 0.10,
        "reg_lambda": 1.50,
        "objective": "binary:logistic",
        "random_state": 42,
        "n_jobs": -1,
    }
    objective_text = (
        "Maximise risk-adjusted return (Sharpe/Calmar) on USD/CAD 15-min "
        "using Bollinger Band mean-reversion signals augmented by ATR, RSI, "
        "MACD, and EMA-cross features fed into a regularised XGBoost classifier. "
        "SL=0.5% / TP=1.0% gives a 1:2 RR floor. Conservative depth (4) and "
        "strong L1/L2 regularisation prevent overfitting on a single year of data."
    )
    notes_text = (
        "BB squeeze flag and bb_z capture regime; atr_z filters noisy bars. "
        "Session filter 07-20 UTC covers London + NY overlap for tighter spreads. "
        "min_atr=0.0002 avoids dead-market whipsaws. Lag features on bb_pct and "
        "rsi_14 give the model short-term momentum context without look-ahead."
    )
    config = {
        "title": "USD/CAD BB Mean-Reversion + ATR XGBoost",
        "model_type": "XGBClassifier",
        "model_params": xgb_params,
        # Entry / exit settings
        "signal_threshold": 0.55,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        # Filters and label horizon
        "session_filter": [7, 20],
        "min_atr": 0.0002,
        "trend_filter": None,
        "target_horizon": 4,
        # Free-text rationale
        "objective": objective_text,
        "notes": notes_text,
    }
    return config
|
||||||||||
|
0.84
|
AUD/USD Bollinger + ATR Mean-Rev (XGBoost)
Maximize risk-adjusted return (Sharpe). XGBoost with moderate depth and heavy regularisation (gamma, alpha, lambda) prevents overfit on AUD/…
|
E
@elastic-moose-350
|
AUDUSD | 15min | 63.9%63.7% | +6.79%+3.66% | 1.121.12 | 3.10%3.10% | 65680 |
|
# ╔══════════════════════════════════════════════════════════════╗
# ║ STRATEGY REQUEST LOG ║
# ╚══════════════════════════════════════════════════════════════╝
# Generated : 2026-05-06 01:44:26
# Model : XGBoost
# Feature Eng. : BB (20,2.0), ATR 14 + Auto-add features: ON
# Signal / Entry : Enter when model confidence > threshold; exit on opposite signal or SL/TP
# Optimization : Maximize risk-adjusted return
# Risk Mgmt : Stop loss 0.5%, Take profit 1.0%
# Risk Filter : —
# ══════════════════════════════════════════════════════════════
# ============================================================
# SECTION 0 — IMPORTS & CONSTANTS
import numpy as np
import pandas as pd
# Location of the AUD/USD 15-minute OHLC parquet file.
DATA_PATH = "/root/Desktop/QuantifyMe/data/ohlc/AUDUSD_15min.parquet"
# Date window as ISO strings.
# NOTE(review): inclusive/exclusive bounds are decided by the framework — confirm.
START_DATE = "2025-04-24"
END_DATE = "2026-04-24"
# Left empty in this script.
# NOTE(review): presumably the framework then relies on TRAIN_SPLIT — confirm.
VALIDATION_DATE = ""
# NOTE(review): looks like the training fraction of the window — confirm.
TRAIN_SPLIT = 0.7
# SECTION 1 — FEATURE ENGINEERING
def feature_engineering(df, close, open_, high, low):
    """Add the AUD/USD Bollinger/ATR feature columns to ``df`` and return it.

    Args:
        df: DataFrame that receives the feature columns (mutated in place
            before the final fill). A ``volume`` column, when present, adds
            a ``volume_ratio`` feature.
        close, open_, high, low: price Series aligned with ``df``.

    Returns:
        The frame with all feature columns added and gaps filled.

    Gaps are forward-filled first, so an interior NaN (zero-range candle
    ratios, RSI when the average loss is zero) takes the last known value
    and never a future one. Only the leading warm-up rows are back-filled.
    """
    # ── Bollinger Bands (20, 2) ──────────────────────────────────────────────
    bb_period = 20
    bb_std = 2.0
    bb_mid = close.rolling(bb_period).mean()
    bb_sigma = close.rolling(bb_period).std(ddof=0)
    bb_upper = bb_mid + bb_std * bb_sigma
    bb_lower = bb_mid - bb_std * bb_sigma
    df["bb_mid"] = bb_mid
    df["bb_upper"] = bb_upper
    df["bb_lower"] = bb_lower
    df["bb_width"] = (bb_upper - bb_lower) / bb_mid
    # guard against zero range
    bb_range = bb_upper - bb_lower
    df["bb_pct"] = np.where(bb_range != 0, (close - bb_lower) / bb_range, 0.5)
    # ── ATR (14) & Normalised ATR ────────────────────────────────────────────
    atr_period = 14
    prev_close = close.shift(1)
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)
    atr = tr.ewm(span=atr_period, min_periods=atr_period, adjust=False).mean()
    df["atr"] = atr
    df["natr"] = np.where(close != 0, atr / close, 0.0)
    # ── RSI (14) ─────────────────────────────────────────────────────────────
    rsi_period = 14
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = (-delta).clip(lower=0)
    avg_gain = gain.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
    avg_loss = loss.ewm(span=rsi_period, min_periods=rsi_period, adjust=False).mean()
    # A zero average loss yields NaN rather than a division by zero.
    rs = avg_gain / avg_loss.replace(0, np.nan)
    rsi = 100 - (100 / (1 + rs))
    df["rsi_14"] = rsi
    # ── MACD (12, 26, 9) ─────────────────────────────────────────────────────
    ema12 = close.ewm(span=12, adjust=False).mean()
    ema26 = close.ewm(span=26, adjust=False).mean()
    macd_line = ema12 - ema26
    signal_line = macd_line.ewm(span=9, adjust=False).mean()
    df["macd"] = macd_line
    df["macd_signal"] = signal_line
    df["macd_hist"] = macd_line - signal_line
    # ── EMA trend features ───────────────────────────────────────────────────
    ema_20 = close.ewm(span=20, adjust=False).mean()
    ema_50 = close.ewm(span=50, adjust=False).mean()
    ema_200 = close.ewm(span=200, adjust=False).mean()
    df["ema_20"] = ema_20
    df["ema_50"] = ema_50
    df["ema_200"] = ema_200
    df["close_vs_ema20"] = (close - ema_20) / ema_20
    df["close_vs_ema50"] = (close - ema_50) / ema_50
    df["ema20_vs_ema50"] = (ema_20 - ema_50) / ema_50
    df["ema50_vs_ema200"] = (ema_50 - ema_200) / ema_200
    # ── Price momentum / rate-of-change ──────────────────────────────────────
    for n in [1, 4, 8, 16]:
        df[f"roc_{n}"] = close.pct_change(n)
    # ── Rolling volatility ────────────────────────────────────────────────────
    df["vol_10"] = close.pct_change().rolling(10).std()
    df["vol_20"] = close.pct_change().rolling(20).std()
    df["vol_ratio"] = np.where(df["vol_20"] != 0,
                               df["vol_10"] / df["vol_20"], 1.0)
    # ── Stochastic %K / %D (14, 3) ───────────────────────────────────────────
    low_14 = low.rolling(14).min()
    high_14 = high.rolling(14).max()
    stoch_range = high_14 - low_14
    stoch_k = np.where(stoch_range != 0,
                       100 * (close - low_14) / stoch_range, 50.0)
    df["stoch_k"] = stoch_k
    # np.where returns a bare array; re-wrap it so rolling() is available.
    df["stoch_d"] = pd.Series(stoch_k, index=close.index).rolling(3).mean()
    # ── Candle body / wick features ──────────────────────────────────────────
    body = (close - open_).abs()
    # A zero-range candle becomes NaN here so the ratios below are NaN
    # (not inf); the final forward-fill then carries the previous bar's value.
    total_rng = (high - low).replace(0, np.nan)
    df["body_ratio"] = body / total_rng
    df["upper_wick"] = (high - pd.concat([close, open_], axis=1).max(axis=1)) / total_rng
    df["lower_wick"] = (pd.concat([close, open_], axis=1).min(axis=1) - low) / total_rng
    df["candle_dir"] = np.sign(close - open_)
    # ── BB interaction features ───────────────────────────────────────────────
    df["price_above_bb_upper"] = np.where(close > bb_upper, 1, 0)
    df["price_below_bb_lower"] = np.where(close < bb_lower, 1, 0)
    df["bb_squeeze"] = np.where(df["bb_width"] < df["bb_width"].rolling(50).quantile(0.20), 1, 0)
    # ── RSI regime bins (replacing pd.cut) ───────────────────────────────────
    df["rsi_oversold"] = np.where(rsi < 30, 1, 0)
    df["rsi_overbought"] = np.where(rsi > 70, 1, 0)
    df["rsi_neutral"] = np.where((rsi >= 30) & (rsi <= 70), 1, 0)
    # ── Volume proxy (if volume column exists) ───────────────────────────────
    if "volume" in df.columns:
        vol_ma = df["volume"].rolling(20).mean()
        df["volume_ratio"] = np.where(vol_ma != 0,
                                      df["volume"] / vol_ma, 1.0)
    # ── Lagged features (1- and 2-bar lags of key indicators) ────────────────
    for feat in ["bb_pct", "rsi_14", "macd_hist", "natr", "stoch_k"]:
        df[f"{feat}_lag1"] = df[feat].shift(1)
        df[f"{feat}_lag2"] = df[feat].shift(2)
    # ── Fill NaNs ─────────────────────────────────────────────────────────────
    # Forward-fill first: back-filling first would copy the NEXT bar's value
    # into interior gaps and leak future information into the features.
    # The trailing bfill only covers the leading warm-up rows.
    df = df.ffill().bfill()
    return df
# SECTION 2 — STRATEGY CONFIG
def strategy_config():
    """Return the configuration dict for the AUD/USD Bollinger + ATR strategy.

    The dict carries the XGBoost hyper-parameters, the entry/exit
    settings, and two free-text fields ("objective", "notes").
    """
    xgb_params = {
        "n_estimators": 400,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.75,
        "colsample_bytree": 0.70,
        "min_child_weight": 3,
        "gamma": 0.10,
        "reg_alpha": 0.10,
        "reg_lambda": 1.50,
        "objective": "binary:logistic",
        "random_state": 42,
        "n_jobs": -1,
    }
    objective_text = (
        "Maximize risk-adjusted return (Sharpe). "
        "XGBoost with moderate depth and heavy regularisation "
        "(gamma, alpha, lambda) prevents overfit on AUD/USD 15-min data. "
        "Bollinger Bands capture mean-reversion; ATR normalises volatility; "
        "RSI + MACD confirm momentum; 2:1 TP:SL ratio supports positive expectancy."
    )
    notes_text = (
        "Features: BB (20,2) width/pct, ATR-14/NATR, RSI-14, MACD histogram, "
        "EMA 20/50/200 spreads, Stochastic %K/%D, candle-body ratios, "
        "ROC at multiple horizons, volatility ratio, BB squeeze flag, "
        "lagged versions of key features. "
        "Threshold 0.56 filters marginal signals, improving precision. "
        "target_horizon=4 (1 hour) balances signal frequency vs. noise."
    )
    config = {
        "title": "AUD/USD Bollinger + ATR Mean-Rev (XGBoost)",
        "model_type": "XGBClassifier",
        "model_params": xgb_params,
        # Entry / exit settings
        "signal_threshold": 0.56,
        "direction": "both",
        "stop_loss": 0.005,
        "take_profit": 0.010,
        "cooldown": 0,
        "max_positions": 1,
        "on_opposite": "reverse",
        # Filters and label horizon
        "session_filter": [0, 23],
        "min_atr": None,
        "trend_filter": None,
        "target_horizon": 4,
        # Free-text rationale
        "objective": objective_text,
        "notes": notes_text,
    }
    return config
|
||||||||||