# File: strategy32/scripts/run_current_cash_classifier_blocker.py
# (194 lines, 7.3 KiB, Python)

from __future__ import annotations
import json
import sys
from dataclasses import asdict, dataclass
from pathlib import Path
import pandas as pd
PACKAGE_PARENT = Path(__file__).resolve().parents[2]
if str(PACKAGE_PARENT) not in sys.path:
sys.path.insert(0, str(PACKAGE_PARENT))
from strategy32.live.runtime import BEST_CASH_OVERLAY
from strategy32.research.soft_router import build_cash_overlay_period_components, load_component_bundle, score_candidate
from strategy32.scripts.run_current_cash_learned_blocker import (
CACHE_PATH,
CURRENT_OVERHEAT_OVERRIDES,
_build_block_dataset,
_build_regime_columns,
_build_strategy_detail,
_curve_from_returns,
_feature_columns,
_metrics_for_curve,
_oracle_blocker_curve,
_ridge_predict,
)
# Destination file for the JSON report that main() serializes at the end of the run.
OUT_JSON = Path("/tmp/strategy32_current_cash_classifier_blocker.json")
@dataclass(frozen=True, slots=True)
class ClassifierBlockerCandidate:
block_bars: int
train_min_blocks: int
lookback_blocks: int
ridge_alpha: float
probability_threshold: float
blocked_scale: float
@property
def name(self) -> str:
return (
f"block:{self.block_bars}"
f"|train:{self.train_min_blocks}"
f"|lookback:{self.lookback_blocks}"
f"|alpha:{self.ridge_alpha:.2f}"
f"|pth:{self.probability_threshold:.2f}"
f"|blocked:{self.blocked_scale:.2f}"
)
def _simulate_candidate(
    detail: pd.DataFrame,
    block_frame: pd.DataFrame,
    regime_columns: list[str],
    candidate: ClassifierBlockerCandidate,
) -> pd.Series:
    """Replay per-bar returns while gating each block with a walk-forward classifier.

    For every block in ``block_frame`` (in row order) the bars between its
    ``block_start_index`` and ``block_end_index`` (inclusive, positional into
    ``detail``) are emitted. Once at least ``candidate.train_min_blocks``
    blocks precede the current one, a ridge model is fitted on up to
    ``candidate.lookback_blocks`` preceding blocks, with label 1.0 when a
    block's ``block_total`` is positive and 0.0 otherwise. If the prediction
    for the current block is below ``candidate.probability_threshold``, every
    bar return in that block is multiplied by ``candidate.blocked_scale``.

    Args:
        detail: Per-bar frame with ``timestamp`` and ``portfolio_return`` columns.
        block_frame: One row per block, holding the feature columns plus
            ``block_start_index``, ``block_end_index`` and ``block_total``.
        regime_columns: Regime column names used to derive the feature list.
        candidate: Hyper-parameters controlling training and gating.

    Returns:
        Float series of (possibly scaled) bar returns, indexed by a
        ``DatetimeIndex`` named ``"timestamp"``.
    """
    rows = detail.reset_index(drop=True)
    features = _feature_columns(regime_columns)
    returns: list[float] = []
    idx: list[pd.Timestamp] = []
    # ``iterrows`` yields index *labels*; the warm-up check and the ``iloc``
    # training slice below need *positions*, so count positions explicitly.
    # This is identical for a default RangeIndex and stays correct otherwise.
    for block_pos, (_, block) in enumerate(block_frame.iterrows()):
        start_idx = int(block["block_start_index"])
        end_idx = int(block["block_end_index"])
        bar_block = rows.iloc[start_idx : end_idx + 1]  # end index is inclusive
        exposure_scale = 1.0
        if block_pos >= candidate.train_min_blocks:
            # Train strictly on blocks that precede the current one.
            train_start = max(0, block_pos - candidate.lookback_blocks)
            train = block_frame.iloc[train_start:block_pos].copy()
            train_x = train[features].to_numpy(dtype=float)
            train_y = (train["block_total"] > 0.0).astype(float).to_numpy(dtype=float)
            test_x = block[features].to_numpy(dtype=float)
            # NOTE(review): the result is compared as a scalar score against the
            # threshold; confirm the return shape of _ridge_predict.
            pred = _ridge_predict(train_x, train_y, test_x, candidate.ridge_alpha)
            if pred < candidate.probability_threshold:
                exposure_scale = candidate.blocked_scale
        returns.extend(
            float(value) * exposure_scale for value in bar_block["portfolio_return"]
        )
        idx.extend(pd.Timestamp(stamp) for stamp in bar_block["timestamp"])
    return pd.Series(returns, index=pd.DatetimeIndex(idx, name="timestamp"), dtype=float)
def _candidate_space() -> list[ClassifierBlockerCandidate]:
    """Enumerate every hyper-parameter combination to be searched.

    The grid is the Cartesian product of the value tuples below, skipping any
    combination whose lookback window is shorter than the minimum number of
    training blocks. Candidates are ordered with ``block_bars`` varying
    slowest and ``blocked_scale`` varying fastest.
    """
    block_sizes = (21, 42)
    min_train_options = (8, 12, 18)
    lookback_options = (24, 60, 120)
    alpha_options = (0.5, 1.0, 5.0, 20.0)
    threshold_options = (0.45, 0.50, 0.55, 0.60)
    scale_options = (0.0, 0.25, 0.50)
    return [
        ClassifierBlockerCandidate(
            block_bars=bars,
            train_min_blocks=min_train,
            lookback_blocks=lookback,
            ridge_alpha=alpha,
            probability_threshold=threshold,
            blocked_scale=scale,
        )
        for bars in block_sizes
        for min_train in min_train_options
        for lookback in lookback_options
        if lookback >= min_train
        for alpha in alpha_options
        for threshold in threshold_options
        for scale in scale_options
    ]
def main() -> None:
    """Run the classifier-blocker grid search and persist the report.

    Steps:
      1. Load the cached component bundle and build the baseline strategy
         detail over the 1825 days ending at the latest bar.
      2. Score the unmodified baseline curve and the oracle blocker curves
         for block sizes 21 and 42.
      3. Simulate every candidate from ``_candidate_space``, tracking the
         single best score and the five highest-scoring payloads.
      4. Print the report as JSON and write the same text to ``OUT_JSON``.

    Raises:
        RuntimeError: If the candidate space is empty, so no best candidate
            can be reported.
    """
    bundle, latest_bar = load_component_bundle(CACHE_PATH)
    eval_start = latest_bar - pd.Timedelta(days=1825)  # 5 x 365 days

    print("[phase] build current baseline", flush=True)
    components = build_cash_overlay_period_components(
        bundle=bundle,
        eval_start=eval_start,
        eval_end=latest_bar,
        profile_name=BEST_CASH_OVERLAY.regime_profile,
        core_filter=BEST_CASH_OVERLAY.core_filter,
        cap_engine=BEST_CASH_OVERLAY.cap_engine,
        chop_engine=BEST_CASH_OVERLAY.chop_engine,
        dist_engine=BEST_CASH_OVERLAY.dist_engine,
        core_config_overrides=CURRENT_OVERHEAT_OVERRIDES,
    )
    detail = _build_strategy_detail(components)
    regime_columns = _build_regime_columns(detail)

    baseline_curve = _curve_from_returns(detail.set_index("timestamp")["portfolio_return"])
    (
        baseline_windows,
        baseline_years,
        baseline_score,
        baseline_negative_years,
        baseline_mdd_violations,
    ) = _metrics_for_curve(baseline_curve, latest_bar)

    oracle_summary: dict[str, object] = {}
    for block in (21, 42):
        oracle_curve = _curve_from_returns(_oracle_blocker_curve(detail, block))
        windows, years, score, negative_years, mdd_violations = _metrics_for_curve(oracle_curve, latest_bar)
        oracle_summary[f"oracle_block_{block}"] = {
            "score": score,
            "negative_years": negative_years,
            "mdd_violations": mdd_violations,
            "windows": windows,
            "years": years,
        }

    candidates = _candidate_space()
    print(f"[phase] classifier blocker search {len(candidates)} candidates", flush=True)
    progress_step = max(1, len(candidates) // 8)  # report about every eighth of the search
    # The block dataset is built from (detail, block_bars, regime_columns) only,
    # so build it once per distinct block size instead of once per candidate.
    # NOTE(review): assumes _build_block_dataset does not mutate its inputs.
    block_frames: dict[int, pd.DataFrame] = {}
    best_payload: dict[str, object] | None = None
    search_top: list[dict[str, object]] = []
    for idx, candidate in enumerate(candidates, start=1):
        block_frame = block_frames.get(candidate.block_bars)
        if block_frame is None:
            block_frame = _build_block_dataset(detail, candidate.block_bars, regime_columns)
            block_frames[candidate.block_bars] = block_frame
        simulated_returns = _simulate_candidate(detail, block_frame, regime_columns, candidate)
        curve = _curve_from_returns(simulated_returns)
        windows, years, score, negative_years, mdd_violations = _metrics_for_curve(curve, latest_bar)
        payload = {
            "candidate": asdict(candidate),
            "name": candidate.name,
            "score": score,
            "negative_years": negative_years,
            "mdd_violations": mdd_violations,
            "windows": windows,
            "years": years,
        }
        # Keep a rolling top-5 leaderboard by score.
        search_top.append(payload)
        search_top.sort(key=lambda item: float(item["score"]), reverse=True)
        search_top = search_top[:5]
        if best_payload is None or score > float(best_payload["score"]):
            best_payload = payload
        if idx % progress_step == 0:
            print(f"[search] {idx}/{len(candidates)}", flush=True)

    # Explicit check instead of ``assert`` so the guard survives ``python -O``.
    if best_payload is None:
        raise RuntimeError("classifier blocker search produced no candidates")

    output = {
        "analysis": "current_cash_classifier_blocker",
        "latest_bar": str(latest_bar),
        # The best candidate's fields are flattened into the top level of the report.
        **best_payload,
        "baseline": {
            "score": baseline_score,
            "negative_years": baseline_negative_years,
            "mdd_violations": baseline_mdd_violations,
            "windows": baseline_windows,
            "years": baseline_years,
        },
        "oracle": oracle_summary,
        "search_top": search_top,
    }
    rendered = json.dumps(output, indent=2)  # serialize once, reuse for stdout and file
    print(rendered)
    OUT_JSON.write_text(rendered, encoding="utf-8")
    print(f"[saved] {OUT_JSON}", flush=True)
# Run the search only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()