from __future__ import annotations import json import sys from dataclasses import asdict, dataclass from pathlib import Path import pandas as pd PACKAGE_PARENT = Path(__file__).resolve().parents[2] if str(PACKAGE_PARENT) not in sys.path: sys.path.insert(0, str(PACKAGE_PARENT)) from strategy32.live.runtime import BEST_CASH_OVERLAY from strategy32.research.soft_router import build_cash_overlay_period_components, load_component_bundle, score_candidate from strategy32.scripts.run_current_cash_learned_blocker import ( CACHE_PATH, CURRENT_OVERHEAT_OVERRIDES, _build_block_dataset, _build_regime_columns, _build_strategy_detail, _curve_from_returns, _feature_columns, _metrics_for_curve, _oracle_blocker_curve, _ridge_predict, ) OUT_JSON = Path("/tmp/strategy32_current_cash_classifier_blocker.json") @dataclass(frozen=True, slots=True) class ClassifierBlockerCandidate: block_bars: int train_min_blocks: int lookback_blocks: int ridge_alpha: float probability_threshold: float blocked_scale: float @property def name(self) -> str: return ( f"block:{self.block_bars}" f"|train:{self.train_min_blocks}" f"|lookback:{self.lookback_blocks}" f"|alpha:{self.ridge_alpha:.2f}" f"|pth:{self.probability_threshold:.2f}" f"|blocked:{self.blocked_scale:.2f}" ) def _simulate_candidate( detail: pd.DataFrame, block_frame: pd.DataFrame, regime_columns: list[str], candidate: ClassifierBlockerCandidate, ) -> pd.Series: rows = detail.reset_index(drop=True) features = _feature_columns(regime_columns) returns: list[float] = [] idx: list[pd.Timestamp] = [] for block_idx, block in block_frame.iterrows(): start_idx = int(block["block_start_index"]) end_idx = int(block["block_end_index"]) bar_block = rows.iloc[start_idx : end_idx + 1] exposure_scale = 1.0 if block_idx >= candidate.train_min_blocks: train_start = max(0, block_idx - candidate.lookback_blocks) train = block_frame.iloc[train_start:block_idx].copy() train_x = train[features].to_numpy(dtype=float) train_y = (train["block_total"] > 0.0).astype(float).to_numpy(dtype=float) test_x = block[features].to_numpy(dtype=float) pred = _ridge_predict(train_x, train_y, test_x, candidate.ridge_alpha) if pred < candidate.probability_threshold: exposure_scale = candidate.blocked_scale for row in bar_block.itertuples(index=False): returns.append(float(getattr(row, "portfolio_return")) * exposure_scale) idx.append(pd.Timestamp(getattr(row, "timestamp"))) return pd.Series(returns, index=pd.DatetimeIndex(idx, name="timestamp"), dtype=float) def _candidate_space() -> list[ClassifierBlockerCandidate]: space: list[ClassifierBlockerCandidate] = [] for block_bars in (21, 42): for train_min_blocks in (8, 12, 18): for lookback_blocks in (24, 60, 120): if lookback_blocks < train_min_blocks: continue for ridge_alpha in (0.5, 1.0, 5.0, 20.0): for probability_threshold in (0.45, 0.50, 0.55, 0.60): for blocked_scale in (0.0, 0.25, 0.50): space.append( ClassifierBlockerCandidate( block_bars=block_bars, train_min_blocks=train_min_blocks, lookback_blocks=lookback_blocks, ridge_alpha=ridge_alpha, probability_threshold=probability_threshold, blocked_scale=blocked_scale, ) ) return space def main() -> None: bundle, latest_bar = load_component_bundle(CACHE_PATH) eval_start = latest_bar - pd.Timedelta(days=1825) print("[phase] build current baseline", flush=True) components = build_cash_overlay_period_components( bundle=bundle, eval_start=eval_start, eval_end=latest_bar, profile_name=BEST_CASH_OVERLAY.regime_profile, core_filter=BEST_CASH_OVERLAY.core_filter, cap_engine=BEST_CASH_OVERLAY.cap_engine, chop_engine=BEST_CASH_OVERLAY.chop_engine, dist_engine=BEST_CASH_OVERLAY.dist_engine, core_config_overrides=CURRENT_OVERHEAT_OVERRIDES, ) detail = _build_strategy_detail(components) regime_columns = _build_regime_columns(detail) baseline_curve = _curve_from_returns(detail.set_index("timestamp")["portfolio_return"]) baseline_windows, baseline_years, baseline_score, baseline_negative_years, baseline_mdd_violations = _metrics_for_curve(baseline_curve, latest_bar) oracle_summary: dict[str, object] = {} for block in (21, 42): oracle_curve = _curve_from_returns(_oracle_blocker_curve(detail, block)) windows, years, score, negative_years, mdd_violations = _metrics_for_curve(oracle_curve, latest_bar) oracle_summary[f"oracle_block_{block}"] = { "score": score, "negative_years": negative_years, "mdd_violations": mdd_violations, "windows": windows, "years": years, } candidates = _candidate_space() print(f"[phase] classifier blocker search {len(candidates)} candidates", flush=True) best_payload: dict[str, object] | None = None search_top: list[dict[str, object]] = [] for idx, candidate in enumerate(candidates, start=1): block_frame = _build_block_dataset(detail, candidate.block_bars, regime_columns) simulated_returns = _simulate_candidate(detail, block_frame, regime_columns, candidate) curve = _curve_from_returns(simulated_returns) windows, years, score, negative_years, mdd_violations = _metrics_for_curve(curve, latest_bar) payload = { "candidate": asdict(candidate), "name": candidate.name, "score": score, "negative_years": negative_years, "mdd_violations": mdd_violations, "windows": windows, "years": years, } search_top.append(payload) search_top.sort(key=lambda item: float(item["score"]), reverse=True) search_top = search_top[:5] if best_payload is None or score > float(best_payload["score"]): best_payload = payload if idx % max(1, len(candidates) // 8) == 0: print(f"[search] {idx}/{len(candidates)}", flush=True) assert best_payload is not None output = { "analysis": "current_cash_classifier_blocker", "latest_bar": str(latest_bar), **best_payload, "baseline": { "score": baseline_score, "negative_years": baseline_negative_years, "mdd_violations": baseline_mdd_violations, "windows": baseline_windows, "years": baseline_years, }, "oracle": oracle_summary, "search_top": search_top, } print(json.dumps(output, indent=2)) OUT_JSON.write_text(json.dumps(output, indent=2), encoding="utf-8") print(f"[saved] {OUT_JSON}", flush=True) if __name__ == "__main__": main()