# strategy32/scripts/run_cash_overlay_search.py
from __future__ import annotations

import itertools
import json
import sys
from dataclasses import asdict
from pathlib import Path

import pandas as pd

# Make the repository root importable so the sibling strategy packages
# (strategy29, strategy32) resolve when this file is run directly as a script.
# The project imports below depend on this insert happening first.
PACKAGE_PARENT = Path(__file__).resolve().parents[2]
if str(PACKAGE_PARENT) not in sys.path:
    sys.path.insert(0, str(PACKAGE_PARENT))

from strategy29.backtest.window_analysis import slice_bundle
from strategy32.backtest.simulator import Strategy32Backtester
from strategy32.config import PROFILE_V7_DEFAULT, build_strategy32_config
from strategy32.research.hybrid_regime import STATIC_FILTERS
from strategy32.research.soft_router import (
    WINDOWS,
    YEAR_PERIODS,
    YTD_START,
    CashOverlayCandidate,
    build_cash_overlay_period_components,
    compose_cash_overlay_curve,
    evaluate_cash_overlay_exact,
    load_component_bundle,
    score_candidate,
    segment_metrics,
)
# Output locations: machine-readable search payload and the Korean markdown report.
OUT_JSON = Path("/tmp/strategy32_cash_overlay_search.json")
OUT_MD = Path("/Volumes/SSD/data/nextcloud/data/tara/files/📂HeadOffice/money-bot/strategy32/017_cash_overlay_탐색결과.md")
# Exact results of the previous replacement soft-router, read back (if present)
# for the comparison section of the report.
SOFT_JSON = Path("/tmp/strategy32_best_soft_exact.json")

# Fixed regime profile, core filter, and adverse engines for this search.
PROFILE = "loose_positive"
CORE_FILTER = "overheat_tolerant"
CAP_ENGINE = "cap_btc_rebound"
CHOP_ENGINE = "chop_inverse_carry_strict"
DIST_ENGINE = "dist_inverse_carry_strict"

# Pre-computed exact backtest metrics for the static `overheat_tolerant` core.
# NOTE(review): presumably produced by an earlier `_exact_static_variant` run —
# regenerate if the underlying data bundle changes.
STATIC_BASELINE = {
    "name": "overheat_tolerant",
    "windows": {
        "1y": {"total_return": 0.1477, "annualized_return": 0.1477, "max_drawdown": -0.1229},
        "2y": {"total_return": 0.2789, "annualized_return": 0.1309, "max_drawdown": -0.1812},
        "3y": {"total_return": 0.4912, "annualized_return": 0.1425, "max_drawdown": -0.1931},
        "4y": {"total_return": 0.3682, "annualized_return": 0.0815, "max_drawdown": -0.3461},
        "5y": {"total_return": 3.7625, "annualized_return": 0.3664, "max_drawdown": -0.2334},
    },
    "years": {
        "2026_YTD": {"total_return": 0.0, "max_drawdown": 0.0},
        "2025": {"total_return": 0.0426, "max_drawdown": -0.1323},
        "2024": {"total_return": 0.1951, "max_drawdown": -0.2194},
        "2023": {"total_return": 0.4670, "max_drawdown": -0.2155},
        "2022": {"total_return": 0.0147, "max_drawdown": -0.0662},
        "2021": {"total_return": 1.9152, "max_drawdown": -0.1258},
    },
}

# Pre-computed 5y exposure statistics for the static core; shape matches the
# output of `_core_exposure_summary`. NOTE(review): presumably from a prior run.
EXPOSURE_SUMMARY = {
    "avg_cash_pct": 0.9379,
    "median_cash_pct": 1.0,
    "cash_gt_50_pct": 0.9469,
    "cash_gt_80_pct": 0.9068,
    "avg_momentum_pct": 0.0495,
    "avg_carry_pct": 0.0126,
}

# Search grid axes: cash weights per adverse engine, trigger thresholds per
# engine, and the core-block threshold. The candidate set is the full
# cartesian product of these tuples.
CAP_CASH_WEIGHTS = (0.20, 0.35, 0.50, 0.65)
CHOP_CASH_WEIGHTS = (0.10, 0.20, 0.30, 0.40)
DIST_CASH_WEIGHTS = (0.05, 0.10, 0.15, 0.20)
CAP_THRESHOLDS = (0.20, 0.35, 0.50)
CHOP_THRESHOLDS = (0.35, 0.50, 0.65)
DIST_THRESHOLDS = (0.35, 0.50, 0.65)
CORE_BLOCK_THRESHOLDS = (0.45, 0.60, 0.75)
def _evaluate_from_curve(curve: pd.Series, latest_bar: pd.Timestamp) -> tuple[dict[str, dict[str, float]], dict[str, dict[str, float]], float, int, int]:
    """Score an equity curve over the rolling windows and calendar years.

    Returns ``(window_results, year_results, score, negative_years,
    mdd_violations)``. The ``2026_YTD`` segment is reported in
    ``year_results`` but excluded from scoring.
    """
    window_results: dict[str, dict[str, float]] = {}
    for day_count, window_label in WINDOWS:
        window_start = latest_bar - pd.Timedelta(days=day_count)
        window_results[window_label] = segment_metrics(curve, window_start, latest_bar)

    year_results: dict[str, dict[str, float]] = {}
    for year_label, period_start, period_end_exclusive in YEAR_PERIODS:
        # Clip each calendar year to the last completed bar.
        period_end = min(latest_bar, period_end_exclusive - pd.Timedelta(seconds=1))
        year_results[year_label] = segment_metrics(curve, period_start, period_end)
    year_results["2026_YTD"] = segment_metrics(curve, YTD_START, latest_bar)

    # Score only the completed calendar years; YTD is informational.
    completed_years = {label: metrics for label, metrics in year_results.items() if label != "2026_YTD"}
    score, negative_years, mdd_violations = score_candidate(window_results, completed_years)
    return window_results, year_results, score, negative_years, mdd_violations
def _run_static_filter_curve(
    bundle,
    filter_name: str,
    raw_start: pd.Timestamp,
    eval_end: pd.Timestamp,
    trade_start: pd.Timestamp,
) -> pd.Series:
    """Run one exact static-filter backtest and return its equity curve.

    The bundle is sliced from ``raw_start`` (warmup included) to ``eval_end``,
    trading starts at ``trade_start``, and the returned curve is trimmed to
    bars at or after ``trade_start``.
    """
    sliced = slice_bundle(bundle, raw_start, eval_end)
    cfg = build_strategy32_config(PROFILE_V7_DEFAULT, **STATIC_FILTERS[filter_name])
    backtester = Strategy32Backtester(cfg, sliced, trade_start=trade_start)
    # Normalized starting capital so metrics are comparable across segments.
    backtester.engine_config.initial_capital = 1000.0
    return backtester.run().equity_curve.loc[lambda s: s.index >= trade_start]


def _exact_static_variant(bundle, latest_bar: pd.Timestamp, filter_name: str) -> dict[str, object]:
    """Exactly backtest a static filter over all windows and calendar years.

    Unlike the approximate path, every window/year segment gets its own
    backtest run (with warmup), so the metrics are free of curve-slicing
    artifacts. Returns a result dict shaped like ``STATIC_BASELINE`` plus
    score/violation fields.
    """
    # 90-day warmup before each evaluation start so indicators are primed.
    warmup = pd.Timedelta(days=90)

    window_results: dict[str, dict[str, float]] = {}
    for days, label in WINDOWS:
        eval_start = latest_bar - pd.Timedelta(days=days)
        curve = _run_static_filter_curve(bundle, filter_name, eval_start - warmup, latest_bar, eval_start)
        window_results[label] = segment_metrics(curve, eval_start, latest_bar)

    year_results: dict[str, dict[str, float]] = {}
    for label, start, end_exclusive in YEAR_PERIODS:
        # Clip each calendar year to the last completed bar.
        eval_end = min(latest_bar, end_exclusive - pd.Timedelta(seconds=1))
        curve = _run_static_filter_curve(bundle, filter_name, start - warmup, eval_end, start)
        year_results[label] = segment_metrics(curve, start, eval_end)

    curve = _run_static_filter_curve(bundle, filter_name, YTD_START - warmup, latest_bar, YTD_START)
    year_results["2026_YTD"] = segment_metrics(curve, YTD_START, latest_bar)

    # YTD is reported but excluded from the score.
    score, negative_years, mdd_violations = score_candidate(
        window_results,
        {k: v for k, v in year_results.items() if k != "2026_YTD"},
    )
    return {
        "name": filter_name,
        "windows": window_results,
        "years": year_results,
        "score": score,
        "negative_years": negative_years,
        "mdd_violations": mdd_violations,
        "validation": "exact_static_variant",
    }
def _core_exposure_summary(bundle, latest_bar: pd.Timestamp) -> dict[str, float]:
    """Measure how much cash the static core actually holds over 5 years.

    Runs one exact 5y backtest of the core filter and aggregates the
    per-bar exposure rows into average/median cash share, the fraction of
    bars above 50%/80% cash, and average momentum/carry shares.
    """
    trade_start = latest_bar - pd.Timedelta(days=1825)  # ~5 years
    # Include a 90-day warmup before trading begins.
    sliced = slice_bundle(bundle, trade_start - pd.Timedelta(days=90), latest_bar)
    cfg = build_strategy32_config(PROFILE_V7_DEFAULT, **STATIC_FILTERS[CORE_FILTER])
    runner = Strategy32Backtester(cfg, sliced, trade_start=trade_start)
    runner.engine_config.initial_capital = 1000.0

    rows = pd.DataFrame(runner.run().metadata.get("exposure_rows", []))
    # Drop warmup bars before aggregating.
    rows = rows.loc[rows["timestamp"] >= trade_start].copy()
    cash = rows["cash_pct"]
    return {
        "avg_cash_pct": float(cash.mean()),
        "median_cash_pct": float(cash.median()),
        "cash_gt_50_pct": float((cash > 0.50).mean()),
        "cash_gt_80_pct": float((cash > 0.80).mean()),
        "avg_momentum_pct": float(rows["momentum_pct"].mean()),
        "avg_carry_pct": float(rows["carry_pct"].mean()),
    }
def _metric_line(metrics: dict[str, float], *, include_ann: bool) -> str:
sharpe = metrics.get("sharpe")
if include_ann:
parts = [
f"ret `{metrics['total_return'] * 100:.2f}%`",
f"ann `{metrics['annualized_return'] * 100:.2f}%`",
]
else:
parts = [f"ret `{metrics['total_return'] * 100:.2f}%`"]
if sharpe is not None:
parts.append(f"sharpe `{sharpe:.2f}`")
parts.append(f"mdd `{metrics['max_drawdown'] * 100:.2f}%`")
return ", ".join(parts)
def main() -> None:
    """Run the cash-overlay search end to end.

    Pipeline: build 5y overlay components once, scan the full candidate
    grid with a fast curve-composition approximation, exactly re-validate
    the top 5 candidates, then write a JSON payload and a Korean markdown
    report.
    """
    bundle, latest_bar = load_component_bundle()
    # 1825 days ~= 5 calendar years of evaluation history.
    eval_start = latest_bar - pd.Timedelta(days=1825)
    # Pre-computed module constants stand in for re-running the exact static
    # baseline (`_exact_static_variant`) and the core exposure audit
    # (`_core_exposure_summary`) on every invocation.
    static_exact = STATIC_BASELINE
    exposure_summary = EXPOSURE_SUMMARY
    print("[stage] build 5y overlay components", flush=True)
    components = build_cash_overlay_period_components(
        bundle=bundle,
        eval_start=eval_start,
        eval_end=latest_bar,
        profile_name=PROFILE,
        core_filter=CORE_FILTER,
        cap_engine=CAP_ENGINE,
        chop_engine=CHOP_ENGINE,
        dist_engine=DIST_ENGINE,
    )
    print("[stage] begin approximate candidate search", flush=True)
    # Exhaustive cartesian product over all weight and threshold axes.
    candidates = [
        CashOverlayCandidate(
            regime_profile=PROFILE,
            core_filter=CORE_FILTER,
            cap_engine=CAP_ENGINE,
            chop_engine=CHOP_ENGINE,
            dist_engine=DIST_ENGINE,
            cap_cash_weight=cap_cash_weight,
            chop_cash_weight=chop_cash_weight,
            dist_cash_weight=dist_cash_weight,
            cap_threshold=cap_threshold,
            chop_threshold=chop_threshold,
            dist_threshold=dist_threshold,
            core_block_threshold=core_block_threshold,
        )
        for (
            cap_cash_weight,
            chop_cash_weight,
            dist_cash_weight,
            cap_threshold,
            chop_threshold,
            dist_threshold,
            core_block_threshold,
        ) in itertools.product(
            CAP_CASH_WEIGHTS,
            CHOP_CASH_WEIGHTS,
            DIST_CASH_WEIGHTS,
            CAP_THRESHOLDS,
            CHOP_THRESHOLDS,
            DIST_THRESHOLDS,
            CORE_BLOCK_THRESHOLDS,
        )
    ]
    approx_rows: list[dict[str, object]] = []
    # Static-core reference numbers every candidate is compared against.
    static_1y_ann = float(static_exact["windows"]["1y"]["annualized_return"])
    static_5y_ann = float(static_exact["windows"]["5y"]["annualized_return"])
    static_5y_mdd = float(static_exact["windows"]["5y"]["max_drawdown"])
    for idx, candidate in enumerate(candidates, start=1):
        # Fast approximation: compose the overlay curve from the pre-built
        # period components instead of running a fresh backtest per candidate.
        curve, weights = compose_cash_overlay_curve(candidate=candidate, **components)
        window_results, year_results, score, negative_years, mdd_violations = _evaluate_from_curve(curve, latest_bar)
        beat_static_flags = {
            "1y_ann": float(window_results["1y"]["annualized_return"]) > static_1y_ann,
            "5y_ann": float(window_results["5y"]["annualized_return"]) > static_5y_ann,
            # MDD is negative; >= means "drawdown no worse than static".
            "5y_mdd": float(window_results["5y"]["max_drawdown"]) >= static_5y_mdd,
        }
        approx_rows.append(
            {
                "candidate": asdict(candidate),
                "name": candidate.name,
                "score": score,
                "negative_years": negative_years,
                "mdd_violations": mdd_violations,
                "windows": window_results,
                "years": year_results,
                "avg_weights": {
                    "cap": float(weights["cap_weight"].mean()),
                    "chop": float(weights["chop_weight"].mean()),
                    "dist": float(weights["dist_weight"].mean()),
                    "overlay_total": float(weights["overlay_total"].mean()),
                    "core_cash_pct": float(weights["core_cash_pct"].mean()),
                },
                "beat_static": beat_static_flags,
                "validation": "approx_full_curve_slice_cash_overlay",
            }
        )
        # Progress log every 500 candidates and on the final one.
        if idx % 500 == 0 or idx == len(candidates):
            print(
                f"[approx {idx:04d}/{len(candidates)}] "
                f"1y={window_results['1y']['total_return'] * 100:.2f}% "
                f"5y_ann={window_results['5y']['annualized_return'] * 100:.2f}%",
                flush=True,
            )
    # Rank: beats static 5y ann first, then 1y ann, then fewest negative
    # years, fewest MDD violations, highest score (ascending tuple sort).
    approx_rows.sort(
        key=lambda row: (
            int(not row["beat_static"]["5y_ann"]),
            int(not row["beat_static"]["1y_ann"]),
            int(row["negative_years"]),
            int(row["mdd_violations"]),
            -float(row["score"]),
        )
    )
    exact_top: list[dict[str, object]] = []
    print("[stage] begin exact validation for top candidates", flush=True)
    # Only the top 5 approximate winners get the expensive exact backtest.
    for row in approx_rows[:5]:
        candidate = CashOverlayCandidate(**row["candidate"])
        print(f"[exact-start] {candidate.name}", flush=True)
        result = evaluate_cash_overlay_exact(bundle=bundle, latest_bar=latest_bar, candidate=candidate)
        exact_top.append(result)
        print(
            f"[exact] {candidate.name} 1y={result['windows']['1y']['total_return'] * 100:.2f}% "
            f"5y_ann={result['windows']['5y']['annualized_return'] * 100:.2f}% "
            f"neg={result['negative_years']} mdd_viol={result['mdd_violations']}",
            flush=True,
        )
    # Same ranking logic as the approximate pass, recomputed from exact results.
    exact_top.sort(
        key=lambda row: (
            int(float(row["windows"]["5y"]["annualized_return"]) <= static_5y_ann),
            int(float(row["windows"]["1y"]["annualized_return"]) <= static_1y_ann),
            int(row["negative_years"]),
            int(row["mdd_violations"]),
            -float(row["score"]),
        )
    )
    best_exact = exact_top[0]
    # Previous replacement soft-router exact results, if available on disk.
    soft_exact = json.loads(SOFT_JSON.read_text(encoding="utf-8")) if SOFT_JSON.exists() else None
    payload = {
        "analysis": "strategy32_cash_overlay_search",
        "latest_completed_bar": str(latest_bar),
        "candidate_count": len(candidates),
        "core_filter": CORE_FILTER,
        "engines": {
            "cap": CAP_ENGINE,
            "chop": CHOP_ENGINE,
            "dist": DIST_ENGINE,
        },
        "exposure_summary": exposure_summary,
        "static_exact": static_exact,
        "summary": approx_rows[:20],
        "exact_top": exact_top,
        "best_exact": best_exact,
        "best_soft_exact": soft_exact,
    }
    print("[stage] write outputs", flush=True)
    OUT_JSON.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    # Markdown report body (Korean), assembled line by line.
    lines = [
        "# Strategy32 Cash Overlay 탐색결과",
        "",
        "## 1. 목적",
        "",
        "정적 core를 줄이던 기존 soft-router를 버리고, `overheat_tolerant` core가 실제로 비워두는 현금 위에만 adverse 엔진을 얹는 cash-overlay 구조를 탐색한다.",
        "",
        "## 2. 왜 구조를 바꿨는가",
        "",
        f"- core `overheat_tolerant` 5y 평균 현금 비중: `{exposure_summary['avg_cash_pct'] * 100:.2f}%`",
        f"- core 중앙값 현금 비중: `{exposure_summary['median_cash_pct'] * 100:.2f}%`",
        f"- 현금 비중 `> 50%` 바 비율: `{exposure_summary['cash_gt_50_pct'] * 100:.2f}%`",
        f"- 현금 비중 `> 80%` 바 비율: `{exposure_summary['cash_gt_80_pct'] * 100:.2f}%`",
        "",
        "즉 기존 soft-router는 이미 대부분 현금인 core를 또 줄이고 있었다. overlay는 core를 대체하는 게 아니라, core가 실제로 안 쓰는 현금에만 들어가야 맞다.",
        "",
        "## 3. 탐색 범위",
        "",
        f"- profile: `{PROFILE}`",
        f"- core filter: `{CORE_FILTER}`",
        f"- cap engine: `{CAP_ENGINE}`",
        f"- chop engine: `{CHOP_ENGINE}`",
        f"- dist engine: `{DIST_ENGINE}`",
        f"- cap cash weights: `{CAP_CASH_WEIGHTS}`",
        f"- chop cash weights: `{CHOP_CASH_WEIGHTS}`",
        f"- dist cash weights: `{DIST_CASH_WEIGHTS}`",
        f"- cap thresholds: `{CAP_THRESHOLDS}`",
        f"- chop thresholds: `{CHOP_THRESHOLDS}`",
        f"- dist thresholds: `{DIST_THRESHOLDS}`",
        f"- core block thresholds: `{CORE_BLOCK_THRESHOLDS}`",
        f"- candidate count: `{len(candidates)}`",
        "",
        "## 4. 정적 core exact 기준선",
        "",
        f"- 1y: {_metric_line(static_exact['windows']['1y'], include_ann=False)}",
        f"- 2y: {_metric_line(static_exact['windows']['2y'], include_ann=True)}",
        f"- 3y: {_metric_line(static_exact['windows']['3y'], include_ann=True)}",
        f"- 4y: {_metric_line(static_exact['windows']['4y'], include_ann=True)}",
        f"- 5y: {_metric_line(static_exact['windows']['5y'], include_ann=True)}",
        f"- 2026 YTD: {_metric_line(static_exact['years']['2026_YTD'], include_ann=False)}",
        f"- 2025: {_metric_line(static_exact['years']['2025'], include_ann=False)}",
        f"- 2024: {_metric_line(static_exact['years']['2024'], include_ann=False)}",
        f"- 2023: {_metric_line(static_exact['years']['2023'], include_ann=False)}",
        f"- 2022: {_metric_line(static_exact['years']['2022'], include_ann=False)}",
        f"- 2021: {_metric_line(static_exact['years']['2021'], include_ann=False)}",
        "",
        "## 5. cash-overlay exact 상위 후보",
        "",
    ]
    # One subsection per exactly-validated candidate.
    for idx, row in enumerate(exact_top, start=1):
        candidate = row["candidate"]
        lines.extend(
            [
                f"### {idx}. {row['name']}",
                "",
                f"- weights: `cap {candidate['cap_cash_weight']:.2f}`, `chop {candidate['chop_cash_weight']:.2f}`, `dist {candidate['dist_cash_weight']:.2f}`",
                f"- thresholds: `cap {candidate['cap_threshold']:.2f}`, `chop {candidate['chop_threshold']:.2f}`, `dist {candidate['dist_threshold']:.2f}`, `block {candidate['core_block_threshold']:.2f}`",
                f"- 1y: {_metric_line(row['windows']['1y'], include_ann=False)}",
                f"- 2y: {_metric_line(row['windows']['2y'], include_ann=True)}",
                f"- 3y: {_metric_line(row['windows']['3y'], include_ann=True)}",
                f"- 4y: {_metric_line(row['windows']['4y'], include_ann=True)}",
                f"- 5y: {_metric_line(row['windows']['5y'], include_ann=True)}",
                f"- 2026 YTD: {_metric_line(row['years']['2026_YTD'], include_ann=False)}",
                f"- 2025: {_metric_line(row['years']['2025'], include_ann=False)}",
                f"- 2024: {_metric_line(row['years']['2024'], include_ann=False)}",
                f"- 2023: {_metric_line(row['years']['2023'], include_ann=False)}",
                f"- 2022: {_metric_line(row['years']['2022'], include_ann=False)}",
                f"- 2021: {_metric_line(row['years']['2021'], include_ann=False)}",
                f"- score `{row['score']:.3f}`, negative years `{row['negative_years']}`, mdd violations `{row['mdd_violations']}`",
                "",
            ]
        )
    lines.extend(
        [
            "## 6. 결론",
            "",
            (
                "cash-overlay가 정적 core보다 나아졌는지는 `best_exact`와 `static_exact` 비교로 판단한다. "
                "핵심 비교 포인트는 `1y`, `5y annualized`, `5y MDD`, 그리고 `2025/2024`의 음수 여부다."
            ),
            "",
            f"- best cash-overlay 1y: `{best_exact['windows']['1y']['total_return'] * 100:.2f}%` vs static `{static_exact['windows']['1y']['total_return'] * 100:.2f}%`",
            f"- best cash-overlay 5y ann: `{best_exact['windows']['5y']['annualized_return'] * 100:.2f}%` vs static `{static_exact['windows']['5y']['annualized_return'] * 100:.2f}%`",
            f"- best cash-overlay 5y MDD: `{best_exact['windows']['5y']['max_drawdown'] * 100:.2f}%` vs static `{static_exact['windows']['5y']['max_drawdown'] * 100:.2f}%`",
            "",
        ]
    )
    # Optional comparison against the previous soft-router, if its results exist.
    if soft_exact:
        lines.extend(
            [
                "## 7. 기존 replacement soft-router와 비교",
                "",
                f"- previous soft 1y: `{soft_exact['windows']['1y']['total_return'] * 100:.2f}%`",
                f"- previous soft 5y ann: `{soft_exact['windows']['5y']['annualized_return'] * 100:.2f}%`",
                f"- previous soft 5y MDD: `{soft_exact['windows']['5y']['max_drawdown'] * 100:.2f}%`",
                "",
            ]
        )
    OUT_MD.write_text("\n".join(lines), encoding="utf-8")
    print("[done] cash overlay search complete", flush=True)
# Script entry point.
if __name__ == "__main__":
    main()