# ctm-dqn/scripts/plot_oldproj_edge_equivalen...

import argparse
import sys
import xml.etree.ElementTree as ET
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd


# The project root is the parent of the directory that contains this script.
_THIS_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = _THIS_DIR.parent

# Default input/output locations, both anchored at the project root.
OLD_PROJ_OUTPUTS = PROJECT_ROOT / "old_proj" / "outputs"
DEFAULT_RESULTS_DIR = PROJECT_ROOT / "results" / "edge_flow_analysis"

# Dates (YYYY-MM-DD) analyzed when --dates is not given on the command line.
DEFAULT_DATES = [
    "2023-09-29", "2023-09-30",
    "2023-10-01", "2023-10-02", "2023-10-03", "2023-10-04",
]

# Line colors for the per-date curves; reused cyclically when there are more
# dates than colors.
DEFAULT_COLORS = [
    "#1f77b4", "#ff7f0e", "#2ca02c",
    "#d62728", "#9467bd", "#8c564b",
]


def parse_args():
    """Parse the command-line options of this script.

    Returns:
        The ``argparse.Namespace`` with ``edge_id``, ``dates``,
        ``old_proj_outputs``, ``results_dir`` and ``continuous``.
    """
    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        (
            "--edge-id",
            {
                "default": "G1523_AM7.1",
                "help": "SUMO edge id to analyze. Default: G1523_AM7.1",
            },
        ),
        (
            "--dates",
            {
                "nargs": "+",
                "default": DEFAULT_DATES,
                "help": "Dates in YYYY-MM-DD format. Default: 2023-09-29 ... 2023-10-04",
            },
        ),
        (
            "--old-proj-outputs",
            {
                "default": str(OLD_PROJ_OUTPUTS),
                "help": "Base directory of old_proj outputs. Default: old_proj/outputs",
            },
        ),
        (
            "--results-dir",
            {
                "default": str(DEFAULT_RESULTS_DIR),
                "help": "Output directory for csv/png. Default: results/edge_flow_analysis",
            },
        ),
        (
            "--continuous",
            {
                "action": "store_true",
                "help": "Plot all selected dates as one continuous time series.",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        description="Plot equivalent veh/h for one edge across multiple old_proj flow days."
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()


def flow_output_dir(base_dir: Path, date_str: str) -> Path:
    """Return the path of the ``trips.xml`` file for one flow day.

    Despite the name, the result points at the file
    ``<base_dir>/flow_<YYYYMMDD>/temp/trips.xml``, not at a directory.

    Args:
        base_dir: Base directory that holds the per-day ``flow_*`` folders.
        date_str: Date whose ``-`` separators are stripped to form the
            folder name.
    """
    compact_date = "".join(date_str.split("-"))
    day_folder = base_dir / f"flow_{compact_date}"
    return day_folder / "temp" / "trips.xml"


def extract_route_ids(trips_path: Path, edge_id: str) -> set[str]:
    """Collect the ids of all routes whose edge list contains ``edge_id``.

    Only ``<route>`` elements that are direct children of the XML root are
    inspected. Routes without a non-empty ``id`` attribute are ignored.

    Args:
        trips_path: XML file to parse.
        edge_id: Edge id that must appear as a whole token in the
            whitespace-separated ``edges`` attribute.

    Returns:
        The set of matching route ids (empty when nothing matches).
    """
    top_level_routes = ET.parse(trips_path).getroot().findall("route")
    return {
        route.attrib["id"]
        for route in top_level_routes
        if edge_id in route.attrib.get("edges", "").split() and route.attrib.get("id")
    }


def extract_equivalent_veh_h(trips_path: Path, route_ids: set[str]) -> pd.DataFrame:
    """Aggregate flow counts on the given routes into an hourly-rate table.

    Every top-level ``<flow>`` whose ``route`` attribute is in ``route_ids``
    contributes its ``number`` to the total of its ``begin`` second. Totals
    are multiplied by 12 to obtain ``equivalent_veh_h``.
    NOTE(review): the factor 12 presumably assumes 5-minute flow intervals
    (3600 / 300) — confirm against how the trips files are generated.

    Args:
        trips_path: XML file to parse.
        route_ids: Route ids whose flows should be counted.

    Returns:
        A DataFrame with columns ``clock_time`` (``HH:MM`` derived from
        ``begin`` seconds) and ``equivalent_veh_h``, ordered by ``begin``.
        When no flow matches, an empty DataFrame with those two columns.

    Raises:
        KeyError: If a matching flow lacks a ``begin`` or ``number`` attribute.
    """
    flow_elems = ET.parse(trips_path).getroot().findall("flow")
    # Attributes may be written as floats ("300.00"), hence int(float(...)).
    records = [
        {
            "begin": int(float(flow.attrib["begin"])),
            "number": int(float(flow.attrib["number"])),
        }
        for flow in flow_elems
        if flow.attrib.get("route") in route_ids
    ]

    if len(records) == 0:
        return pd.DataFrame(columns=["clock_time", "equivalent_veh_h"])

    per_begin = pd.DataFrame(records).groupby("begin", as_index=False)["number"].sum()
    per_begin = per_begin.sort_values("begin")
    begin_stamps = pd.to_datetime(per_begin["begin"], unit="s")
    per_begin["clock_time"] = begin_stamps.dt.strftime("%H:%M")
    per_begin["equivalent_veh_h"] = per_begin["number"] * 12
    return per_begin[["clock_time", "equivalent_veh_h"]]


def build_combined_df(edge_id: str, dates: list[str], outputs_dir: Path) -> tuple[pd.DataFrame, dict[str, set[str]]]:
    """Build a wide table with one equivalent-flow column per date.

    For each date the day's trips file is located, the routes through
    ``edge_id`` are collected and their flows aggregated. The per-day tables
    are outer-merged on ``clock_time`` so that every time of day seen on any
    date gets a row; dates without data at that time hold NaN.

    Args:
        edge_id: Edge id to analyze.
        dates: Dates in ``YYYY-MM-DD`` format. Repeated dates are processed
            once, keeping first-occurrence order.
        outputs_dir: Base directory that holds the per-day flow folders.

    Returns:
        ``(combined_df, route_map)`` where ``combined_df`` has the column
        ``clock_time`` plus one ``veh_h_<YYYYMMDD>`` column per date, sorted
        by ``clock_time``, and ``route_map`` maps each date string to the
        route ids used for it.

    Raises:
        ValueError: If ``dates`` is empty, or if no route in a day's trips
            file passes through ``edge_id``.
        FileNotFoundError: If a day's trips file does not exist.
    """
    if not dates:
        # Without this guard the merge accumulator stays None and the final
        # sort fails with an opaque AttributeError.
        raise ValueError("No dates provided; at least one date is required.")

    # A repeated date would merge two identically named columns, which pandas
    # renames with _x/_y suffixes, so the expected veh_h_<date> column would
    # no longer exist. De-duplicate while preserving order.
    unique_dates = list(dict.fromkeys(dates))

    combined_df = None
    route_map: dict[str, set[str]] = {}

    for date_str in unique_dates:
        trips_path = flow_output_dir(outputs_dir, date_str)
        if not trips_path.exists():
            raise FileNotFoundError(f"Missing trips file: {trips_path}")

        route_ids = extract_route_ids(trips_path, edge_id)
        if not route_ids:
            raise ValueError(f"No routes through edge {edge_id} in {trips_path}")

        date_df = extract_equivalent_veh_h(trips_path, route_ids)
        date_key = date_str.replace("-", "")
        date_df = date_df.rename(columns={"equivalent_veh_h": f"veh_h_{date_key}"})
        route_map[date_str] = route_ids

        if combined_df is None:
            combined_df = date_df
        else:
            # Outer join keeps times of day that appear on only some dates.
            combined_df = combined_df.merge(date_df, on="clock_time", how="outer")

    combined_df = combined_df.sort_values("clock_time").reset_index(drop=True)
    return combined_df, route_map


def plot_combined_df(edge_id: str, combined_df: pd.DataFrame, dates: list[str], output_png: Path):
    """Draw one equivalent-flow curve per date over time of day and save it.

    Args:
        edge_id: Edge id shown in the plot title.
        combined_df: Table with a ``clock_time`` column and one
            ``veh_h_<YYYYMMDD>`` column for every entry of ``dates``.
        dates: Dates in ``YYYY-MM-DD`` format; each becomes one curve
            labelled ``MM-DD``.
        output_png: Destination path of the saved figure.
    """
    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(15, 6.5))

    for position, date_str in enumerate(dates):
        series_name = "veh_h_" + date_str.replace("-", "")
        curve_label = pd.to_datetime(date_str).strftime("%m-%d")
        # Wrap around the palette when there are more dates than colors.
        curve_color = DEFAULT_COLORS[position % len(DEFAULT_COLORS)]
        ax.plot(
            combined_df["clock_time"],
            combined_df[series_name],
            label=curve_label,
            color=curve_color,
            linewidth=1.8,
        )

    # Thin the x ticks to roughly a dozen labels regardless of row count.
    row_count = len(combined_df)
    stride = max(1, row_count // 12)
    ax.set_xticks(combined_df["clock_time"].iloc[::stride])
    ax.tick_params(axis="x", rotation=45)

    ax.set_xlabel("Time of Day")
    ax.set_ylabel("Equivalent Flow (veh/h)")
    ax.set_title(f"{edge_id} Equivalent Flow Comparison")
    legend_columns = min(3, len(dates))
    ax.legend(ncol=legend_columns, frameon=True)

    fig.tight_layout()
    fig.savefig(output_png, dpi=180, bbox_inches="tight")
    plt.close(fig)


def build_continuous_df(edge_id: str, dates: list[str], outputs_dir: Path) -> tuple[pd.DataFrame, dict[str, set[str]]]:
    """Build one long time series of equivalent flow across all dates.

    Each day's ``clock_time`` values are combined with the date string into
    full timestamps, and the per-day tables are stacked and sorted by time.

    Args:
        edge_id: Edge id to analyze.
        dates: Dates in ``YYYY-MM-DD`` format.
        outputs_dir: Base directory that holds the per-day flow folders.

    Returns:
        ``(continuous_df, route_map)`` where ``continuous_df`` has the columns
        ``datetime`` and ``equivalent_veh_h`` sorted by ``datetime``, and
        ``route_map`` maps each date string to the route ids used for it.

    Raises:
        FileNotFoundError: If a day's trips file does not exist.
        ValueError: If no route in a day's trips file passes through
            ``edge_id``.
    """
    route_map: dict[str, set[str]] = {}
    daily_frames = []

    for date_str in dates:
        trips_path = flow_output_dir(outputs_dir, date_str)
        if not trips_path.exists():
            raise FileNotFoundError(f"Missing trips file: {trips_path}")

        route_ids = extract_route_ids(trips_path, edge_id)
        if len(route_ids) == 0:
            raise ValueError(f"No routes through edge {edge_id} in {trips_path}")

        day_df = extract_equivalent_veh_h(trips_path, route_ids)
        # "<date> <HH:MM>" strings parsed into full timestamps.
        stamps = pd.to_datetime(date_str + " " + day_df["clock_time"])
        day_df = day_df.assign(datetime=stamps)
        daily_frames.append(day_df[["datetime", "equivalent_veh_h"]])
        route_map[date_str] = route_ids

    stacked = pd.concat(daily_frames, ignore_index=True)
    continuous_df = stacked.sort_values("datetime").reset_index(drop=True)
    return continuous_df, route_map


def plot_continuous_df(edge_id: str, continuous_df: pd.DataFrame, output_png: Path):
    """Draw the continuous equivalent-flow timeline and save it as an image.

    Args:
        edge_id: Edge id shown in the plot title.
        continuous_df: Table with ``datetime`` and ``equivalent_veh_h``
            columns.
        output_png: Destination path of the saved figure.
    """
    plt.style.use("seaborn-v0_8-whitegrid")
    fig, ax = plt.subplots(figsize=(16, 6.5))

    timeline = continuous_df["datetime"]
    ax.plot(timeline, continuous_df["equivalent_veh_h"], color="#1f77b4", linewidth=1.6)

    # Keep roughly fourteen tick labels no matter how many samples there are.
    stride = max(1, len(continuous_df) // 14)
    tick_times = timeline.iloc[::stride]
    tick_labels = [stamp.strftime("%m-%d %H:%M") for stamp in tick_times]
    ax.set_xticks(tick_times)
    ax.set_xticklabels(tick_labels, rotation=45, ha="right")

    ax.set_xlabel("Time")
    ax.set_ylabel("Equivalent Flow (veh/h)")
    ax.set_title(f"{edge_id} Equivalent Flow Continuous Timeline")

    fig.tight_layout()
    fig.savefig(output_png, dpi=180, bbox_inches="tight")
    plt.close(fig)


def main():
    """Run the analysis: build the table, write CSV and PNG, report routes.

    With ``--continuous`` a single timeline over all dates is produced;
    otherwise the dates are compared against each other by time of day.
    Output files are named after the edge id and the smallest/largest date
    string.
    """
    args = parse_args()
    outputs_dir = Path(args.old_proj_outputs)
    results_dir = Path(args.results_dir)
    results_dir.mkdir(parents=True, exist_ok=True)

    # Plain string min/max; chronological for ISO-formatted dates.
    start_tag, end_tag = min(args.dates), max(args.dates)
    mode_part = "_continuous" if args.continuous else ""
    file_stem = f"{args.edge_id}_equivalent_veh_h{mode_part}_{start_tag}_to_{end_tag}"
    csv_path = results_dir / f"{file_stem}.csv"
    png_path = results_dir / f"{file_stem}.png"

    if args.continuous:
        table, route_map = build_continuous_df(args.edge_id, args.dates, outputs_dir)
        table.to_csv(csv_path, index=False, encoding="utf-8-sig")
        plot_continuous_df(args.edge_id, table, png_path)
    else:
        table, route_map = build_combined_df(args.edge_id, args.dates, outputs_dir)
        table.to_csv(csv_path, index=False, encoding="utf-8-sig")
        plot_combined_df(args.edge_id, table, args.dates, png_path)

    print(f"Saved CSV: {csv_path}")
    print(f"Saved PNG: {png_path}")
    print("Routes used:")
    for date_str, route_ids in route_map.items():
        listed_routes = ", ".join(sorted(route_ids))
        print(f"  {date_str}: {listed_routes}")


if __name__ == "__main__":
    # Script entry point: any failure is reported as a one-line message on
    # stderr (no traceback) and the process exits with status 1.
    try:
        main()
    except Exception as error:
        print(error, file=sys.stderr)
        raise SystemExit(1)