logic855
/
claude-mods
zrcadlo https://github.com/0xDarkMatter/claude-mods.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
							#!/usr/bin/env python3
# Rank Playwright tests by flakiness from a JSON report so the agent triages, not eyeballs.
#
# Parses a Playwright JSON report (`--reporter=json`) and surfaces the tests
# worth a human's attention: flaky tests (passed only on retry) first, then
# hard "unexpected" failures. Flaky tests are ranked by retry count desc, then
# total duration desc, because the most-retried, slowest test is the worst
# offender in your queue.
#
# Usage:   triage-flakes.py [OPTIONS] [REPORT]
# Input:   REPORT = path to a Playwright JSON report (positional, default ./results.json)
# Output:  stdout = ranked findings (TSV, or JSON envelope with --json)
# Stderr:  headers, summary, progress, errors
# Exit:    0 parsed fine, no flaky/unexpected tests (clean suite)
#          2 usage, 3 file not found, 4 malformed/not a Playwright report,
#          10 DOMAIN SIGNAL: flaky/unexpected tests present (the thing being triaged)
#
# Examples:
#   npx playwright test --reporter=json > results.json
#   triage-flakes.py results.json
#   triage-flakes.py --outcome all -n 50 results.json
#   triage-flakes.py --json results.json | jq '.data[] | select(.outcome=="flaky")'

import argparse
import json
import os
import sys
from pathlib import Path

# Windows consoles default to cp1252; force UTF-8 so glyphs in framing don't raise
# UnicodeEncodeError (the repo's standard fix).
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
    except (AttributeError, ValueError):
        pass


class Term:
    """Tiny ANSI helper mirroring skills/_lib/term.sh (bash-only; per
    TERMINAL-DESIGN.md §9 the Python port is inline). Honors FORCE_COLOR /
    NO_COLOR / TERM_ASCII; ASCII glyph fallback on TERM_ASCII or a non-UTF stream."""

    _C = {"green": "\033[32m", "yellow": "\033[33m", "orange": "\033[38;5;208m",
          "red": "\033[31m", "cyan": "\033[36m", "dim": "\033[2m", "off": "\033[0m"}
    _GLYPH = {"ok": "✓", "bad": "✗", "warn": "▲", "skip": "—", "na": "—", "unknown": "?"}
    _ASCII = {"ok": "+", "bad": "x", "warn": "!", "skip": "-", "na": "-", "unknown": "?"}
    _MARK_COLOR = {"ok": "green", "bad": "red", "warn": "orange", "skip": "dim",
                   "na": "dim", "unknown": "yellow"}

    def __init__(self, stream=sys.stderr):
        enc = (getattr(stream, "encoding", "") or "").lower()
        self.ascii = (os.environ.get("TERM_ASCII") == "1"
                      or os.environ.get("FLEET_ASCII") == "1" or "utf" not in enc)
        if os.environ.get("FORCE_COLOR"):
            self.color = True
        elif (os.environ.get("NO_COLOR") is not None or os.environ.get("TERM") == "dumb"
              or not getattr(stream, "isatty", lambda: False)()):
            self.color = False
        else:
            self.color = True

    def c(self, name, text):
        return f"{self._C.get(name, '')}{text}{self._C['off']}" if self.color else text

    def mark(self, state):
        return self.c(self._MARK_COLOR.get(state, ""),
                      (self._ASCII if self.ascii else self._GLYPH).get(state, "."))

    def hdr(self, text):
        return self.c("cyan", f"=== {text} ===")


TERM = Term(sys.stderr)

SCHEMA = "claude-mods.playwright-ops.flake-triage/v1"

EXIT_OK = 0
EXIT_USAGE = 2
EXIT_NOT_FOUND = 3
EXIT_VALIDATION = 4
EXIT_FINDINGS = 10

# Rank order for outcomes: flaky always sorts before unexpected.
OUTCOME_RANK = {"flaky": 0, "unexpected": 1}


def err(msg):
    print(msg, file=sys.stderr)


def walk_suites(suites, finds, file_hint=""):
    """Recursively descend the suites tree collecting spec/test results."""
    for suite in suites or []:
        # A suite's file is on the suite node; specs inherit it.
        sfile = suite.get("file") or file_hint
        for spec in suite.get("specs", []) or []:
            collect_spec(spec, finds, sfile)
        walk_suites(suite.get("suites"), finds, sfile)


def collect_spec(spec, finds, sfile):
    title = spec.get("title", "<untitled>")
    sline = spec.get("line", 0)
    sfile = spec.get("file") or sfile
    for test in spec.get("tests", []) or []:
        outcome = test.get("status") or test.get("outcome") or "unknown"
        results = test.get("results", []) or []
        # status sequence ordered by retry index; duration summed across attempts
        ordered = sorted(results, key=lambda r: r.get("retry", 0))
        statuses = [r.get("status", "unknown") for r in ordered]
        duration = sum(int(r.get("duration", 0) or 0) for r in ordered)
        retries = max((r.get("retry", 0) for r in ordered), default=0)
        location = f"{sfile}:{sline}" if sfile else f"?:{sline}"
        finds.append(
            {
                "title": title,
                "location": location,
                "outcome": outcome,
                "retries": retries,
                "statuses": statuses,
                "durationMs": duration,
            }
        )


def load_report(path):
    """Return parsed Playwright report dict, or raise ValueError if not one."""
    try:
        raw = path.read_text(encoding="utf-8")
    except OSError as e:
        raise FileNotFoundError(str(e))
    try:
        data = json.loads(raw)
    except json.JSONDecodeError as e:
        raise ValueError(f"not valid JSON: {e}")
    if not isinstance(data, dict) or "suites" not in data:
        raise ValueError("missing top-level 'suites' key - not a Playwright JSON report")
    if not isinstance(data["suites"], list):
        raise ValueError("'suites' is not a list — not a Playwright JSON report")
    return data


def main(argv=None):
    p = argparse.ArgumentParser(
        prog="triage-flakes.py",
        description="Rank Playwright tests by flakiness from a JSON report.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "EXAMPLES:\n"
            "  npx playwright test --reporter=json > results.json\n"
            "  triage-flakes.py results.json\n"
            "  triage-flakes.py --outcome all -n 50 results.json\n"
            "  triage-flakes.py --json results.json | jq '.data[] | select(.outcome==\"flaky\")'\n"
            "\n"
            "EXIT CODES:\n"
            "  0  parsed fine, no flaky/unexpected tests (clean suite)\n"
            "  2  usage   3  file not found   4  malformed report\n"
            "  10 flaky/unexpected tests present (the triage signal)\n"
        ),
    )
    p.add_argument(
        "report",
        nargs="?",
        default="results.json",
        help="path to Playwright JSON report (default: ./results.json)",
    )
    p.add_argument("--json", action="store_true", help="emit a JSON envelope instead of TSV")
    p.add_argument("-q", "--quiet", action="store_true",
                   help="suppress the stderr summary header (errors still print)")
    p.add_argument(
        "-n",
        "--limit",
        type=int,
        default=20,
        metavar="N",
        help="cap rows printed (default 20)",
    )
    p.add_argument(
        "--outcome",
        default="flaky,unexpected",
        help="which outcomes to include: flaky | unexpected | all (default flaky,unexpected)",
    )
    args = p.parse_args(argv)

    if args.limit < 0:
        err("ERROR: --limit must be >= 0")
        return EXIT_USAGE

    sel = args.outcome.strip().lower()
    if sel == "all":
        wanted = None  # all outcomes
    else:
        wanted = {x.strip() for x in sel.split(",") if x.strip()}
        unknown = wanted - {"flaky", "unexpected", "expected", "skipped"}
        if unknown:
            err(f"ERROR: unknown outcome(s): {', '.join(sorted(unknown))} (use flaky|unexpected|all)")
            return EXIT_USAGE

    path = Path(args.report).resolve()
    if not path.exists():
        err(f"ERROR: report not found: {path}")
        if args.json:
            print(json.dumps({"error": {"code": "NOT_FOUND", "message": f"report not found: {path}"}}))
        return EXIT_NOT_FOUND
    if not path.is_file():
        err(f"ERROR: not a file: {path}")
        return EXIT_NOT_FOUND

    try:
        data = load_report(path)
    except FileNotFoundError as e:
        err(f"ERROR: cannot read report: {e}")
        return EXIT_NOT_FOUND
    except ValueError as e:
        err(f"ERROR: malformed report: {e}")
        if args.json:
            print(json.dumps({"error": {"code": "VALIDATION", "message": str(e)}}))
        return EXIT_VALIDATION

    finds = []
    walk_suites(data.get("suites"), finds)

    # The domain signal is computed over ALL findings, regardless of the display
    # filter — a clean suite means zero flaky AND zero unexpected, full stop.
    signal_present = any(f["outcome"] in ("flaky", "unexpected") for f in finds)

    if wanted is None:
        shown = list(finds)
    else:
        shown = [f for f in finds if f["outcome"] in wanted]

    # Rank: flaky before unexpected (OUTCOME_RANK), then retries desc, duration desc.
    shown.sort(
        key=lambda f: (
            OUTCOME_RANK.get(f["outcome"], 99),
            -f["retries"],
            -f["durationMs"],
        )
    )

    capped = shown[: args.limit] if args.limit else shown

    total = len(finds)
    flaky_n = sum(1 for f in finds if f["outcome"] == "flaky")
    unexp_n = sum(1 for f in finds if f["outcome"] == "unexpected")
    if not args.quiet:
        err(TERM.hdr(f"Flake triage: {path.name}"))
        flaky_txt = TERM.c("orange", f"{flaky_n} flaky") if flaky_n else "0 flaky"
        unexp_txt = TERM.c("red", f"{unexp_n} unexpected") if unexp_n else "0 unexpected"
        err(f"  {total} tests | {flaky_txt} | {unexp_txt} | showing {len(capped)} of {len(shown)}")

    if args.json:
        envelope = {
            "data": capped,
            "meta": {
                "count": len(capped),
                "total_matched": len(shown),
                "flaky": flaky_n,
                "unexpected": unexp_n,
                "schema": SCHEMA,
            },
        }
        print(json.dumps(envelope, indent=2))
    else:
        print("outcome\tretries\tstatuses\tduration_ms\tlocation\ttitle")
        for f in capped:
            print(
                f"{f['outcome']}\t{f['retries']}\t{'->'.join(f['statuses'])}\t"
                f"{f['durationMs']}\t{f['location']}\t{f['title']}"
            )

    return EXIT_FINDINGS if signal_present else EXIT_OK


if __name__ == "__main__":
    sys.exit(main())