#!/usr/bin/env python3
"""Minimal Google Data Portability probe for YouTube My Activity.

Purpose:
- Perform OAuth for dataportability.myactivity.youtube.
- Initiate a myactivity.youtube archive job (the time window is set by the
  optional --start/--end arguments; the usage example below requests one day).
- Poll job state.
- Download completed archive files to local/server-side staging.

This script does not write to Obsidian.

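Usage outline:
  python3 scripts/google_data_portability_probe.py auth --client-secret client_secret.json --token token.json
  python3 scripts/google_data_portability_probe.py initiate --token token.json --start 2026-05-15T00:00:00Z --end 2026-05-16T00:00:00Z --state state.json
  python3 scripts/google_data_portability_probe.py poll --token token.json --state state.json --download-dir downloads

Other subcommands:
  python3 scripts/google_data_portability_probe.py check-access --token token.json
  python3 scripts/google_data_portability_probe.py auth-url --client-secret client_secret.json
  python3 scripts/google_data_portability_probe.py auth-code --client-secret client_secret.json --token token.json --final-url 'http://localhost/?code=...'

check-access, initiate and poll also accept --client-secret; when it is given,
an expired access token is refreshed before the API call.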
"""

from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any

# OAuth scope requested in the consent URL and recorded in the saved token.
SCOPE = "https://www.googleapis.com/auth/dataportability.myactivity.youtube"
# Resource name sent in the "resources" list of the archive initiate request.
RESOURCE = "myactivity.youtube"
# Endpoint the consent URL is built on.
AUTH_URL = "https://accounts.google.com/o/oauth2/v2/auth"
# Endpoint used for both authorization-code exchange and token refresh.
TOKEN_URL = "https://oauth2.googleapis.com/token"
# Prefix of every Data Portability API URL this script calls.
API_BASE = "https://dataportability.googleapis.com/v1"
# Redirect URI used when --redirect-uri is not supplied.
DEFAULT_REDIRECT_URI = "http://localhost"


def load_json(path: Path) -> Any:
    """Read *path* and return its decoded JSON content.

    The file is handed to the JSON decoder as raw bytes, so decoding does not
    depend on the platform's locale encoding: UTF-8, UTF-16 and UTF-32 are
    detected automatically and a leading UTF-8 byte-order mark is accepted.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    return json.loads(path.read_bytes())


def save_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as pretty-printed JSON.

    Any missing parent directories are created first. The output uses
    two-space indentation and sorted keys, and ends with a newline.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(data, indent=2, sort_keys=True)
    with path.open("w") as handle:
        handle.write(rendered + "\n")


def client_info(client_secret_path: Path) -> tuple[str, str]:
    """Return ``(client_id, client_secret)`` read from an OAuth client file.

    The credentials are taken from the first non-empty of the ``installed``
    section, the ``web`` section, or the top-level object itself.

    Raises:
        KeyError: If the selected section has no ``client_id`` or
            ``client_secret`` entry.
    """
    document = load_json(client_secret_path)
    section = document
    for key in ("installed", "web"):
        candidate = document.get(key)
        if candidate:
            section = candidate
            break
    return section["client_id"], section["client_secret"]


def request_json(method: str, url: str, token: str | None = None, body: Any | None = None) -> Any:
    headers = {"Accept": "application/json"}
    data = None
    if token:
        headers["Authorization"] = f"Bearer {token}"
    if body is not None:
        headers["Content-Type"] = "application/json"
        data = json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            raw = resp.read().decode("utf-8", "replace")
            return json.loads(raw) if raw.strip() else {}
    except urllib.error.HTTPError as exc:
        raw = exc.read().decode("utf-8", "replace")
        raise RuntimeError(f"HTTP {exc.code} {url}: {raw}") from exc


def refresh_if_needed(token_path: Path, client_secret_path: Path | None = None) -> dict[str, Any]:
    """Load the saved token and refresh its access token when it is stale.

    Args:
        token_path: JSON file holding the token; rewritten after a refresh.
        client_secret_path: Optional OAuth client file. Without it no refresh
            is attempted.

    Returns:
        The token dictionary. It is returned as stored when it is still more
        than 120 seconds from ``expires_at``, or when there is no refresh
        token or no client file to refresh it with.

    Raises:
        RuntimeError: If the token endpoint rejects the refresh request. The
            message carries the HTTP status and the response body.
    """
    token = load_json(token_path)
    # A missing or null expiry is treated as "already expired".
    expires_at = token.get("expires_at") or 0
    # Refresh slightly early so the token does not lapse mid-request.
    if time.time() < expires_at - 120:
        return token
    refresh_token = token.get("refresh_token")
    if not refresh_token or not client_secret_path:
        # Best effort: nothing to refresh with, so hand back what is stored.
        return token
    client_id, client_secret = client_info(client_secret_path)
    body = urllib.parse.urlencode({
        "client_id": client_id,
        "client_secret": client_secret,
        "refresh_token": refresh_token,
        "grant_type": "refresh_token",
    }).encode()
    req = urllib.request.Request(TOKEN_URL, data=body, method="POST")
    req.add_header("Content-Type", "application/x-www-form-urlencoded")
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            new = json.loads(resp.read().decode())
    except urllib.error.HTTPError as exc:
        # Surface the server's error body; the bare HTTPError hides the reason.
        raw = exc.read().decode("utf-8", "replace")
        raise RuntimeError(f"Token refresh failed: HTTP {exc.code}: {raw}") from exc
    # Merge instead of replace so stored fields absent from the response
    # (for example the refresh token itself) are kept.
    token.update(new)
    if "expires_in" in new:
        token["expires_at"] = time.time() + int(new["expires_in"])
    save_json(token_path, token)
    return token


def auth_url(client_id: str, redirect_uri: str) -> str:
    """Return the consent URL for *client_id* and *redirect_uri*.

    The URL requests an authorization code for ``SCOPE`` with offline access,
    forces the consent prompt and disables inclusion of previously granted
    scopes.
    """
    query = urllib.parse.urlencode(
        [
            ("client_id", client_id),
            ("redirect_uri", redirect_uri),
            ("response_type", "code"),
            ("scope", SCOPE),
            ("access_type", "offline"),
            ("prompt", "consent"),
            ("include_granted_scopes", "false"),
        ]
    )
    return f"{AUTH_URL}?{query}"


def exchange_code(client_id: str, client_secret: str, redirect_uri: str, code: str, token_path: Path) -> int:
    """Trade an authorization code for tokens and save them to *token_path*.

    The saved token is the endpoint's response plus ``scope_requested`` and,
    when the response has ``expires_in``, an absolute ``expires_at`` time.

    Returns:
        ``0`` once the token file has been written, or ``1`` after printing
        the error to stderr when the token endpoint returns an HTTP error.
    """
    form = {
        "code": code,
        "client_id": client_id,
        "client_secret": client_secret,
        "redirect_uri": redirect_uri,
        "grant_type": "authorization_code",
    }
    request = urllib.request.Request(
        TOKEN_URL,
        data=urllib.parse.urlencode(form).encode(),
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            token = json.loads(response.read().decode())
    except urllib.error.HTTPError as exc:
        raw = exc.read().decode("utf-8", "replace")
        print(f"Token exchange failed: HTTP {exc.code}: {raw}", file=sys.stderr)
        return 1

    if "expires_in" in token:
        # Store an absolute expiry so later runs can tell when to refresh.
        token["expires_at"] = time.time() + int(token["expires_in"])
    token["scope_requested"] = SCOPE

    save_json(token_path, token)
    print(f"Saved token to {token_path}")
    return 0


def cmd_auth(args: argparse.Namespace) -> int:
    """Run the interactive authorization flow.

    Prints the consent URL, reads either the final redirected URL or a bare
    authorization code from stdin, and exchanges the code for a token.

    Returns:
        The result of the token exchange, or ``2`` when no code was supplied.
    """
    client_id, client_secret = client_info(Path(args.client_secret))
    redirect_uri = args.redirect_uri

    print("Open this URL in a browser and authorize access:\n")
    print(auth_url(client_id, redirect_uri))
    print("\nAfter Google redirects to localhost, copy the full final URL from the browser address bar.")
    print("If the page says it cannot connect, that is okay; the code is in the URL.")

    pasted = input("final redirected URL or code> ").strip()
    code = pasted
    if pasted.startswith("http"):
        # A full URL was pasted: take the code from its query string.
        query = urllib.parse.urlparse(pasted).query
        code = urllib.parse.parse_qs(query).get("code", [""])[0]

    if code:
        return exchange_code(client_id, client_secret, redirect_uri, code, Path(args.token))
    print("No authorization code found", file=sys.stderr)
    return 2


def cmd_auth_url(args: argparse.Namespace) -> int:
    """Print the consent URL for the given client file and return ``0``."""
    secrets_path = Path(args.client_secret)
    client_id = client_info(secrets_path)[0]
    consent_url = auth_url(client_id, args.redirect_uri)
    print(consent_url)
    return 0


def cmd_auth_code(args: argparse.Namespace) -> int:
    """Exchange a code given on the command line for a token.

    When ``--final-url`` is given, the code is read from its query string and
    ``--code`` is ignored, even if the URL carries no ``code`` parameter.

    Returns:
        The result of the token exchange, or ``2`` when no code is available.
    """
    client_id, client_secret = client_info(Path(args.client_secret))

    if args.final_url:
        query = urllib.parse.urlparse(args.final_url).query
        code = urllib.parse.parse_qs(query).get("code", [""])[0]
    else:
        code = args.code

    if code:
        return exchange_code(client_id, client_secret, args.redirect_uri, code, Path(args.token))
    print("Provide --code or --final-url", file=sys.stderr)
    return 2


def cmd_check_access(args: argparse.Namespace) -> int:
    """POST an empty body to ``accessType:check`` and print the JSON reply.

    The token is refreshed first when a client file was supplied and the
    stored token is stale. Always returns ``0``.
    """
    token_path = Path(args.token)
    secret_path = Path(args.client_secret) if args.client_secret else None
    credentials = refresh_if_needed(token_path, secret_path)

    endpoint = f"{API_BASE}/accessType:check"
    reply = request_json("POST", endpoint, credentials["access_token"], {})
    print(json.dumps(reply, indent=2, sort_keys=True))
    return 0


def cmd_initiate(args: argparse.Namespace) -> int:
    """Start an archive job and record it in the state file.

    The request lists ``RESOURCE`` and includes ``startTime`` / ``endTime``
    only when ``--start`` / ``--end`` were given. The state file receives the
    creation time, the request, the raw response and the job id and access
    type taken from that response; the same data is printed. Always returns
    ``0``.
    """
    token_path = Path(args.token)
    secret_path = Path(args.client_secret) if args.client_secret else None
    credentials = refresh_if_needed(token_path, secret_path)

    body = {"resources": [RESOURCE]}
    for field, value in (("startTime", args.start), ("endTime", args.end)):
        if value:
            body[field] = value

    endpoint = f"{API_BASE}/portabilityArchive:initiate"
    data = request_json("POST", endpoint, credentials["access_token"], body)

    created_at = dt.datetime.now(dt.timezone.utc).isoformat()
    state = {
        "created_at": created_at,
        "request": body,
        "initiate_response": data,
        "archive_job_id": data.get("archiveJobId"),
        "access_type": data.get("accessType"),
    }
    save_json(Path(args.state), state)
    print(json.dumps(state, indent=2, sort_keys=True))
    return 0


def _download_archive(signed_url: str, dest: Path, chunk_size: int = 1024 * 1024) -> None:
    """Stream the content at *signed_url* into *dest*.

    Data is copied in ``chunk_size`` pieces so the archive never has to fit
    in memory. It is written to a sibling ``.part`` file that is renamed to
    *dest* only after the transfer finishes, so an interrupted download never
    leaves a truncated file under the final name.
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        with urllib.request.urlopen(signed_url, timeout=300) as resp, partial.open("wb") as fh:
            for chunk in iter(lambda: resp.read(chunk_size), b""):
                fh.write(chunk)
    except BaseException:
        # Drop the fragment (if it was created at all), then re-raise.
        try:
            partial.unlink()
        except FileNotFoundError:
            pass
        raise
    partial.replace(dest)


def cmd_poll(args: argparse.Namespace) -> int:
    """Query the archive job state once and optionally download the result.

    The job id is read from the state file. The poll response and its
    timestamp are written back to the state file and the response is printed.
    When the job state is ``COMPLETE`` and ``--download-dir`` was given, every
    URL in the response is downloaded to
    ``<download-dir>/archive-<job id>-<n>.zip``.

    Returns:
        ``0`` after a successful poll, or ``2`` when the state file holds no
        archive job id.
    """
    token = refresh_if_needed(Path(args.token), Path(args.client_secret) if args.client_secret else None)
    state_path = Path(args.state)
    state = load_json(state_path)
    # Fall back to the raw initiate response; tolerate it being absent or null.
    job_id = state.get("archive_job_id") or (state.get("initiate_response") or {}).get("archiveJobId")
    if not job_id:
        print("No archive job id in state file", file=sys.stderr)
        return 2
    name = f"archiveJobs/{job_id}/portabilityArchiveState"
    url = f"{API_BASE}/{urllib.parse.quote(name, safe='/')}"
    data = request_json("GET", url, token["access_token"])
    state["last_poll_at"] = dt.datetime.now(dt.timezone.utc).isoformat()
    state["last_poll_response"] = data
    # Persist the poll result before downloading so it survives a failed download.
    save_json(state_path, state)
    print(json.dumps(data, indent=2, sort_keys=True))
    if data.get("state") == "COMPLETE" and args.download_dir:
        d = Path(args.download_dir)
        d.mkdir(parents=True, exist_ok=True)
        # "or []" also covers a response whose "urls" is present but null.
        for idx, signed_url in enumerate(data.get("urls") or [], start=1):
            out = d / f"archive-{job_id}-{idx}.zip"
            # Only a prefix of the signed URL is printed.
            print(f"Downloading {signed_url[:80]}... -> {out}")
            _download_archive(signed_url, out)
    return 0


def main() -> int:
    """Parse the command line and run the selected subcommand.

    Returns:
        The exit status returned by the subcommand's handler.
    """
    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd", required=True)

    # Interactive authorization: print URL, read code, save token.
    auth = commands.add_parser("auth")
    auth.add_argument("--client-secret", required=True)
    auth.add_argument("--token", required=True)
    auth.add_argument("--redirect-uri", default=DEFAULT_REDIRECT_URI)
    auth.set_defaults(func=cmd_auth)

    # Non-interactive step 1: print the consent URL only.
    auth_url_cmd = commands.add_parser("auth-url")
    auth_url_cmd.add_argument("--client-secret", required=True)
    auth_url_cmd.add_argument("--redirect-uri", default=DEFAULT_REDIRECT_URI)
    auth_url_cmd.set_defaults(func=cmd_auth_url)

    # Non-interactive step 2: exchange a code (or redirected URL) for a token.
    auth_code = commands.add_parser("auth-code")
    auth_code.add_argument("--client-secret", required=True)
    auth_code.add_argument("--token", required=True)
    auth_code.add_argument("--redirect-uri", default=DEFAULT_REDIRECT_URI)
    auth_code.add_argument("--code")
    auth_code.add_argument("--final-url")
    auth_code.set_defaults(func=cmd_auth_code)

    check_access = commands.add_parser("check-access")
    check_access.add_argument("--token", required=True)
    check_access.add_argument("--client-secret")
    check_access.set_defaults(func=cmd_check_access)

    initiate = commands.add_parser("initiate")
    initiate.add_argument("--token", required=True)
    initiate.add_argument("--client-secret")
    initiate.add_argument("--start")
    initiate.add_argument("--end")
    initiate.add_argument("--state", required=True)
    initiate.set_defaults(func=cmd_initiate)

    poll = commands.add_parser("poll")
    poll.add_argument("--token", required=True)
    poll.add_argument("--client-secret")
    poll.add_argument("--state", required=True)
    poll.add_argument("--download-dir")
    poll.set_defaults(func=cmd_poll)

    namespace = parser.parse_args()
    handler = namespace.func
    return handler(namespace)


# Run the CLI when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    sys.exit(main())
