#!/usr/bin/env python3
"""
Patch script to add fetch_and_save tool to Skill Seekers MCP.

This tool fetches web pages and saves directly to files WITHOUT passing
content through the Claude API, saving ~97% of tokens compared to WebFetch.

Usage:
    python add_fetch_and_save.py

After running:
    - Restart Claude Code to reload MCP servers
    - Use: mcp__skill-seeker__fetch_and_save(url="...", output="...")
"""

import re
import sys
from pathlib import Path

# Locate the installed skill_seekers package; patches are applied in-place
# to the files inside that installation.
try:
    import skill_seekers

    SKILL_SEEKERS_PATH = Path(skill_seekers.__file__).parent
except ImportError:
    print("❌ skill-seekers not installed. Run: pip install skill-seekers")
    sys.exit(1)

print(f"📁 Found skill-seekers at: {SKILL_SEEKERS_PATH}")

# ============================================================================
# 1. Add fetch_and_save_tool to scraping_tools.py
# ============================================================================

# NOTE: this is source text appended verbatim to scraping_tools.py, so the
# escaped "\\n" sequences below deliberately become "\n" in the written file.
# It relies on names already in scope there (TextContent, Path).
FETCH_AND_SAVE_CODE = '''

async def fetch_and_save_tool(args: dict) -> list[TextContent]:
    """
    Fetch web pages and save directly to files without passing content
    through Claude API.

    This tool is optimized for token efficiency - it downloads content using
    Python's httpx library and saves directly to disk. Only metadata (status,
    bytes, path) is returned, NOT the actual content. This saves ~97% of
    tokens compared to WebFetch.

    Supports:
    - Single URL or batch of URLs
    - Automatic markdown extraction from HTML
    - Raw markdown file preservation
    - Configurable output paths

    Args:
        args: Dictionary containing:
            - url (str, optional): Single URL to fetch
            - urls (list, optional): List of {"url": str, "output": str} objects for batch
            - output (str, optional): Output file path (required if using single url)
            - extract_markdown (bool, optional): Extract markdown from HTML (default: True)
            - timeout (int, optional): Request timeout in seconds (default: 30)
            - rate_limit (float, optional): Delay between requests in seconds (default: 0.5)

    Returns:
        List[TextContent]: Summary of fetched files (status, bytes, errors) - NOT content

    Example:
        # Single file
        fetch_and_save(url="https://docs.example.com/guide.md", output="docs/guide.md")

        # Batch mode
        fetch_and_save(urls=[
            {"url": "https://docs.example.com/intro.md", "output": "docs/intro.md"},
            {"url": "https://docs.example.com/api.md", "output": "docs/api.md"}
        ])
    """
    import asyncio

    try:
        import httpx
        from bs4 import BeautifulSoup
    except ImportError as e:
        return [TextContent(
            type="text",
            text=f"❌ Missing dependency: {e}\\nInstall with: pip install httpx beautifulsoup4",
        )]

    # Parse arguments
    single_url = args.get("url")
    urls_list = args.get("urls", [])
    single_output = args.get("output")
    extract_markdown = args.get("extract_markdown", True)
    timeout_val = args.get("timeout", 30)
    rate_limit = args.get("rate_limit", 0.5)

    # Build task list (single mode and batch mode may be combined)
    tasks = []
    if single_url and single_output:
        tasks.append({"url": single_url, "output": single_output})
    if urls_list:
        tasks.extend(urls_list)

    if not tasks:
        return [TextContent(type="text", text="❌ Error: Must provide 'url' + 'output' or 'urls' list")]

    # Results tracking
    results = {
        "success": [],
        "failed": [],
        "total_bytes": 0,
    }

    def extract_text_from_html(html_content: str) -> str:
        """Extract clean text content from HTML, preserving structure as markdown."""
        soup = BeautifulSoup(html_content, "html.parser")

        # Remove script and style elements (plus chrome that adds noise)
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        # Find main content area; fall back to <body> or the whole document
        main = soup.select_one("main, article, [role='main'], .content, .markdown-body")
        if not main:
            main = soup.body or soup

        lines = []

        # Extract title
        title = soup.select_one("title")
        if title:
            lines.append(f"# {title.get_text().strip()}\\n")

        # Process content
        for elem in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6", "p", "pre", "code", "li", "blockquote"]):
            text = elem.get_text().strip()
            if not text:
                continue
            if elem.name.startswith("h"):
                level = int(elem.name[1])
                lines.append(f"\\n{'#' * level} {text}\\n")
            elif elem.name == "pre":
                code = elem.get_text()
                lang = ""
                code_elem = elem.find("code")
                if code_elem:
                    classes = code_elem.get("class", [])
                    for cls in classes:
                        if cls.startswith("language-"):
                            lang = cls.replace("language-", "")
                            break
                lines.append(f"\\n```{lang}\\n{code}\\n```\\n")
            elif elem.name == "li":
                lines.append(f"- {text}")
            elif elem.name == "blockquote":
                lines.append(f"> {text}")
            elif elem.name == "p":
                lines.append(f"\\n{text}\\n")

        return "\\n".join(lines)

    async def fetch_single(client, task: dict) -> dict:
        """Fetch a single URL and save it to its output path; never raises."""
        url = task["url"]
        output_path = Path(task["output"])
        try:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            response = await client.get(url, follow_redirects=True)
            response.raise_for_status()
            content = response.text
            # Convert to markdown only when the payload actually looks like
            # an HTML document; raw markdown files are preserved untouched.
            if extract_markdown and not url.endswith(".md"):
                if content.strip().startswith("<"):
                    content = extract_text_from_html(content)
            output_path.write_text(content, encoding="utf-8")
            return {
                "ok": True,
                "url": url,
                "output": str(output_path),
                "bytes": len(content.encode("utf-8")),
            }
        except Exception as e:
            return {"ok": False, "url": url, "error": str(e)}

    async def run_all():
        """Fetch all tasks sequentially, honoring the configured rate limit."""
        async with httpx.AsyncClient(timeout=timeout_val) as client:
            for i, task in enumerate(tasks):
                if i > 0 and rate_limit > 0:
                    await asyncio.sleep(rate_limit)
                result = await fetch_single(client, task)
                if result["ok"]:
                    results["success"].append(result)
                    results["total_bytes"] += result["bytes"]
                else:
                    results["failed"].append(result)

    await run_all()

    # Build the metadata-only summary (never includes page content)
    output_lines = [
        "📥 Fetch and Save Complete",
        "",
        f"✅ Success: {len(results['success'])} files",
        f"❌ Failed: {len(results['failed'])} files",
        f"📦 Total: {results['total_bytes']:,} bytes",
        "",
    ]
    if results["success"]:
        output_lines.append("### Saved Files:")
        for r in results["success"][:20]:
            output_lines.append(f"  - {r['output']} ({r['bytes']:,} bytes)")
        if len(results["success"]) > 20:
            output_lines.append(f"  ... and {len(results['success']) - 20} more")
    if results["failed"]:
        output_lines.append("\\n### Failed:")
        for r in results["failed"][:10]:
            output_lines.append(f"  - {r['url']}: {r['error']}")
        if len(results["failed"]) > 10:
            output_lines.append(f"  ... and {len(results['failed']) - 10} more")

    return [TextContent(type="text", text="\\n".join(output_lines))]
'''

# Source text for the FastMCP tool registration, injected into
# server_fastmcp.py just before the PACKAGING TOOLS section.
TOOL_REGISTRATION = '''

@safe_tool_decorator(
    description="Fetch web pages and save directly to files. Token-efficient: downloads via Python httpx, saves to disk, returns only metadata (NOT content). Saves ~97% tokens vs WebFetch. Supports batch mode and HTML-to-markdown conversion."
)
async def fetch_and_save(
    url: str | None = None,
    output: str | None = None,
    urls: list | None = None,
    extract_markdown: bool = True,
    timeout: int = 30,
    rate_limit: float = 0.5,
) -> str:
    """
    Fetch web pages and save directly to files without passing content through Claude API.

    Args:
        url: Single URL to fetch (use with 'output')
        output: Output file path for single URL mode
        urls: List of {"url": str, "output": str} dicts for batch mode
        extract_markdown: Extract markdown from HTML pages (default: true)
        timeout: Request timeout in seconds (default: 30)
        rate_limit: Delay between requests in seconds (default: 0.5)

    Returns:
        Summary with success/failure counts and file sizes - NOT content.
    """
    args = {
        "extract_markdown": extract_markdown,
        "timeout": timeout,
        "rate_limit": rate_limit,
    }
    if url:
        args["url"] = url
    if output:
        args["output"] = output
    if urls:
        args["urls"] = urls

    result = await fetch_and_save_impl(args)
    if isinstance(result, list) and result:
        return result[0].text if hasattr(result[0], "text") else str(result[0])
    return str(result)
'''


def patch_scraping_tools():
    """Append fetch_and_save_tool to mcp/tools/scraping_tools.py.

    Returns True on success or if the file is already patched.
    """
    file_path = SKILL_SEEKERS_PATH / "mcp" / "tools" / "scraping_tools.py"
    if not file_path.exists():
        print(f"❌ File not found: {file_path}")
        return False

    content = file_path.read_text(encoding="utf-8")

    # Idempotency: skip if a previous run already added the tool
    if "fetch_and_save_tool" in content:
        print("✓ scraping_tools.py already patched")
        return True

    # Add to end of file
    content = content.rstrip() + "\n" + FETCH_AND_SAVE_CODE
    file_path.write_text(content, encoding="utf-8")
    print("✅ Patched scraping_tools.py")
    return True


def patch_init():
    """Export fetch_and_save_impl from mcp/tools/__init__.py.

    Verifies each replace anchor exists before patching so a layout change
    upstream fails loudly instead of silently producing a half-patched file.
    """
    file_path = SKILL_SEEKERS_PATH / "mcp" / "tools" / "__init__.py"
    if not file_path.exists():
        print(f"❌ File not found: {file_path}")
        return False

    content = file_path.read_text(encoding="utf-8")

    if "fetch_and_save_impl" in content:
        print("✓ __init__.py already patched")
        return True

    # Insert the new import right after the existing scraping_tools import
    import_anchor = (
        "from .scraping_tools import (\n"
        "    build_how_to_guides_tool as build_how_to_guides_impl,\n"
        ")"
    )
    import_line = (
        "from .scraping_tools import (\n"
        "    fetch_and_save_tool as fetch_and_save_impl,\n"
        ")"
    )
    if import_anchor not in content:
        print("❌ Import anchor not found in __init__.py (layout changed?)")
        return False
    content = content.replace(import_anchor, import_anchor + "\n" + import_line)

    # Add to __all__
    all_anchor = '"extract_config_patterns_impl",'
    if all_anchor not in content:
        print("❌ __all__ anchor not found in __init__.py (layout changed?)")
        return False
    content = content.replace(all_anchor, all_anchor + '\n    "fetch_and_save_impl",')

    file_path.write_text(content, encoding="utf-8")
    print("✅ Patched __init__.py")
    return True


def patch_server():
    """Register the fetch_and_save tool in mcp/server_fastmcp.py.

    Like patch_init, validates anchors before replacing to avoid reporting
    success on a no-op patch.
    """
    file_path = SKILL_SEEKERS_PATH / "mcp" / "server_fastmcp.py"
    if not file_path.exists():
        print(f"❌ File not found: {file_path}")
        return False

    content = file_path.read_text(encoding="utf-8")

    if "fetch_and_save_impl" in content:
        print("✓ server_fastmcp.py already patched")
        return True

    # Add import of the implementation alongside the existing tool imports
    import_anchor = "extract_test_examples_impl,"
    if import_anchor not in content:
        print("❌ Import anchor not found in server_fastmcp.py (layout changed?)")
        return False
    content = content.replace(import_anchor, import_anchor + "\n    fetch_and_save_impl,")

    # Add tool registration before the PACKAGING TOOLS section
    section_anchor = (
        "# ============================================================================\n"
        "# PACKAGING TOOLS"
    )
    if section_anchor not in content:
        print("❌ PACKAGING TOOLS section not found in server_fastmcp.py (layout changed?)")
        return False
    content = content.replace(section_anchor, TOOL_REGISTRATION + "\n" + section_anchor)

    file_path.write_text(content, encoding="utf-8")
    print("✅ Patched server_fastmcp.py")
    return True


def main():
    """Apply all three patches and print usage instructions."""
    print("=" * 60)
    print("🔧 Adding fetch_and_save tool to Skill Seekers MCP")
    print("=" * 60)
    print()

    # Run every patch even if an earlier one fails, so all problems are
    # reported in a single pass.
    success = True
    success = patch_scraping_tools() and success
    success = patch_init() and success
    success = patch_server() and success

    print()
    if success:
        print("=" * 60)
        print("✅ Patch complete!")
        print()
        print("Next steps:")
        print("  1. Restart Claude Code to reload MCP servers")
        print("  2. Use the tool:")
        print()
        print("     mcp__skill-seeker__fetch_and_save(")
        print('         url="https://example.com/doc.md",')
        print('         output="local/path/doc.md"')
        print("     )")
        print()
        print("     Or batch mode:")
        print()
        print("     mcp__skill-seeker__fetch_and_save(urls=[")
        print('         {"url": "...", "output": "..."},')
        print('         {"url": "...", "output": "..."}')
        print("     ])")
        print("=" * 60)
    else:
        print("❌ Some patches failed. Check errors above.")
        sys.exit(1)


if __name__ == "__main__":
    main()