import re

import nbformat
import requests

# Base URL for raw file content in the Qiskit textbook repository.
RAW_ROOT = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"

# README locations we now support (directory key -> README path).
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}

# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────


def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure."""
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        # Fix: without a status check, a 404 body ("404: Not Found") would be
        # regex-scanned as if it were the README. raise_for_status() routes
        # HTTP errors onto the existing explicit empty-list failure path.
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        return re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
    except requests.RequestException:
        return []


def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    files: list[str] = []
    for dir_key, readme in _SECTIONS.items():
        found = _scrape_readme(readme)
        # Prepend the directory path if the README gives bare filenames
        prefixed = [
            name if "/" in name else f"{dir_key}/{name}"
            for name in found
        ]
        files.extend(prefixed)
    return files


def _pretty(path: str) -> str:
    """'ch-states/bloch_sphere.ipynb' → 'Bloch Sphere'."""
    fname = path.rsplit("/", 1)[-1]
    stem = fname.removesuffix(".ipynb")
    # Fix: the original replaced only hyphens, so 'bloch_sphere' rendered as
    # 'Bloch_Sphere' — contradicting this docstring. Underscores must also
    # become spaces before title-casing.
    return stem.replace("-", " ").replace("_", " ").title()


# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────


def get_theory_topics() -> dict[str, str]:
    """Return a mapping of friendly topic names to notebook file paths.

    Discovers available Jupyter notebooks from the Qiskit textbook across
    all four main chapters (intro, ch-states, ch-gates, ch-algorithms)
    by scraping their respective README files.

    Returns:
        dict[str, str]: A dictionary mapping human-readable topic names
            to their corresponding notebook file paths.
            For example: {'What Is Quantum': 'intro/what-is-quantum.ipynb',
            'Bloch Sphere': 'ch-states/bloch_sphere.ipynb'}
            Returns an empty dictionary if network requests fail.

    Note:
        If network requests fail, returns an empty dictionary instead of
        falling back to hardcoded content.
    """
    try:
        discovered_files = _discover_files()
        if not discovered_files:
            return {}
        return {_pretty(p): p for p in discovered_files}
    except Exception:
        # Boundary function: callers are documented to receive {} on any
        # failure, so swallow everything here rather than propagate.
        return {}


def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Downloads the notebook from GitHub and extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.

    Returns:
        str: The concatenated content of the notebook as formatted text,
            with cells separated by double newlines. Returns error messages
            if the topic is not found or if network requests fail.

    Example:
        >>> content = get_theory("teleportation")
        >>> print(content[:100])
        # Teleportation
        Quantum teleportation is a process by which quantum information...
    """
    topics = get_theory_topics()

    # Build lenient lookup table: every topic is reachable via its pretty
    # name, its bare slug, or its full repo-relative path (case-insensitive).
    lookup: dict[str, str] = {}
    for nice, path in topics.items():
        slug = path.rsplit("/", 1)[-1].removesuffix(".ipynb")
        lookup[nice.lower()] = path
        lookup[slug.lower()] = path
        lookup[path.lower()] = path

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return "Unable to get theory - no topics available (network may be down)"
        available_topics = ', '.join(topics.keys())
        return f"Topic unknown: '{topic}'. Available topics: {available_topics}"

    path = lookup[key]
    try:
        resp = requests.get(f"{RAW_ROOT}{path}", timeout=20)
        # Fix: raise on HTTP errors so a 404 page is reported via the
        # existing error message instead of being handed to the parser.
        resp.raise_for_status()
        nb = nbformat.reads(resp.text, as_version=4)
    except Exception:
        return "Unable to get theory - failed to download or parse notebook content"

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {_pretty(path)}\n")

    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            chunks.append(f"```python\n{cell.source}\n```")

    return "\n\n".join(chunks)