import re

import nbformat
import requests

# Base URL for raw file content in the Qiskit textbook repository.
RAW_ROOT = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"

# README locations we now support (directory key -> README path).
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}

# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────


def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure."""
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        # Fix: without a status check, a 404 body ("404: Not Found") would be
        # regex-scanned as if it were the README. raise_for_status() routes
        # HTTP errors onto the existing explicit empty-list failure path.
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        return re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
    except requests.RequestException:
        return []


def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    files: list[str] = []
    for dir_key, readme in _SECTIONS.items():
        found = _scrape_readme(readme)
        # Prepend the directory path if the README gives bare filenames
        prefixed = [
            name if "/" in name else f"{dir_key}/{name}"
            for name in found
        ]
        files.extend(prefixed)
    return files


def _pretty(path: str) -> str:
    """'ch-states/bloch_sphere.ipynb' → 'Bloch Sphere'."""
    fname = path.rsplit("/", 1)[-1]
    stem = fname.removesuffix(".ipynb")
    # Fix: the original replaced only hyphens, so 'bloch_sphere' rendered as
    # 'Bloch_Sphere' — contradicting this docstring. Underscores must also
    # become spaces before title-casing.
    return stem.replace("-", " ").replace("_", " ").title()


# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────


def get_theory_topics() -> dict[str, str]:
    """Return a mapping of friendly topic names to notebook file paths.

    Discovers available Jupyter notebooks from the Qiskit textbook across
    all four main chapters (intro, ch-states, ch-gates, ch-algorithms)
    by scraping their respective README files.

    Returns:
        dict[str, str]: A dictionary mapping human-readable topic names
            to their corresponding notebook file paths.
            For example: {'What Is Quantum': 'intro/what-is-quantum.ipynb',
            'Bloch Sphere': 'ch-states/bloch_sphere.ipynb'}
            Returns an empty dictionary if network requests fail.

    Note:
        If network requests fail, returns an empty dictionary instead of
        falling back to hardcoded content.
    """
    try:
        discovered_files = _discover_files()
        if not discovered_files:
            return {}
        return {_pretty(p): p for p in discovered_files}
    except Exception:
        # Boundary function: callers are documented to receive {} on any
        # failure, so swallow everything here rather than propagate.
        return {}


def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Downloads the notebook from GitHub and extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.

    Returns:
        str: The concatenated content of the notebook as formatted text,
            with cells separated by double newlines. Returns error messages
            if the topic is not found or if network requests fail.

    Example:
        >>> content = get_theory("teleportation")
        >>> print(content[:100])
        # Teleportation
        Quantum teleportation is a process by which quantum information...
    """
    topics = get_theory_topics()

    # Build lenient lookup table: every topic is reachable via its pretty
    # name, its bare slug, or its full repo-relative path (case-insensitive).
    lookup: dict[str, str] = {}
    for nice, path in topics.items():
        slug = path.rsplit("/", 1)[-1].removesuffix(".ipynb")
        lookup[nice.lower()] = path
        lookup[slug.lower()] = path
        lookup[path.lower()] = path

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return "Unable to get theory - no topics available (network may be down)"
        available_topics = ', '.join(topics.keys())
        return f"Topic unknown: '{topic}'. Available topics: {available_topics}"

    path = lookup[key]
    try:
        resp = requests.get(f"{RAW_ROOT}{path}", timeout=20)
        # Fix: raise on HTTP errors so a 404 page is reported via the
        # existing error message instead of being handed to the parser.
        resp.raise_for_status()
        nb = nbformat.reads(resp.text, as_version=4)
    except Exception:
        return "Unable to get theory - failed to download or parse notebook content"

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {_pretty(path)}\n")

    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            chunks.append(f"```python\n{cell.source}\n```")

    return "\n\n".join(chunks)