# mcp-qiskit-learn / theory_tools.py
# Author: Ludek Matyska
# Commit 17aca7c — "feat: add json tools output"
import re
import requests
import nbformat
import json
# Base URL for raw file content in the Qiskit textbook GitHub repository;
# README and notebook paths below are resolved relative to this root.
RAW_ROOT: str = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"
# README locations we now support
# Maps a chapter directory key (used to prefix bare notebook filenames)
# to that chapter's README path relative to RAW_ROOT.
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}
# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────
def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure.

    Args:
        rel_path: README path relative to ``RAW_ROOT``
            (e.g. ``"intro/README.md"``).

    Returns:
        list[str]: Notebook names exactly as they appear in markdown link
        targets, with any leading ``"./"`` prefix removed. Empty on any
        network error or non-2xx HTTP response.
    """
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        # Fail fast on 404s etc. instead of regex-scanning an error page.
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        found = re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
        # Clean up relative path prefixes like "./".
        # BUG FIX: str.lstrip("./") strips *characters*, not a prefix, so it
        # would mangle names like "../x.ipynb" or ".hidden.ipynb".
        return [name.removeprefix("./") for name in found]
    except requests.RequestException:
        return []
def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    notebooks: list[str] = []
    for section, readme_path in _SECTIONS.items():
        for entry in _scrape_readme(readme_path):
            # A bare filename is assumed to live in the section's directory;
            # entries that already contain "/" are kept as-is.
            notebooks.append(entry if "/" in entry else f"{section}/{entry}")
    return notebooks
def _pretty(path: str) -> str:
"""'ch-states/bloch_sphere.ipynb' β†’ 'Bloch Sphere'."""
fname = path.rsplit("/", 1)[-1]
return fname.replace("-", " ").replace(".ipynb", "").title()
# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────
def get_theory_topics() -> str:
    """Return a structured list of available quantum theory topics.

    Scrapes the README of every configured Qiskit-textbook chapter
    (intro, ch-states, ch-gates, ch-algorithms) and lists the Jupyter
    notebooks each one links to.

    Returns:
        str: JSON string of the form
            '{"topics": [{"title": ..., "slug": ..., "path": ...}, ...]}'
        e.g. a topic entry looks like
            {"title": "What Is Quantum?",
             "slug": "what-is-quantum",
             "path": "intro/what-is-quantum.ipynb"}

    Note:
        If network requests fail, returns JSON with an empty topics array
        instead of falling back to hardcoded content.
    """
    try:
        paths = _discover_files()
        if not paths:
            return json.dumps({"topics": []})
        topics = [
            {
                "title": _pretty(p),
                "slug": p.rsplit("/", 1)[-1].removesuffix(".ipynb"),
                "path": p,
            }
            for p in paths
        ]
        return json.dumps({"topics": topics}, indent=2)
    except Exception:
        # Boundary catch-all: any discovery failure degrades to "no topics".
        return json.dumps({"topics": []})
def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Matching is case-insensitive. Downloads the notebook from GitHub and
    extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.

    Returns:
        str: JSON string of the form
            '{"topic": <title>, "content": <markdown text>}'
        On failure (unknown topic, empty topic list, download or parse
        error) "content" carries a human-readable error message instead.

    Example:
        >>> result = get_theory("teleportation")
        >>> data = json.loads(result)
        >>> print(data["topic"])
        Teleportation
    """
    topics = json.loads(get_theory_topics()).get("topics", [])

    # Lenient lookup: any of title / slug / path (lowercased) resolves to
    # the canonical notebook path.
    lookup: dict[str, str] = {}
    for info in topics:
        for alias in (info["title"], info["slug"], info["path"]):
            lookup[alias.lower()] = info["path"]

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return json.dumps(
                {
                    "topic": topic,
                    "content": "Unable to get theory - no topics available (network may be down)",
                }
            )
        available_topics = ", ".join(t["title"] for t in topics)
        return json.dumps(
            {
                "topic": topic,
                "content": f"Topic unknown: '{topic}'. Available topics: {available_topics}",
            }
        )

    path = lookup[key]
    topic_title = _pretty(path)
    try:
        resp = requests.get(f"{RAW_ROOT}{path}", timeout=20)
        # Fail fast on HTTP errors (404 etc.) instead of handing an error
        # page to the notebook parser.
        resp.raise_for_status()
        nb = nbformat.reads(resp.text, as_version=4)
    except Exception:
        return json.dumps(
            {
                "topic": topic_title,
                "content": "Unable to get theory - failed to download or parse notebook content",
            }
        )

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {topic_title}\n")
    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            # Fence code cells so the combined output stays valid markdown.
            chunks.append(f"```python\n{cell.source}\n```")
    content = "\n\n".join(chunks)
    return json.dumps({"topic": topic_title, "content": content}, indent=2)