# mcp-qiskit-learn / theory_tools.py
# Author: Ludek Matyska
# Commit 17aca7c — "feat: add json tools output"
import re
import requests
import nbformat
import json
# Base URL for raw file content in the Qiskit textbook GitHub repository;
# README and notebook paths below are resolved relative to this root.
RAW_ROOT: str = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"
# README locations we now support
# Maps a chapter directory key (used to prefix bare notebook filenames)
# to that chapter's README path relative to RAW_ROOT.
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}
# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────
def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure.

    Args:
        rel_path: README path relative to ``RAW_ROOT``
            (e.g. ``"intro/README.md"``).

    Returns:
        list[str]: Notebook names exactly as they appear in markdown link
        targets, with any leading ``"./"`` prefix removed. Empty on any
        network error or non-2xx HTTP response.
    """
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        # Fail fast on 404s etc. instead of regex-scanning an error page.
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        found = re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
        # Clean up relative path prefixes like "./".
        # BUG FIX: str.lstrip("./") strips *characters*, not a prefix, so it
        # would mangle names like "../x.ipynb" or ".hidden.ipynb".
        return [name.removeprefix("./") for name in found]
    except requests.RequestException:
        return []
def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    notebooks: list[str] = []
    for section, readme_path in _SECTIONS.items():
        for entry in _scrape_readme(readme_path):
            # A bare filename is assumed to live in the section's directory;
            # entries that already contain "/" are kept as-is.
            notebooks.append(entry if "/" in entry else f"{section}/{entry}")
    return notebooks
def _pretty(path: str) -> str:
"""'ch-states/bloch_sphere.ipynb' β†’ 'Bloch Sphere'."""
fname = path.rsplit("/", 1)[-1]
return fname.replace("-", " ").replace(".ipynb", "").title()
# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────
def get_theory_topics() -> str:
    """Return a structured list of available quantum theory topics.

    Scrapes the README of every configured Qiskit-textbook chapter
    (intro, ch-states, ch-gates, ch-algorithms) and lists the Jupyter
    notebooks each one links to.

    Returns:
        str: JSON string of the form
            '{"topics": [{"title": ..., "slug": ..., "path": ...}, ...]}'
        e.g. a topic entry looks like
            {"title": "What Is Quantum?",
             "slug": "what-is-quantum",
             "path": "intro/what-is-quantum.ipynb"}

    Note:
        If network requests fail, returns JSON with an empty topics array
        instead of falling back to hardcoded content.
    """
    try:
        paths = _discover_files()
        if not paths:
            return json.dumps({"topics": []})
        topics = [
            {
                "title": _pretty(p),
                "slug": p.rsplit("/", 1)[-1].removesuffix(".ipynb"),
                "path": p,
            }
            for p in paths
        ]
        return json.dumps({"topics": topics}, indent=2)
    except Exception:
        # Boundary catch-all: any discovery failure degrades to "no topics".
        return json.dumps({"topics": []})
def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Matching is case-insensitive. Downloads the notebook from GitHub and
    extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.

    Returns:
        str: JSON string of the form
            '{"topic": <title>, "content": <markdown text>}'
        On failure (unknown topic, empty topic list, download or parse
        error) "content" carries a human-readable error message instead.

    Example:
        >>> result = get_theory("teleportation")
        >>> data = json.loads(result)
        >>> print(data["topic"])
        Teleportation
    """
    topics = json.loads(get_theory_topics()).get("topics", [])

    # Lenient lookup: any of title / slug / path (lowercased) resolves to
    # the canonical notebook path.
    lookup: dict[str, str] = {}
    for info in topics:
        for alias in (info["title"], info["slug"], info["path"]):
            lookup[alias.lower()] = info["path"]

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return json.dumps(
                {
                    "topic": topic,
                    "content": "Unable to get theory - no topics available (network may be down)",
                }
            )
        available_topics = ", ".join(t["title"] for t in topics)
        return json.dumps(
            {
                "topic": topic,
                "content": f"Topic unknown: '{topic}'. Available topics: {available_topics}",
            }
        )

    path = lookup[key]
    topic_title = _pretty(path)
    try:
        resp = requests.get(f"{RAW_ROOT}{path}", timeout=20)
        # Fail fast on HTTP errors (404 etc.) instead of handing an error
        # page to the notebook parser.
        resp.raise_for_status()
        nb = nbformat.reads(resp.text, as_version=4)
    except Exception:
        return json.dumps(
            {
                "topic": topic_title,
                "content": "Unable to get theory - failed to download or parse notebook content",
            }
        )

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {topic_title}\n")
    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            # Fence code cells so the combined output stays valid markdown.
            chunks.append(f"```python\n{cell.source}\n```")
    content = "\n\n".join(chunks)
    return json.dumps({"topic": topic_title, "content": content}, indent=2)