import json
import re

import nbformat
import requests
RAW_ROOT = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"
# README locations we now support
_SECTIONS: dict[str, str] = {
"intro": "intro/README.md",
"ch-states": "ch-states/README.md",
"ch-gates": "ch-gates/README.md",
"ch-algorithms": "ch-algorithms/README.md",
}
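# e.g. the "intro" entry resolves to
# https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/intro/README.md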
# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────
def _scrape_readme(rel_path: str) -> list[str]:
"""Return *.ipynb files mentioned in a README; empty list on failure."""
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        found = re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
        # str.lstrip strips *characters*, not a prefix, and would mangle names
        # starting with "." or "/"; removeprefix strips exactly "./"
        return [name.removeprefix("./") for name in found]
    except requests.RequestException:
        return []
def _discover_files() -> list[str]:
"""Aggregate notebooks from all configured READMEs (no fallback)."""
files: list[str] = []
for dir_key, readme in _SECTIONS.items():
found = _scrape_readme(readme)
# Prepend the directory path if the README gives bare filenames
prefixed = [name if "/" in name else f"{dir_key}/{name}" for name in found]
files.extend(prefixed)
return files
def _pretty(path: str) -> str:
"""'ch-states/bloch_sphere.ipynb' β 'Bloch Sphere'."""
fname = path.rsplit("/", 1)[-1]
return fname.replace("-", " ").replace(".ipynb", "").title()
# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────
def get_theory_topics() -> str:
"""Return a structured list of available quantum theory topics.
Discovers available Jupyter notebooks from the Qiskit textbook across all
four main chapters (intro, ch-states, ch-gates, ch-algorithms) by scraping
their respective README files.
Returns:
str: JSON string containing a structured list of topics with title, slug, and path.
For example:
'{
"topics": [
{
"title": "What Is Quantum?",
"slug": "what-is-quantum",
"path": "intro/what-is-quantum.ipynb"
}
]
}'
    Note:
        If network requests fail, returns JSON with an empty topics array
        instead of falling back to hardcoded content.
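    Example:
        A minimal doctest-style sketch; the exact topics returned depend on
        the live README contents, so only the shape is checked here.
        >>> data = json.loads(get_theory_topics())
        >>> isinstance(data["topics"], list)
        True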
"""
try:
discovered_files = _discover_files()
if not discovered_files:
return json.dumps({"topics": []})
topics = []
for path in discovered_files:
title = _pretty(path)
slug = path.rsplit("/", 1)[-1].removesuffix(".ipynb")
topics.append({"title": title, "slug": slug, "path": path})
return json.dumps({"topics": topics}, indent=2)
except Exception:
return json.dumps({"topics": []})
def get_theory(
topic: str,
markdown_only: bool = True,
include_headers: bool = True,
) -> str:
"""Download and parse a Qiskit textbook notebook, returning its content as text.
Accepts flexible topic identification: pretty names ("Teleportation"),
slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
Downloads the notebook from GitHub and extracts its content.
Args:
topic (str): The quantum topic to fetch. Can be:
- Pretty name: "Teleportation", "What Is Quantum"
- Slug: "teleportation", "what-is-quantum"
- Full path: "intro/teleportation.ipynb"
markdown_only (bool, optional): If True, include only markdown cells.
If False, also include code cells wrapped in ```python blocks.
Defaults to True.
include_headers (bool, optional): If True, prepend an H1 header with
the topic name for better readability. Defaults to True.
Returns:
str: JSON string containing the topic name and notebook content.
For example:
'{
"topic": "Teleportation",
"content": "# Teleportation\\n\\nQuantum teleportation is a process..."
}'
Returns JSON with error message in content field if topic not found
or network requests fail.
Example:
>>> result = get_theory("teleportation")
>>> data = json.loads(result)
>>> print(data["topic"])
Teleportation
"""
topics_json = get_theory_topics()
topics_data = json.loads(topics_json)
topics = topics_data.get("topics", [])
# Build lenient lookup table
lookup: dict[str, str] = {}
for topic_info in topics:
title = topic_info["title"]
slug = topic_info["slug"]
path = topic_info["path"]
lookup[title.lower()] = path
lookup[slug.lower()] = path
lookup[path.lower()] = path
key = topic.lower()
if key not in lookup:
if not topics:
return json.dumps(
{
"topic": topic,
"content": "Unable to get theory - no topics available (network may be down)",
}
)
available_topics = ", ".join([t["title"] for t in topics])
return json.dumps(
{
"topic": topic,
"content": f"Topic unknown: '{topic}'. Available topics: {available_topics}",
}
)
path = lookup[key]
topic_title = _pretty(path)
    try:
        resp = requests.get(f"{RAW_ROOT}{path}", timeout=20)
        resp.raise_for_status()
        nb = nbformat.reads(resp.text, as_version=4)
except Exception:
return json.dumps(
{
"topic": topic_title,
"content": "Unable to get theory - failed to download or parse notebook content",
}
)
chunks: list[str] = []
if include_headers:
chunks.append(f"# {topic_title}\n")
for cell in nb.cells:
if cell.cell_type == "markdown":
chunks.append(cell.source)
elif cell.cell_type == "code" and not markdown_only:
chunks.append(f"```python\n{cell.source}\n```")
content = "\n\n".join(chunks)
return json.dumps({"topic": topic_title, "content": content}, indent=2)