# Final_Assignment/tools/test_wikipedia.py
# tdziwok's picture
# first commit
# 31af2b2
import pytest
import wikipedia
def test_get_wikipedia_table_of_contents_happy():
    """The section listing for a known article matches the expected outline.

    NOTE(review): presumably this fetches the live Wikipedia page, so the
    expected outline may need updating when the article changes -- confirm.
    """
    article_url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
    # One entry per output line; joined with newlines below to form the
    # exact text the tool is expected to return.
    expected_lines = (
        "Sections within article:",
        "- Life",
        "- Awards",
        "- Death",
        "- Tributes",
        "- Discography",
        "- Studio_albums",
        "- EPs",
        "- Live_albums",
        "- Compilation_albums",
        "- Filmography",
        "- Further_reading",
        "- References",
        "- External_links",
    )
    listing = wikipedia.get_wikipedia_article_sections(article_url)
    assert listing == "\n".join(expected_lines)
def test_get_wikipedia_table_of_contents_404():
    """A URL for a non-existent article yields an error-style message.

    The result is lowercased once so that both accepted fragments are
    matched case-insensitively, consistent with the non-Wikipedia URL test.
    """
    bad_url = "https://en.wikipedia.org/wiki/ThisArticleDoesNotExist123456"
    message = wikipedia.get_wikipedia_article_sections(bad_url).lower()
    # Either wording is accepted: the specific "could not extract" text or
    # any message that mentions an error.
    assert "could not extract table of contents" in message or "error" in message
def test_get_wikipedia_table_of_contents_not_wikipedia():
    """A non-Wikipedia URL is answered with the 'not a valid page' message."""
    expected_fragment = (
        "the provided url does not appear to be a valid wikipedia page."
    )
    response = wikipedia.get_wikipedia_article_sections("https://www.google.com")
    # Compare case-insensitively: only the wording matters, not the casing.
    assert expected_fragment in response.lower()
def test_extract_wikipedia_section_1():
    """The "Discography" section contains known text and ends with the 2013 row."""
    page_url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
    discography = wikipedia.extract_wikipedia_section(page_url, "Discography")

    # Fragments that must appear somewhere in the extracted text, checked in
    # order. The trailing space in the second fragment is intentional.
    expected_fragments = (
        "Sosa in 2005, with Argentina's then-First Lady (later president from 2007 to 2015), Cristina Fernández de Kirchner",
        "Con Sabor A Mercedes Sosa * Label: Philips ",
        "Niño De Mañana * Label: Philips",
    )
    for fragment in expected_fragments:
        assert fragment in discography

    final_row = "| 2013 | Siempre En Ti * Label: Universal Music |"
    assert discography.endswith(final_row)
def test_extract_wikipedia_section():
    """The "Studio_albums" subsection is a table with known first/last rows.

    NOTE(review): the last-row check expects plain text while the 1962-row
    check expects link markup in the same section -- confirm both match the
    actual output format.
    """
    page_url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
    studio_albums = wikipedia.extract_wikipedia_section(page_url, "Studio_albums")

    last_row = "| 2015 | Lucerito * Label: RCA |"
    header_row = "| Year | Album details |"
    row_1962 = '| 1962 | [La Voz De La Zafra](/wiki/La_Voz_De_La_Zafra "La Voz De La Zafra") * Label: [RCA](/wiki/RCA_Records "RCA Records") |'

    assert studio_albums.endswith(last_row)
    assert studio_albums.startswith(header_row)
    assert row_1962 in studio_albums