#!/usr/bin/env bash
#
# Fetch Kazakh (kk) text corpora and prepare them for processing:
#   1. download the latest Kazakh Wikipedia articles dump,
#   2. download the CC-100 Kazakh text archive and decompress it,
#   3. run WikiExtractor over the Wikipedia dump.
#
# Requires: wget, unxz (xz-utils), python3 with the `wikiextractor` module.
# All files are written relative to the current working directory.

# Abort on the first failing step so that later steps never run against a
# missing or partially downloaded file.
set -euo pipefail

# Each command must be its own statement; when they are run together on one
# line, the first wget receives every following word as a URL or option.
wget https://dumps.wikimedia.org/kkwiki/latest/kkwiki-latest-pages-articles.xml.bz2
wget http://data.statmt.org/cc-100/kk.txt.xz

# Decompress the CC-100 archive in place (kk.txt.xz -> kk.txt).
unxz kk.txt.xz

# Extract article text from the dump; results go under ./extracted, with
# --json requesting JSON-formatted output from WikiExtractor.
python3 -m wikiextractor.WikiExtractor kkwiki-latest-pages-articles.xml.bz2 \
  --output extracted \
  --json