Spaces:
Sleeping
Sleeping
File size: 1,381 Bytes
03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d 03fbd26 bd98c1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
import requests
import json
from time import sleep
# Download every OpenAlex author record matching FILTER, saving each result
# page as its own zero-padded JSON file in OUTPUT_DIR. Pages are walked with
# the API's cursor pagination until no further cursor is returned.

# Base API URL
BASE_URL = "https://api.openalex.org/authors"
FILTER = "last_known_institutions.country_code:NO,x_concepts.id:C41008148"
PER_PAGE = 200
OUTPUT_DIR = "C41008148_authors"
# Seconds to wait on the server before giving up on a request. Without a
# timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Initialize cursor
cursor = "*"
page_count = 1  # Track page numbers for saving files
failed = False  # Set when the loop stops because of an error

while cursor:
    # Pass the query as params so requests URL-encodes every value; the
    # cursor comes back from the server and is not guaranteed to be URL-safe.
    params = {"filter": FILTER, "per-page": PER_PAGE, "cursor": cursor}
    filename = os.path.join(OUTPUT_DIR, f"{page_count:010}.json")
    try:
        print(f"Fetching page {page_count} with cursor...")
        response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()

        if os.path.exists(filename):
            # The page still had to be fetched: its response carries the
            # cursor that leads to the next page.
            print(f"File {filename} already exists, skipping...")
        else:
            # Write to a temporary name and rename it into place so that an
            # interrupted write never leaves a truncated file that a later
            # run would mistake for a finished page and skip.
            partial_name = filename + ".tmp"
            with open(partial_name, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            os.replace(partial_name, filename)

        # "meta" may be absent or null; either way treat it as "no cursor".
        cursor = (data.get("meta") or {}).get("next_cursor")
    except Exception as e:
        # Top-level boundary of the script: report the failure and stop
        # instead of letting a traceback end the run.
        print(f"Error on page {page_count}: {e}")
        failed = True
        break

    if not cursor:
        print("No more results.")
        break

    page_count += 1
    sleep(1)  # Rate-limiting: every iteration issues one request

# Only claim success when the loop ended because the results ran out.
if failed:
    print("Download stopped before completion.")
else:
    print("Download complete using cursor pagination.")
|