"""Download all OpenAlex author records matching a fixed filter.

Pages through the OpenAlex /authors endpoint with cursor pagination and
writes each page of results to a zero-padded JSON file in OUTPUT_DIR.
Already-downloaded pages are skipped, so the script can be re-run to
resume an interrupted download.
"""

import json
import os
from time import sleep

import requests

# Base API URL and query configuration
BASE_URL = "https://api.openalex.org/authors"
FILTER = "last_known_institutions.country_code:NO,x_concepts.id:C41008148"
PER_PAGE = 200
OUTPUT_DIR = "C41008148_authors"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# OpenAlex cursor pagination starts with the literal cursor "*"
cursor = "*"
page_count = 1  # Track page numbers for naming output files

while cursor:
    try:
        print(f"Fetching page {page_count} with cursor...")
        # Pass query parameters via `params=` so requests URL-encodes the
        # cursor value (OpenAlex cursors may contain '+', '=' etc. which
        # would be mangled if interpolated into the URL by hand).
        response = requests.get(
            BASE_URL,
            params={"filter": FILTER, "per-page": PER_PAGE, "cursor": cursor},
            timeout=30,  # avoid hanging forever on a stalled connection
        )
        response.raise_for_status()
        data = response.json()

        filename = os.path.join(OUTPUT_DIR, f"{page_count:010}.json")
        if os.path.exists(filename):
            # Resume support: the page is already on disk; still advance
            # the cursor below so pagination continues from the right place.
            print(f"File {filename} already exists, skipping...")
        else:
            with open(filename, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)

        # Advance the cursor exactly once per page (previously duplicated
        # in the skip branch and the write branch).
        cursor = data.get("meta", {}).get("next_cursor")
        if not cursor:
            print("No more results.")
            break
        page_count += 1
        sleep(1)  # Rate-limiting: be polite to the API
    except (requests.RequestException, OSError, ValueError) as e:
        # RequestException covers HTTP/connection errors; OSError covers
        # file writes; ValueError covers malformed JSON bodies.
        print(f"Error on page {page_count}: {e}")
        break

print("Download complete using cursor pagination.")