### Run this on startup
# Snapshot of dumpstermap spots: download the list of dumpsters inside the
# bounding box -180/-90/180/90, fetch each dumpster's detail record, flatten
# every entry of its `voting_set` into one table row, and push the table to
# the "Hitchwiki/dumpster_diving_spots" dataset repo under a split named
# after today's date (YYYY.MM.DD).

import requests
import pandas as pd
from datasets import Dataset, DatasetDict
from datetime import datetime
from tqdm import tqdm

# Seconds to wait for the server before giving up on a request. Without a
# timeout, `requests` waits indefinitely and one stalled connection would
# hang the whole job.
REQUEST_TIMEOUT = 60

url = "https://dumpstermap.herokuapp.com/dumpsters/withinbounds/-180/-90/180/90/"
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail with a clear HTTP error rather than an obscure JSON/KeyError below:
# nothing useful can be done without the spot listing.
response.raise_for_status()
entry = response.json()
dumpsters = entry["features"]

cols = [
    "Latitude",
    "Longitude",
    "dumpster_created",
    "voting",
    "comment",
    "voting_created",
    "name",
]

# NOTE(review): not used in the visible part of the script; kept in case
# later code (e.g. a progress display) reads them — confirm before removing.
progress_text = "% of spots fetched"
num_dumpsters = len(dumpsters)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating onto a DataFrame inside the loop re-copies every
# previously collected row on each iteration.
all_rows = []

# One session for all detail requests so connections can be reused.
with requests.Session() as session:
    for dumpster in tqdm(dumpsters):
        url = f"https://dumpstermap.herokuapp.com/dumpsters/{dumpster['id']}"

        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Best effort: a network failure on one spot must not abort the run.
            print("Problem fetching from:", url, e)
            continue

        if response.status_code != 200:
            print("Problem fetching from:", url, response.status_code)
            # Do not try to parse an error response as a dumpster record.
            continue

        try:
            entry = response.json()
            # The coordinate pair is read as [longitude, latitude].
            lat = entry['geometry']['coordinates'][1]
            lon = entry['geometry']['coordinates'][0]
            dumpster_created = entry['properties']['created']
            rows = [
                {
                    "Latitude": lat,
                    "Longitude": lon,
                    "dumpster_created": dumpster_created,
                    "voting": vote['value'],
                    "comment": vote['comment'],
                    "voting_created": vote['created_date'],
                    "name": vote['name'],
                }
                for vote in entry['properties']['voting_set']
            ]
        except (ValueError, KeyError, IndexError, TypeError) as e:
            # ValueError covers a body that is not valid JSON; the others
            # cover a record that lacks the expected fields. A malformed
            # record is skipped as a whole, so no partial rows are kept.
            print("Problem parsing response from:", url, repr(e))
            continue

        all_rows.extend(rows)

# `columns=cols` keeps the column set and order even when no rows were found.
dumpsters_df = pd.DataFrame(all_rows, columns=cols)

dataset = Dataset.from_pandas(dumpsters_df)
dataset_dict = DatasetDict({datetime.now().strftime("%Y.%m.%d"): dataset})
dataset_dict.push_to_hub("Hitchwiki/dumpster_diving_spots")