# Page-scrape residue (not part of the script): last commit "Update run.py" (f46589f, verified) by tillwenke.
### Run this on startup
import requests
import pandas as pd
from datasets import Dataset, DatasetDict
from datetime import datetime
from tqdm import tqdm
# Fetch every dumpster listed by dumpstermap, flatten each dumpster's votes
# into one row per vote, and publish the resulting table to the Hugging Face
# Hub as a split named after today's date.
BASE_URL = "https://dumpstermap.herokuapp.com/dumpsters"
# Seconds to wait on a single HTTP call. requests waits indefinitely by
# default, which would hang this startup job on a stalled connection.
REQUEST_TIMEOUT = 60

cols = ["Latitude", "Longitude", "dumpster_created", "voting", "comment", "voting_created", "name"]

# Accumulate plain dicts and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
rows = []

# A single session reuses the connection across the per-dumpster requests.
with requests.Session() as session:
    # NOTE(review): the bounds look like the whole globe, i.e. "list every
    # dumpster" -- confirm the argument order against the dumpstermap API.
    url = f"{BASE_URL}/withinbounds/-180/-90/180/90/"
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Without the listing there is nothing to iterate, so fail loudly here.
    response.raise_for_status()
    entry = response.json()
    dumpsters = entry["features"]

    for dumpster in tqdm(dumpsters):
        url = f"{BASE_URL}/{dumpster['id']}"
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Best effort: one unreachable dumpster must not abort the run.
            print("Problem fetching from:", url, e)
            continue
        if response.status_code != 200:
            print("Problem fetching from:", url, response.status_code)
            # An error response carries no dumpster payload worth parsing.
            continue
        try:
            entry = response.json()
            # coordinates is read as [longitude, latitude].
            lat = entry['geometry']['coordinates'][1]
            lon = entry['geometry']['coordinates'][0]
            dumpster_created = entry['properties']['created']
            # Collect this dumpster's votes separately so that a malformed
            # vote drops the whole dumpster rather than leaving partial rows.
            dumpster_rows = [
                {
                    "Latitude": lat,
                    "Longitude": lon,
                    "dumpster_created": dumpster_created,
                    "voting": vote['value'],
                    "comment": vote['comment'],
                    "voting_created": vote['created_date'],
                    "name": vote['name'],
                }
                for vote in entry['properties']['voting_set']
            ]
        except (LookupError, TypeError, ValueError) as e:
            # ValueError covers an invalid JSON body; LookupError/TypeError
            # cover a payload that does not have the expected shape.
            print("Problem parsing response from:", url, repr(e))
            continue
        rows.extend(dumpster_rows)

# Passing columns= keeps the schema stable even when no votes were collected.
dumpsters_df = pd.DataFrame(rows, columns=cols)
dataset = Dataset.from_pandas(dumpsters_df)
dataset_dict = DatasetDict({datetime.now().strftime("%Y.%m.%d"): dataset})
dataset_dict.push_to_hub("Hitchwiki/dumpster_diving_spots")