|
import yaml |
|
import requests |
|
from datetime import datetime |
|
from typing import Dict, List, Any |
|
|
|
|
|
def fetch_conference_files() -> List[Dict[str, Any]]:
    """Fetch the AI conference YAML files from the ccfddl repository.

    Lists the ``conference/AI`` directory of ``ccfddl/ccf-deadlines`` through
    the GitHub contents API, downloads every entry whose name ends in ``.yml``
    and parses it with ``yaml.safe_load``.

    Returns:
        The first element of every downloaded document that parsed to a
        non-empty list. Files that cannot be downloaded are skipped with a
        warning.

    Raises:
        requests.RequestException: If the directory listing cannot be
            retrieved (network failure, timeout or HTTP error status).
        ValueError: If the directory listing is not a JSON array.
    """
    api_url = "https://api.github.com/repos/ccfddl/ccf-deadlines/contents/conference/AI"
    # Without a timeout a stalled connection would hang the script forever.
    timeout_seconds = 30

    listing = requests.get(api_url, timeout=timeout_seconds)
    # An error payload (e.g. rate limiting) is a JSON object, not a listing;
    # fail here with a clear HTTP error instead of a TypeError further down.
    listing.raise_for_status()
    files = listing.json()
    if not isinstance(files, list):
        raise ValueError(
            f"Unexpected response from {api_url}: expected a JSON array, "
            f"got {type(files).__name__}"
        )

    conferences = []
    for entry in files:
        if not entry['name'].endswith('.yml'):
            continue

        download = requests.get(entry['download_url'], timeout=timeout_seconds)
        if not download.ok:
            # Skip this file rather than parsing an error page as YAML.
            print(
                f"Warning: Could not download {entry['name']} "
                f"(HTTP {download.status_code})"
            )
            continue

        conf_data = yaml.safe_load(download.text)
        if isinstance(conf_data, list) and conf_data:
            conferences.append(conf_data[0])

    return conferences
|
|
|
|
|
_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)

# Lower-cased month spelling -> month number. Covers full names, three-letter
# abbreviations and the four-letter "sept".
_MONTH_NUMBERS = {
    **{name.lower(): number for number, name in enumerate(_MONTH_NAMES, start=1)},
    **{name[:3].lower(): number for number, name in enumerate(_MONTH_NAMES, start=1)},
    "sept": 9,
}


def _split_month_day(text: str) -> tuple:
    """Return ``(month_number_or_None, day)`` parsed from e.g. 'Sept 5' or '9'."""
    month = None
    day = None
    for token in text.split():
        cleaned = token.strip(".,").lower()
        if not cleaned:
            continue
        if month is None and cleaned in _MONTH_NUMBERS:
            month = _MONTH_NUMBERS[cleaned]
        elif day is None and cleaned.isdigit():
            day = int(cleaned)
        else:
            raise ValueError(f"unexpected token {token!r}")
    if day is None:
        raise ValueError(f"no day of month in {text!r}")
    return month, day


def parse_date_range(date_str: str, year: str) -> tuple[str, str]:
    """Parse a conference date string into ISO start and end dates.

    Accepted shapes include ``"May 19, 2025"``, ``"Jul 13-19, 2025"``,
    ``"January 5-9"`` and ``"April 29 - May 4, 2025"``. Month names are
    matched as whole words, case-insensitively, so full names and
    abbreviations are both handled.

    Args:
        date_str: The human-readable date or date range.
        year: The year applied to both ends of the range; a ``", <year>"``
            suffix is removed from ``date_str`` before parsing.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings. For a single date
        both elements are equal.

    Raises:
        ValueError: If the string cannot be interpreted as a date or range.
    """
    date_str = date_str.replace(f", {year}", "")

    try:
        # Treat en/em dashes like a plain hyphen, then split the range.
        normalized = date_str.replace("\u2013", "-").replace("\u2014", "-")
        parts = [part.strip() for part in normalized.split("-")]
        if len(parts) == 1:
            start_text = end_text = parts[0]
        elif len(parts) == 2:
            start_text, end_text = parts
        else:
            raise ValueError("expected at most one '-' separator")

        start_month, start_day = _split_month_day(start_text)
        end_month, end_day = _split_month_day(end_text)
        if start_month is None:
            raise ValueError("start date has no month name")
        if end_month is None:
            # "July 13-19": the end is only a day number, reuse the start month.
            end_month = start_month

        # Build the dates numerically so parsing does not depend on the
        # process locale (as "%B" would).
        year_number = int(year)
        start_date = datetime(year_number, start_month, start_day)
        end_date = datetime(year_number, end_month, end_day)

        return start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')

    except (ValueError, TypeError) as e:
        raise ValueError(f"Could not parse date: {date_str} ({e})") from e
|
|
|
|
|
def transform_conference_data(conferences: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Transform ccfddl conference records into the local record format.

    For each conference the first entry of ``confs`` whose ``year`` is the
    current year or later is selected; conferences without such an entry are
    dropped.

    Args:
        conferences: Parsed ccfddl conference documents.

    Returns:
        One dict per retained conference with the keys ``title``, ``year``,
        ``id``, ``full_name``, ``link``, ``deadline``, ``timezone``, ``date``
        and ``tags``, plus ``city``/``country``, ``abstract_deadline``,
        ``start``/``end`` and ``rankings`` when the source provides them.
    """
    transformed = []
    current_year = datetime.now().year

    for conf in conferences:
        # First listed edition that is not in the past; a missing or null
        # 'confs' entry is treated as "no editions".
        recent_conf = next(
            (
                instance
                for instance in (conf.get('confs') or [])
                if instance['year'] >= current_year
            ),
            None,
        )
        if not recent_conf:
            continue

        # Only the first timeline entry is used. A missing, null or empty
        # timeline falls back to an empty entry instead of raising.
        timeline_entries = recent_conf.get('timeline') or [{}]
        timeline = timeline_entries[0]

        transformed_conf = {
            'title': conf.get('title', ''),
            'year': recent_conf['year'],
            'id': recent_conf['id'],
            'full_name': conf.get('description', ''),
            'link': recent_conf.get('link', ''),
            'deadline': timeline.get('deadline', ''),
            'timezone': recent_conf.get('timezone', ''),
            'date': recent_conf.get('date', ''),
            'tags': [],
        }

        # "City, Country" is split on the first comma; anything without a
        # comma is stored as the country only.
        place = recent_conf.get('place', '')
        if place:
            if ',' in place:
                city, country = place.split(',', 1)
                transformed_conf['city'] = city.strip()
                transformed_conf['country'] = country.strip()
            else:
                transformed_conf['country'] = place.strip()

        if 'abstract_deadline' in timeline:
            transformed_conf['abstract_deadline'] = timeline['abstract_deadline']

        # Date parsing is best-effort: a failure only costs the start/end
        # fields, so it is reported and the record is kept.
        try:
            if transformed_conf['date']:
                start_date, end_date = parse_date_range(
                    transformed_conf['date'],
                    str(transformed_conf['year']),
                )
                transformed_conf['start'] = start_date
                transformed_conf['end'] = end_date
        except Exception as e:
            print(f"Warning: Could not parse date for {transformed_conf['title']}: {e}")

        if 'rank' in conf:
            rankings = [
                f"{rank_type.upper()}: {rank_value}"
                for rank_type, rank_value in conf['rank'].items()
            ]
            if rankings:
                transformed_conf['rankings'] = ', '.join(rankings)

        transformed.append(transformed_conf)

    return transformed
|
|
|
|
|
def main():
    """Merge the latest ccfddl conference data into the local YAML file.

    Reads ``src/data/conferences.yml``, fetches and transforms the ccfddl
    conferences, merges them by ``id`` and rewrites the file sorted by
    ``deadline``. For conferences that already exist locally, the locally
    stored values of the preserved fields win over the fetched ones. A
    fetched conference whose ``(title, year)`` already exists under a
    different id is skipped as a duplicate.

    Any exception is reported on stdout and re-raised.
    """
    # Fields whose existing local value is kept when a conference is updated.
    preserved_fields = [
        'tags', 'venue', 'hindex', 'submission_deadline',
        'timezone_submission', 'rebuttal_period_start',
        'rebuttal_period_end', 'final_decision_date',
        'review_release_date', 'commitment_deadline',
        'start', 'end', 'note', 'city', 'country',
        'rankings',
    ]

    try:
        current_file = 'src/data/conferences.yml'
        # Explicit UTF-8: the file is written with allow_unicode=True, so the
        # platform default encoding must not be relied upon.
        with open(current_file, 'r', encoding='utf-8') as f:
            # An empty file parses to None; treat it as "no conferences yet".
            current_conferences = yaml.safe_load(f) or []

        new_conferences = fetch_conference_files()
        if not new_conferences:
            print("Warning: No conferences fetched from ccfddl")
            return

        transformed_conferences = transform_conference_data(new_conferences)
        if not transformed_conferences:
            print("Warning: No conferences transformed")
            return

        current_conf_dict = {conf['id']: conf for conf in current_conferences}
        existing_conf_keys = {(conf['title'], conf['year']) for conf in current_conferences}

        for new_conf in transformed_conferences:
            conf_id = new_conf['id']
            conf_key = (new_conf['title'], new_conf['year'])

            if conf_id in current_conf_dict:
                # Known conference: refresh it but keep locally curated values.
                curr_conf = current_conf_dict[conf_id]
                for field in preserved_fields:
                    if field in curr_conf:
                        new_conf[field] = curr_conf[field]
                current_conf_dict[conf_id] = new_conf
            elif conf_key in existing_conf_keys:
                # Same title and year already present under another id.
                print(f"Skipping duplicate conference: {new_conf['title']} {new_conf['year']} (ID: {new_conf['id']})")
            else:
                current_conf_dict[conf_id] = new_conf
                existing_conf_keys.add(conf_key)

        all_conferences = list(current_conf_dict.values())
        # str() keeps the sort total even if a deadline was parsed by YAML as
        # a non-string value.
        all_conferences.sort(key=lambda x: str(x.get('deadline', '9999')))

        # Serialize everything before opening the file for writing, so a dump
        # failure cannot leave a truncated file behind.
        chunks = [
            yaml.dump(
                [conf],
                allow_unicode=True,
                sort_keys=False,
                default_flow_style=False,
                explicit_start=False,
                explicit_end=False,
                width=float("inf"),
                indent=2,
                default_style=None,
            ).rstrip()
            for conf in all_conferences
        ]

        with open(current_file, 'w', encoding='utf-8') as f:
            # One single-item YAML list per conference, separated by a blank line.
            f.write('\n\n'.join(chunks))
            f.write('\n')

        print(f"Successfully updated {len(all_conferences)} conferences")

    except Exception as e:
        print(f"Error: {e}")
        raise
|
|
|
|
|
# Run the synchronisation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()