# FireShadow's picture
# Initial clean commit
# 1721aea
import pandas as pd
import json
import argparse
from pathlib import Path
from typing import List
# TODO: add logging later
def create_json(csv_file_loc:str, output_folder:str, output_file_name:str,
                base_data_folder:str, data_attribute:str="data_files") -> List:
    """
    Creates a json file containing the causal query and its associated metadata from
    the csv file

    Each row of the csv becomes one record (dict) in the output. For every record,
    the value in the ``data_attribute`` column is joined onto ``base_data_folder``;
    if the resulting path exists the record is updated to hold that full path,
    otherwise the value is left unchanged and a warning is printed.

    Args:
        csv_file_loc: path to the csv file
        output_folder: path to the folder where the json file is saved
            (created, including parents, if it does not exist)
        output_file_name: name of the output json file; ".json" is appended
            when the name does not already end with it
        base_data_folder: path to the folder where the data is saved
        data_attribute: name of the column in the csv file containing the data file name

    Returns:
        The list of record dicts that was written to the json file.

    Raises:
        FileNotFoundError: if ``csv_file_loc`` does not exist.
    """
    try:
        df = pd.read_csv(csv_file_loc)
    except FileNotFoundError:
        print(f"File not found:{csv_file_loc}. Make sure the file path is correct.")
        # Nothing can be converted without the input table; propagate the real
        # error instead of failing later on an unbound `df`.
        raise

    json_df = df.to_dict(orient="records")

    print("Checking if referenced csv files are available")
    all_exists = True
    base_path = Path(base_data_folder)
    for data in json_df:
        full_path = base_path / data[data_attribute]
        if full_path.exists():
            # Store the resolved location so consumers do not need base_data_folder.
            data[data_attribute] = str(full_path)
        else:
            print(f"File not found: {full_path}. Re-check the name of the data file.")
            all_exists = False

    if not all_exists:
        print("Some data files are missing or incorrectly name")
    else:
        print("All data files are available. Good to go.")

    # Check the actual extension rather than a substring, so names such as
    # "report.json_v2" still receive the ".json" suffix.
    if not output_file_name.endswith(".json"):
        output_file_name = output_file_name + ".json"

    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)
    output_file_path = output_path / output_file_name

    # The context manager closes the file; no explicit close() is needed.
    with open(output_file_path, "w", encoding="utf-8") as f:
        json.dump(json_df, f, indent=4)
    print(f"Json file created at {output_file_path}")

    return json_df