Spaces:

CausalNLP
/

causal-agent

Running

App Files Files Community

causal-agent / auto_causal /preprocess /json.py

FireShadow

Initial clean commit

1721aea 9 days ago

raw

history blame contribute delete

2 kB

	import pandas as pd
	import json
	import argparse
	from pathlib import Path
	from typing import List

	# TODO: add logging later


	def create_json(csv_file_loc:str, output_folder:str, output_file_name:str,
	                base_data_folder:str, data_attribute:str="data_files") -> List[dict]:
	    """
	    Create a json file containing the causal queries and their associated
	    metadata from a csv file.

	    Each csv row becomes one record (a dict keyed by column name). For every
	    record, the value in the ``data_attribute`` column is joined onto
	    ``base_data_folder``; when the resulting path exists on disk, the record
	    is updated to hold that full path. Records whose file is missing keep
	    their original value and a warning is printed. The json file is written
	    in either case.

	    Args:
	        csv_file_loc: path to the csv file
	        output_folder: path to the folder where the json file is saved
	            (created, including parents, if it does not exist)
	        output_file_name: name of the output json file; ".json" is appended
	            when the name does not already end with it
	        base_data_folder: path to the folder where the data is saved
	        data_attribute: name of the column in the csv file containing the
	            data file name

	    Returns:
	        The list of records that was written to the json file.

	    Raises:
	        FileNotFoundError: if ``csv_file_loc`` does not exist.
	        KeyError: if the csv file has no ``data_attribute`` column.
	    """
	    try:
	        df = pd.read_csv(csv_file_loc)
	    except FileNotFoundError:
	        print(f"File not found:{csv_file_loc}. Make sure the file path is correct.")
	        # Without a dataframe there is nothing to convert; propagate the real
	        # error instead of failing later on an unbound local variable.
	        raise

	    json_df = df.to_dict(orient="records")

	    print("Checking if referenced csv files are available")
	    all_exists = True
	    base_path = Path(base_data_folder)
	    for data in json_df:
	        full_path = base_path / data[data_attribute]
	        if full_path.exists():
	            # Store the resolved location so consumers do not need the base folder.
	            data[data_attribute] = str(full_path)
	        else:
	            print(f"File not found: {full_path}. Re-check the name of the data file.")
	            all_exists = False

	    if all_exists:
	        print("All data files are available. Good to go.")
	    else:
	        print("Some data files are missing or incorrectly name")

	    # Check the actual extension; a substring test would skip names such as
	    # "queries.json.bak" that merely contain ".json".
	    if not output_file_name.endswith(".json"):
	        output_file_name = output_file_name + ".json"

	    output_path = Path(output_folder)
	    output_path.mkdir(parents=True, exist_ok=True)
	    output_file_path = output_path / output_file_name
	    # The context manager closes the file; no explicit close() is needed.
	    with open(output_file_path, "w", encoding="utf-8") as f:
	        json.dump(json_df, f, indent=4)
	    print(f"Json file created at {output_file_path}")

	    return json_df