Spaces:

katanaml
/

sparrow-ml

Sleeping

App Files Files Community

sparrow-ml / rag /agents /instructor /helpers /instructor_helper.py

katanaml

Sparrow Parse

42cd5f6 8 months ago

raw

history blame contribute delete

2.07 kB

	from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
	from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
	import json


	def execute_sparrow_processor(options, file_path, strategy, model_name, local, debug):
	    """Run the processor selected by ``options`` and return its output pair.

	    The selection is a plain membership test on ``options``:
	    ``"unstructured"`` is checked first, then ``"markdown"``.

	    Args:
	        options: Container (or string) tested with ``in`` for the processor name.
	        file_path: Forwarded to the processor's ``extract_data``.
	        strategy: Forwarded only to ``UnstructuredProcessor.extract_data``.
	        model_name: Forwarded only to ``UnstructuredProcessor.extract_data``.
	        local: Forwarded to the processor's ``extract_data``.
	        debug: Forwarded to the processor's ``extract_data``.

	    Returns:
	        A ``(content, table_content)`` tuple unpacked from ``extract_data``,
	        or ``(None, None)`` when neither processor name is in ``options``.
	    """
	    if "unstructured" in options:
	        extractor = UnstructuredProcessor()
	        text, tables = extractor.extract_data(
	            file_path,
	            strategy,
	            model_name,
	            ['tables', 'unstructured'],
	            local,
	            debug,
	        )
	        return text, tables

	    if "markdown" in options:
	        extractor = MarkdownProcessor()
	        text, tables = extractor.extract_data(
	            file_path,
	            ['tables', 'markdown'],
	            local,
	            debug,
	        )
	        return text, tables

	    # No known processor requested: nothing was extracted.
	    return None, None


	def merge_dicts(json_str1, json_str2):
	    """Decode two JSON strings and merge the second into the first.

	    For each key of the second object: when both sides hold a list under
	    that key, the second list is appended to the first; otherwise the
	    value from the second object replaces (or adds) the entry.

	    Args:
	        json_str1: JSON string providing the base entries.
	        json_str2: JSON string whose entries are merged on top.

	    Returns:
	        The merged dictionary.
	    """
	    base = json.loads(json_str1)
	    incoming = json.loads(json_str2)

	    combined = base.copy()
	    for name, new_value in incoming.items():
	        if (
	            name not in combined
	            or not isinstance(combined[name], list)
	            or not isinstance(new_value, list)
	        ):
	            # Not a list-on-list collision: the incoming value wins.
	            combined[name] = new_value
	            continue
	        combined[name].extend(new_value)

	    return combined


	def track_query_output(keys, json_data, types):
	    """Report which requested keys have no usable value in a JSON payload.

	    A key counts as missing when it is absent from the decoded object, or
	    when its value is ``null``, a blank/whitespace-only string, or an empty
	    list/object. Numbers and booleans (including ``0`` and ``false``) count
	    as present.

	    Args:
	        keys: Sequence of key names to look up in the decoded JSON.
	        json_data: JSON string, decoded with ``json.loads``.
	        types: Sequence parallel to ``keys``; ``types[i]`` is reported
	            together with ``keys[i]``.

	    Returns:
	        A ``(missing_keys, missing_types)`` tuple of two lists, in the
	        order the keys were given.

	    Raises:
	        json.JSONDecodeError: If ``json_data`` is not valid JSON.
	        IndexError: If a missing key has no matching entry in ``types``.
	    """
	    data = json.loads(json_data)

	    missing_keys = []
	    missing_types = []

	    for i, key in enumerate(keys):
	        if key in data:
	            value = data[key]
	            if isinstance(value, str):
	                # Whitespace-only strings carry no answer.
	                is_missing = not value.strip()
	            elif isinstance(value, (list, dict)):
	                is_missing = not value
	            else:
	                # Non-string scalars have no .strip(); only null is empty.
	                is_missing = value is None
	        else:
	            is_missing = True

	        if is_missing:
	            missing_keys.append(key)
	            missing_types.append(types[i])

	    return missing_keys, missing_types


	def add_answer_page(answer, page_name, answer_page):
	    """Store one page's answer under ``page_name`` in ``answer``.

	    Args:
	        answer: Dictionary that collects per-page answers; updated in place.
	        page_name: Key under which the page answer is stored.
	        answer_page: The page answer. A string is decoded with
	            ``json.loads`` first; any other value is stored as given.

	    Returns:
	        The same ``answer`` dictionary, after the update.

	    Raises:
	        ValueError: If ``answer`` is not a dictionary.
	    """
	    if isinstance(answer, dict):
	        # Strings are taken to be JSON text and decoded before storing.
	        page_value = json.loads(answer_page) if isinstance(answer_page, str) else answer_page
	        answer[page_name] = page_value
	        return answer

	    raise ValueError("The answer should be a dictionary.")