Spaces:
Sleeping
Sleeping
File size: 2,066 Bytes
42cd5f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from sparrow_parse.extractor.unstructured_processor import UnstructuredProcessor
from sparrow_parse.extractor.markdown_processor import MarkdownProcessor
import json
def execute_sparrow_processor(options, file_path, strategy, model_name, local, debug):
    """Run the extractor selected by *options* and return its two results.

    Args:
        options: Container (or string) checked with ``in`` for the markers
            ``"unstructured"`` and ``"markdown"``. ``"unstructured"`` wins
            when both are present.
        file_path: Forwarded to the chosen processor's ``extract_data``.
        strategy: Forwarded to ``UnstructuredProcessor.extract_data`` only.
        model_name: Forwarded to ``UnstructuredProcessor.extract_data`` only.
        local: Forwarded unchanged to the chosen processor.
        debug: Forwarded unchanged to the chosen processor.

    Returns:
        A ``(content, table_content)`` tuple unpacked from the processor's
        result, or ``(None, None)`` when neither marker is in *options*.
    """
    if "unstructured" in options:
        extractor = UnstructuredProcessor()
        # Unpack explicitly so a result that is not a pair fails here.
        text, tables = extractor.extract_data(
            file_path, strategy, model_name, ['tables', 'unstructured'], local, debug
        )
        return text, tables

    if "markdown" in options:
        extractor = MarkdownProcessor()
        text, tables = extractor.extract_data(
            file_path, ['tables', 'markdown'], local, debug
        )
        return text, tables

    # No recognised processor was requested.
    return None, None
def merge_dicts(json_str1, json_str2):
    """Merge two JSON documents given as strings into one dictionary.

    Both arguments are parsed with ``json.loads``. Entries from the second
    document are applied on top of a shallow copy of the first: when a key
    holds a list in both documents the second list is appended to the first,
    otherwise the second document's value replaces the first's.

    Args:
        json_str1: JSON text for the base document.
        json_str2: JSON text for the document merged on top.

    Returns:
        The merged dictionary.
    """
    base = json.loads(json_str1)
    overlay = json.loads(json_str2)

    combined = base.copy()
    for name, incoming in overlay.items():
        both_lists = (
            name in combined
            and isinstance(combined[name], list)
            and isinstance(incoming, list)
        )
        if not both_lists:
            # Missing key or non-list on either side: the overlay value wins.
            combined[name] = incoming
            continue
        combined[name].extend(incoming)

    return combined
def _is_empty_value(value):
    """Return True when a decoded JSON value carries no usable content."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    if isinstance(value, (list, dict)):
        return not value
    # Numbers and booleans (including 0 and False) are real answers.
    return False


def track_query_output(keys, json_data, types):
    """Report which expected keys are missing or empty in a JSON document.

    A key counts as unanswered when it is absent from the document, or when
    its value is ``null``, a blank/whitespace-only string, or an empty array
    or object. Numbers and booleans always count as answered.

    Args:
        keys: Sequence of key names expected in the document.
        json_data: JSON text, parsed with ``json.loads``.
        types: Sequence parallel to *keys*; ``types[i]`` is returned alongside
            ``keys[i]`` when that key is unanswered.

    Returns:
        A ``(result, result_types)`` tuple of two lists: the unanswered keys
        and their corresponding entries from *types*, in the order of *keys*.
    """
    data = json.loads(json_data)

    result = []
    result_types = []
    for i, key in enumerate(keys):
        # Previously `.strip()` was called on every value, which raised
        # AttributeError for any non-string JSON value (null, numbers, ...).
        if key not in data or _is_empty_value(data[key]):
            result.append(key)
            result_types.append(types[i])

    return result, result_types
def add_answer_page(answer, page_name, answer_page):
    """Store one page's answer under *page_name* inside *answer*.

    Args:
        answer: Dictionary that is updated in place.
        page_name: Key under which the page answer is stored.
        answer_page: The page answer. A ``str`` is decoded with ``json.loads``
            first; any other value is stored as given.

    Returns:
        The same *answer* dictionary, after the update.

    Raises:
        ValueError: If *answer* is not a dictionary.
    """
    if isinstance(answer, dict):
        # Accept either an already-decoded object or its JSON text.
        page_value = json.loads(answer_page) if isinstance(answer_page, str) else answer_page
        answer[page_name] = page_value
        return answer

    raise ValueError("The answer should be a dictionary.")
|