File size: 3,351 Bytes
c2fb848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# flowchart_builder.py
# Arrow and graph logic for converting detected flowchart elements to structured JSON

from shapely.geometry import box, Point
from collections import defaultdict, deque

def map_arrows(nodes, arrows):
    """
    Matches arrows to nodes based on geometric endpoints.

    Each node dict must provide "id" and "bbox" (the four values are
    unpacked straight into shapely's ``box``); each arrow dict must provide
    "tail" and "head" coordinates and may provide a "label".

    Side effect: every node dict gains a "shape" key holding its shapely
    polygon.

    An arrow is kept only when its tail and its head each fall inside some
    node and those nodes differ. When several nodes contain a point, the
    first one in ``nodes`` wins.

    Returns a list of (source_id, target_id, label) edges.
    """
    for node in nodes:
        node["shape"] = box(*node["bbox"])

    def _node_at(point):
        # NOTE: shapely's `contains` is False for a point lying exactly on
        # the polygon boundary, so endpoints must fall strictly inside.
        return next(
            (n["id"] for n in nodes if n["shape"].contains(point)),
            None,
        )

    edges = []
    for arrow in arrows:
        source = _node_at(Point(arrow["tail"]))
        target = _node_at(Point(arrow["head"]))

        # Compare against None explicitly: a valid id such as 0 is falsy and
        # would otherwise make the arrow disappear.
        if source is None or target is None or source == target:
            continue

        # A missing or None label becomes "" so consumers can treat it as text.
        edges.append((source, target, arrow.get("label") or ""))

    return edges

def detect_node_type(text):
    """
    Heuristic-based type detection from node text.

    Keywords are matched as whole words (case-insensitive), so "Send" is not
    mistaken for "end" and "node" is not mistaken for "no". Checks run in
    this order and the first hit wins:

    * "start"            -> "start"
    * "end" or "full"    -> "end"
    * "?", "yes" or "no" -> "decision"
    * anything else      -> "process"

    Empty or None text is classified as "process".
    """
    if not text:
        return "process"

    # Replace every non-alphanumeric character with a space, then split, so
    # punctuation such as "Yes/No" or "End." does not hide a keyword.
    lowered = text.lower()
    words = set(
        "".join(ch if ch.isalnum() else " " for ch in lowered).split()
    )

    if "start" in words:
        return "start"
    if words & {"end", "full"}:
        return "end"
    if "?" in text or words & {"yes", "no"}:
        return "decision"
    return "process"

def _build_step(node_id, node, parents, edge_labels):
    """
    Builds the output record for one node: id/text/type, its parent link(s),
    and either a "branches" mapping (decision with 2+ exits) or "next".
    """
    step = {
        "id": node_id,
        "text": node["text"],
        "type": node["type"],
    }

    if len(parents) == 1:
        step["parent"] = parents[0]
    elif len(parents) > 1:
        step["parents"] = list(parents)

    next_nodes = node["next"]
    if node["type"] == "decision" and len(next_nodes) >= 2:
        branches = {}
        for tgt in next_nodes:
            label = edge_labels.get((node_id, tgt), "")
            if "yes" in label:
                branches["yes"] = tgt
            elif "no" in label:
                branches["no"] = tgt
            else:
                # Unlabelled exits are collected rather than dropped.
                branches.setdefault("unknown", []).append(tgt)
        step["branches"] = branches
    elif len(next_nodes) == 1:
        step["next"] = next_nodes[0]
    elif next_nodes:
        step["next"] = list(next_nodes)

    return step


def build_flowchart_json(nodes, edges):
    """
    Constructs flowchart JSON structure with parent and branching info.

    ``nodes`` is an iterable of dicts with "id" and optionally "text" and
    "type" (a missing/empty type is inferred with ``detect_node_type``).
    ``edges`` is an iterable of (source_id, target_id, label) tuples; a
    None label is treated as "". Repeated (source, target) pairs are
    collapsed into one link, the last label winning.

    Returns ``{"start": <id or None>, "steps": [...]}`` where steps are
    listed in breadth-first order from the entry points. Entry points are
    the nodes without incoming edges. If there are none (e.g. the chart
    loops back to its start), nodes typed "start" are used, falling back to
    the first node. Nodes not reachable from any entry point are still
    emitted, after the reachable ones, in input order.

    Raises KeyError if an edge references an id that is not in ``nodes``.
    """
    graph = {}
    for node in nodes:
        text = (node.get("text") or "").strip()
        graph[node["id"]] = {
            "text": text,
            "type": node.get("type") or detect_node_type(text),
            "next": [],
        }

    reverse_links = defaultdict(list)
    edge_labels = {}
    for src, tgt, label in edges:
        # Link each (src, tgt) pair only once so duplicate detections do not
        # produce repeated "next"/"parents" entries.
        if (src, tgt) not in edge_labels:
            graph[src]["next"].append(tgt)
            reverse_links[tgt].append(src)
        edge_labels[(src, tgt)] = (label or "").lower().strip()

    start_nodes = [nid for nid in graph if not reverse_links.get(nid)]
    if not start_nodes and graph:
        # Every node has an incoming edge (the chart contains a loop through
        # its entry). Prefer explicitly typed start nodes, else the first node.
        start_nodes = [
            nid for nid, info in graph.items() if info["type"] == "start"
        ] or [next(iter(graph))]

    flowchart_json = {
        "start": start_nodes[0] if start_nodes else None,
        "steps": [],
    }

    visited = set()
    queue = deque(start_nodes)
    # Shared iterator: each node is inspected at most once when looking for
    # components the traversal has not reached yet.
    leftovers = iter(graph)

    while True:
        if not queue:
            unreached = next(
                (nid for nid in leftovers if nid not in visited), None
            )
            if unreached is None:
                break
            queue.append(unreached)

        curr = queue.popleft()
        if curr in visited:
            continue
        visited.add(curr)

        node = graph[curr]
        flowchart_json["steps"].append(
            _build_step(curr, node, reverse_links.get(curr, []), edge_labels)
        )
        queue.extend(node["next"])

    return flowchart_json