Spaces:
Sleeping
Sleeping
Venkat V
committed on
Commit
·
ff2cc46
1
Parent(s):
6baf36d
plumbing code for streamlit, fast api
Browse files- .DS_Store +0 -0
- app.py +72 -1
- graph_module/.DS_Store +0 -0
- graph_module/__init__.py +79 -1
- ocr_module/__init__.py +18 -1
- requirements.txt +2 -0
- streamlit_app.py +65 -1
- summarizer_module/__init__.py +34 -1
- yolo_module/__init__.py +16 -1
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
app.py
CHANGED
@@ -1 +1,72 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# app.py
#
# FastAPI backend for the flowchart-to-English pipeline.
import io
import json

import uvicorn
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from PIL import Image

# Import your pipeline modules
from yolo_module import run_yolo
from ocr_module import extract_text
from graph_module import map_arrows, build_flowchart_json
from summarizer_module import summarize_flowchart

app = FastAPI()

# CORS for Streamlit access: the front-end runs on a different origin,
# so allow everything (acceptable for a demo deployment).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/process-image")
async def process_image(file: UploadFile = File(...), debug: str = Form("false")):
    """Run the full flowchart pipeline on an uploaded image.

    Steps: YOLO detection -> OCR per detected box -> arrow-to-box edge
    mapping -> structured flowchart JSON -> plain-English summary.

    Parameters:
        file: Uploaded flowchart image (any PIL-readable format).
        debug: "true"/"false" string flag from the form; when "true",
            a step-by-step log is included in the response.

    Returns:
        JSONResponse with keys "flowchart" (dict), "summary" (str) and
        "debug" (newline-joined log when debug is on, else "").
    """
    # NOTE(review): emoji in the log strings appear mojibake'd in the
    # source dump; they are preserved verbatim here.
    debug_mode = debug.lower() == "true"

    debug_log = []
    # BUG FIX: the original appended the literal text "file" instead of
    # interpolating the uploaded file's name.
    if debug_mode: debug_log.append(f"π₯ Received file: {file.filename}")
    print("π₯ Received file:", file.filename)

    contents = await file.read()
    image = Image.open(io.BytesIO(contents)).convert("RGB")
    if debug_mode: debug_log.append("β Image loaded and converted to RGB")
    print("β Image loaded and converted to RGB")

    # Step 1: Run YOLO detection
    boxes, arrows = run_yolo(image)
    if debug_mode: debug_log.append(f"π¦ YOLO detected {len(boxes)} boxes and {len(arrows)} arrows")

    # Step 2: OCR on boxes
    for box in boxes:
        box["text"] = extract_text(image, box["bbox"])
        if debug_mode: debug_log.append(f"π OCR text for box {box['id']}: {box['text']}")
        print(f"π OCR text for box {box['id']}: {box['text']}")

    # Step 3: Map arrows to boxes and build graph
    edges = map_arrows(boxes, arrows)
    if debug_mode: debug_log.append(f"π§ Mapped {len(edges)} edges from arrows to boxes")
    flowchart_json = build_flowchart_json(boxes, edges)
    print("π§ Flowchart JSON structure:")
    print(json.dumps(flowchart_json, indent=2))

    # Step 4: Summarize flowchart in English
    summary = summarize_flowchart(flowchart_json)
    print("π Generated English summary:")
    print(summary)

    return JSONResponse({
        "flowchart": flowchart_json,
        "summary": summary,
        "debug": "\n".join(debug_log) if debug_mode else ""
    })
70 |
+
|
if __name__ == "__main__":
    # Serve the API on all interfaces; port 7860 is the Hugging Face
    # Spaces convention.
    uvicorn.run(app, host="0.0.0.0", port=7860)
graph_module/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
graph_module/__init__.py
CHANGED
@@ -1 +1,79 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# flowchart_builder.py
|
2 |
+
|
3 |
+
from shapely.geometry import box, Point
|
4 |
+
|
5 |
+
|
def map_arrows(nodes, arrows):
    """
    Matches arrows to source and target nodes based on geometric positions.

    Each node gets a shapely rectangle built from its bbox (stored on the
    node dict under "shape" as a side effect); an arrow's tail/head point
    is assigned to the first node whose rectangle covers it.

    Returns a list of edges as (source_id, target_id); arrows whose
    endpoints fall outside every node are dropped.
    """
    for node in nodes:
        node["shape"] = box(*node["bbox"])

    edges = []
    for arrow in arrows:
        tail_point = Point(arrow["tail"])
        head_point = Point(arrow["head"])

        # BUG FIX: use covers() instead of contains(). Shapely's
        # contains() excludes the boundary, so an arrow endpoint lying
        # exactly on a node's border (as in this module's example data,
        # e.g. tail (200, 125) on node1's right edge) was never matched
        # and the edge was silently dropped. covers() includes the
        # boundary.
        source = next((n["id"] for n in nodes if n["shape"].covers(tail_point)), None)
        target = next((n["id"] for n in nodes if n["shape"].covers(head_point)), None)

        # Explicit None checks so a falsy-but-valid id could not drop an edge.
        if source is not None and target is not None:
            edges.append((source, target))
    return edges
26 |
+
|
def build_flowchart_json(nodes, edges):
    """
    Builds a structured JSON from node and edge data.

    Returns a dict with "start" (id of the node typed "start", or None)
    and "steps" (one entry per node, in node order; decision nodes with
    two or more outgoing edges get yes/no "branches", single-exit nodes
    get "next").
    """
    # Step 1: adjacency map keyed by node id.
    graph = {}
    for n in nodes:
        graph[n["id"]] = {"text": n.get("text", ""), "type": n["type"], "next": []}
    for src, dst in edges:
        graph[src]["next"].append(dst)

    # Step 2: locate the start node (first one typed "start", if any).
    start_id = None
    for n in nodes:
        if n["type"] == "start":
            start_id = n["id"]
            break

    flowchart_json = {"start": start_id, "steps": []}

    for node_id, info in graph.items():
        step = {"id": node_id, "text": info["text"], "type": info["type"]}
        outgoing = info["next"]
        if info["type"] == "decision" and len(outgoing) >= 2:
            # Convention: first outgoing edge is "yes", second is "no".
            step["branches"] = {"yes": outgoing[0], "no": outgoing[1]}
        elif len(outgoing) == 1:
            step["next"] = outgoing[0]
        flowchart_json["steps"].append(step)

    return flowchart_json
59 |
+
|
# Example usage: quick smoke test for this module when run directly.
if __name__ == "__main__":
    nodes = [
        {"id": "node1", "bbox": [100, 100, 200, 150], "text": "Start", "type": "start"},
        {"id": "node2", "bbox": [300, 100, 400, 150], "text": "Is valid?", "type": "decision"},
        {"id": "node3", "bbox": [500, 50, 600, 100], "text": "Approve", "type": "process"},
        {"id": "node4", "bbox": [500, 150, 600, 200], "text": "Reject", "type": "process"},
    ]

    arrows = [
        {"id": "arrow1", "tail": (200, 125), "head": (300, 125)},
        {"id": "arrow2", "tail": (400, 125), "head": (500, 75)},
        {"id": "arrow3", "tail": (400, 125), "head": (500, 175)},
    ]

    edges = map_arrows(nodes, arrows)
    flowchart_json = build_flowchart_json(nodes, edges)

    import json
    print(json.dumps(flowchart_json, indent=2))
ocr_module/__init__.py
CHANGED
@@ -1 +1,18 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pytesseract
from PIL import Image

def extract_text(image, bbox):
    """
    Run OCR on a cropped region of the image.

    Parameters:
        image (PIL.Image): The full image.
        bbox (list): [x1, y1, x2, y2] coordinates of the region to crop.

    Returns:
        str: Extracted text, stripped of surrounding whitespace.
    """
    x1, y1, x2, y2 = bbox
    region = image.crop((x1, y1, x2, y2))
    return pytesseract.image_to_string(region).strip()
requirements.txt
CHANGED
@@ -4,3 +4,5 @@ pillow
|
|
4 |
shapely
|
5 |
pytesseract
|
6 |
transformers
|
|
|
|
|
|
4 |
shapely
|
5 |
pytesseract
|
6 |
transformers
|
7 |
+
torch
|
8 |
+
python-multipart
|
streamlit_app.py
CHANGED
@@ -1 +1,65 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# streamlit_app.py
#
# Streamlit front-end: uploads a flowchart image to the FastAPI backend
# and renders the resulting JSON + English summary side by side.
import streamlit as st
import requests
import json
from PIL import Image
import io

API_URL = "http://localhost:7860/process-image"  # Change if hosted elsewhere

st.set_page_config(page_title="Flowchart to English", layout="wide")
st.title("π Flowchart to Plain English")

# Debug mode switch
debug_mode = st.toggle("π§ Show Debug Info", value=False)

uploaded_file = st.file_uploader("Upload a flowchart image", type=["png", "jpg", "jpeg"])

if not uploaded_file:
    st.info("Upload a flowchart image to begin.")
else:
    # Scale the preview down to a fixed width, preserving aspect ratio.
    image = Image.open(uploaded_file)
    max_width = 600
    scale = max_width / float(image.size[0])
    preview = image.resize((max_width, int(float(image.size[1]) * float(scale))))
    st.image(preview, caption="Uploaded Image", use_container_width=False)

    if st.button("π Analyze Flowchart"):
        progress = st.progress(0, text="Sending image to backend...")

        try:
            response = requests.post(
                API_URL,
                files={"file": uploaded_file.getvalue()},
                data={"debug": str(debug_mode).lower()},
            )
            progress.progress(50, text="Processing detection, OCR, and reasoning...")

            if response.status_code == 200:
                data = response.json()
                progress.progress(80, text="Generating explanation using LLM...")

                # Show the backend's step-by-step log only when requested.
                if debug_mode:
                    st.markdown("### π§ͺ Debug Pipeline Info")
                    st.code(data.get("debug", "No debug info available."), language="markdown")

                # Results side by side: raw JSON left, prose summary right.
                col1, col2 = st.columns(2)
                with col1:
                    st.subheader("π§ Flowchart JSON")
                    st.json(data["flowchart"])
                with col2:
                    st.subheader("π English Summary")
                    st.markdown(data["summary"])

                progress.progress(100, text="Done!")
            else:
                st.error(f"Something went wrong: {response.status_code}")
        except Exception as e:
            st.error(f"An error occurred: {e}")
|
summarizer_module/__init__.py
CHANGED
@@ -1 +1,34 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# summarizer_module/__init__.py

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Use a small local model (e.g., Phi-2)
MODEL_ID = "microsoft/phi-2"  # Ensure it's downloaded and cached locally

# Model, tokenizer and pipeline are built once at import time and shared
# by every caller of summarize_flowchart.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
summarizer = pipeline("text-generation", model=model, tokenizer=tokenizer)

def summarize_flowchart(flowchart_json):
    """
    Given a flowchart JSON with 'start' and 'steps', returns a plain English
    explanation formatted as bullets and sub-bullets.

    Args:
        flowchart_json (dict): Structured representation of flowchart

    Returns:
        str: Bullet-style natural language summary of the logic
    """
    prompt = (
        "Turn the following flowchart into a bullet-point explanation in plain English.\n"
        "Use bullets for steps and sub-bullets for branches.\n"
        "\n"
        f"Flowchart JSON:\n{flowchart_json}\n"
        "\nExplanation:"
    )

    # Greedy decoding (do_sample=False) keeps the output deterministic.
    generated = summarizer(prompt, max_new_tokens=300, do_sample=False)[0]["generated_text"]
    # The model echoes the prompt, so keep only what follows the final
    # "Explanation:" marker.
    return generated.split("Explanation:")[-1].strip()
yolo_module/__init__.py
CHANGED
@@ -1 +1,16 @@
|
|
1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# yolo_module/__init__.py

def run_yolo(image):
    """
    Placeholder YOLO inference function.
    In actual implementation, load your model and return detected boxes/arrows.

    Returns:
        tuple[list, list]: (boxes, arrows) — each box is a dict with
        "id", "bbox" [x1, y1, x2, y2], "text" and "type"; each arrow is
        a dict with "id", "tail" (x, y) and "head" (x, y).
    """
    # Dummy detections: a start node, a decision node, and one arrow
    # joining them, so the rest of the pipeline can be exercised.
    boxes = [
        {"id": "node1", "bbox": [100, 100, 200, 150], "text": "", "type": "start"},
        {"id": "node2", "bbox": [300, 100, 400, 150], "text": "", "type": "decision"},
    ]
    arrows = [{"id": "arrow1", "tail": (200, 125), "head": (300, 125)}]
    return boxes, arrows
|