import base64

import cv2
import gradio as gr
import numpy as np
import requests
from ultralytics import YOLO

# Load the Latex2Layout detection model once at startup.
MODEL_PATH = "latex2layout_object_detection_yolov8.pt"
latex2layout_model = YOLO(MODEL_PATH)


def detect_layout(image):
    """Run Latex2Layout detection on an uploaded image.

    Args:
        image: Uploaded image as an HxWx3 numpy array, or None.

    Returns:
        tuple: ``(annotated_image, layout_info)`` — a copy of the input with
        detection boxes drawn, and a text summary of the detected layout
        elements. Returns ``(None, error_message)`` when no image was given.
    """
    if image is None:
        return None, "Error: No image uploaded."

    # Run detection; ultralytics returns one Results object per input image.
    result = latex2layout_model(image)[0]

    annotated_image = image.copy()
    layout_annotations = []

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
        conf = float(box.conf[0])
        cls_name = result.names[int(box.cls[0])]

        # Random colour per box so neighbouring elements stay distinguishable.
        color = tuple(np.random.randint(0, 255, 3).tolist())
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)

        label = f"{cls_name} {conf:.2f}"
        (label_w, label_h), _ = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
        )
        # Clamp the label background so it is never drawn above the image
        # when a detection touches the top edge (original used y1-label_h-5,
        # which can be negative).
        label_top = max(y1 - label_h - 5, 0)
        cv2.rectangle(
            annotated_image,
            (x1, label_top),
            (x1 + label_w, label_top + label_h + 5),
            color,
            -1,
        )
        cv2.putText(
            annotated_image,
            label,
            (x1, label_top + label_h),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
        )

        # Plain-text annotation to feed to Qwen2.5-VL alongside the image.
        layout_annotations.append(
            f"{cls_name} at position ({x1},{y1},{x2},{y2}) with confidence {conf:.2f}"
        )

    if layout_annotations:
        layout_info = "Detected layout elements: " + "; ".join(layout_annotations)
    else:
        layout_info = "No layout elements detected."
    return annotated_image, layout_info


def call_qwen_vl_api(api_url, image, layout_info, question):
    """Call the Qwen2.5-VL API with the image, layout info, and user question.

    Args:
        api_url: The URL of the Qwen2.5-VL API endpoint.
        image: The uploaded image (numpy array).
        layout_info: Text description of detected layout elements.
        question: User's question about the image and layout.

    Returns:
        str: The answer from the API, or an error message.
    """
    if not api_url:
        return "Error: Please provide a valid Qwen2.5-VL API URL."
    if not question:
        return "Error: Please enter a question."

    try:
        # Encode the image as PNG + base64 rather than sending the raw pixel
        # array: image.tolist() balloons into tens of MB of JSON for an
        # ordinary image. Adjust field names to match the actual API spec.
        ok, png_buffer = cv2.imencode(".png", image)
        if not ok:
            return "Error: Failed to encode the image."
        payload = {
            "image": base64.b64encode(png_buffer.tobytes()).decode("ascii"),
            "prompt": f"{layout_info}\n\nQuestion: {question}",
        }
        response = requests.post(api_url, json=payload, timeout=30)
        response.raise_for_status()  # Raise on 4xx/5xx status codes.
        return response.json().get("answer", "Error: No answer received from API.")
    except requests.exceptions.RequestException as e:
        return f"Error: API call failed - {str(e)}"


def process_image_and_question(api_url, image, question):
    """Detect layout with Latex2Layout, then query the Qwen2.5-VL API.

    Args:
        api_url: Qwen2.5-VL API URL.
        image: Uploaded image (numpy array or None).
        question: User's question.

    Returns:
        tuple: ``(annotated_image, layout_info, answer)``.
    """
    annotated_image, layout_info = detect_layout(image)
    if annotated_image is None:
        return None, layout_info, "Error: Detection failed."
    answer = call_qwen_vl_api(api_url, image, layout_info, question)
    return annotated_image, layout_info, answer


# Custom CSS for styling the interface.
custom_css = """
.container { max-width: 1200px; margin: auto; }
.button-primary { background-color: #4CAF50; color: white; }
.gr-image { border: 2px solid #ddd; border-radius: 5px; }
.gr-textbox { font-family: monospace; }
"""

# Build the Gradio interface.
with gr.Blocks(
    title="Latex2Layout Detection & QA",
    theme=gr.themes.Default(),
    css=custom_css,
) as demo:
    gr.Markdown(
        """
        # Latex2Layout Layout Detection & Q&A
        Upload an image to detect layout elements using the **Latex2Layout** model,
        then ask questions about the layout and image content using the Qwen2.5-VL API.
        """
    )

    # API URL input
    api_url_input = gr.Textbox(
        label="Qwen2.5-VL API URL",
        placeholder="Enter the Qwen2.5-VL API URL here",
        value="",
    )

    # Main layout
    with gr.Row():
        # Input column
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Upload Image",
                type="numpy",
                height=400,
                elem_classes="gr-image",
            )
            question_input = gr.Textbox(
                label="Ask a Question",
                placeholder="e.g., What is the layout structure of this image?",
                lines=2,
            )
            submit_btn = gr.Button(
                "Detect & Ask",
                variant="primary",
                elem_classes="button-primary",
            )
            gr.Markdown(
                "**Tip**: Provide a clear image and specific question for best results."
            )

        # Output column
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Detected Layout",
                height=400,
                elem_classes="gr-image",
            )
            layout_output = gr.Textbox(
                label="Layout Information",
                lines=5,
                max_lines=10,
                elem_classes="gr-textbox",
            )
            answer_output = gr.Textbox(
                label="Answer",
                lines=5,
                max_lines=10,
                elem_classes="gr-textbox",
            )

    # Event handler. NOTE: the original passed `_js=` to .click()/.then() to
    # swap the button label — `_js` was removed in Gradio 4 (renamed `js`),
    # and mixing it with `fn` was unreliable. The built-in progress indicator
    # (show_progress) provides the "processing" feedback instead.
    submit_btn.click(
        fn=process_image_and_question,
        inputs=[api_url_input, input_image, question_input],
        outputs=[output_image, layout_output, answer_output],
        show_progress="full",
    )

# Launch the application.
if __name__ == "__main__":
    demo.launch()