Spaces:

bulentsoykan
/

streamlit-OCR-app

Running

App Files Files Community

bulentsoykan committed on Mar 17

Commit

6a1b293

verified ·

1 Parent(s): cbfe773

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -23

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import streamlit as st
-import ollama
 from PIL import Image
 import io
 import base64
 # Page configuration
 st.set_page_config(
     page_title="Gemma-3 OCR",
@@ -10,10 +12,49 @@ st.set_page_config(
     layout="wide",
     initial_sidebar_state="expanded"
 )
 # Title and description in main area
-st.markdown("""
-    # <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Gemma-3 OCR
-""".format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
 # Add clear button to top right
 col1, col2 = st.columns([6,1])
 with col2:
@@ -21,40 +62,42 @@ with col2:
         if 'ocr_result' in st.session_state:
             del st.session_state['ocr_result']
         st.rerun()
 st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Gemma-3 Vision!</p>', unsafe_allow_html=True)
 st.markdown("---")
 # Move upload controls to sidebar
 with st.sidebar:
     st.header("Upload Image")
     uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
     if uploaded_file is not None:
         # Display the uploaded image
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Image")
-        if st.button("Extract Text 🔍", type="primary"):
-            with st.spinner("Processing image..."):
-                try:
-                    response = ollama.chat(
-                        model='gemma3:12b',
-                        messages=[{
-                            'role': 'user',
-                            'content': """Analyze the text in the provided image. Extract all readable content
-                                        and present it in a structured Markdown format that is clear, concise,
-                                        and well-organized. Ensure proper formatting (e.g., headings, lists, or
-                                        code blocks) as necessary to represent the content effectively.""",
-                            'images': [uploaded_file.getvalue()]
-                        }]
-                    )
-                    st.session_state['ocr_result'] = response.message.content
-                except Exception as e:
-                    st.error(f"Error processing image: {str(e)}")
 # Main content area for results
 if 'ocr_result' in st.session_state:
     st.markdown(st.session_state['ocr_result'])
 else:
     st.info("Upload an image and click 'Extract Text' to see the results here.")
 # Footer
 st.markdown("---")
-st.markdown("Made with ❤️ using Gemma-3 Vision Model ")

 import streamlit as st
 from PIL import Image
 import io
 import base64
+import requests
+import os
 # Page configuration
 st.set_page_config(
     page_title="Gemma-3 OCR",
     layout="wide",
     initial_sidebar_state="expanded"
 )
+# Set up Hugging Face API
+HF_API_KEY = os.environ.get("HF_API_KEY", "")  # Get API key from environment variable
+if not HF_API_KEY:
+    HF_API_KEY = st.secrets.get("HF_API_KEY", "")  # Try getting from Streamlit secrets
+# Hugging Face API function
+def process_image_with_hf(image_bytes, timeout=60):
+    """Send an image and the OCR prompt to the Hugging Face Inference API.
+
+    The image is base64-encoded and posted as JSON together with a fixed
+    prompt; the bearer token is read from the module-level ``HF_API_KEY``.
+
+    Args:
+        image_bytes: Raw image content (bytes-like), as required by
+            ``base64.b64encode``.
+        timeout: Seconds to wait for the HTTP request before giving up.
+            Without a timeout ``requests`` can block forever on a stalled
+            connection.
+
+    Returns:
+        The ``generated_text`` field of the first item in the JSON response.
+
+    Raises:
+        requests.exceptions.RequestException: On connection errors/timeouts.
+        RuntimeError: If the API answers with a non-200 status code, or with
+            a body that is not JSON shaped like ``[{"generated_text": ...}]``.
+    """
+    API_URL = "https://api-inference.huggingface.co/models/google/gemma-3-vision"
+    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
+    # Convert image to base64 so it can travel inside the JSON payload
+    image_b64 = base64.b64encode(image_bytes).decode('utf-8')
+    # Prepare payload
+    payload = {
+        "inputs": {
+            "image": image_b64,
+            "text": """Analyze the text in the provided image. Extract all readable content
+                    and present it in a structured Markdown format that is clear, concise,
+                    and well-organized. Ensure proper formatting (e.g., headings, lists, or
+                    code blocks) as necessary to represent the content effectively."""
+        }
+    }
+    # Make API request; the timeout keeps the UI from hanging indefinitely
+    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
+    if response.status_code != 200:
+        raise RuntimeError(f"API request failed with status code {response.status_code}: {response.text}")
+    # A 200 response can still carry an unexpected body (e.g. an error object
+    # or non-JSON text); report that clearly instead of a bare KeyError.
+    try:
+        return response.json()[0]["generated_text"]
+    except (ValueError, LookupError, TypeError) as err:
+        raise RuntimeError(f"Unexpected API response format: {response.text}") from err
 # Title and description in main area
+try:
+    # Try to load the image from assets folder
+    st.markdown("""
+        # <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Gemma-3 OCR
+    """.format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
+except FileNotFoundError:
+    # Fallback if image doesn't exist
+    st.title("Gemma-3 OCR")
 # Add clear button to top right
 col1, col2 = st.columns([6,1])
 with col2:
         if 'ocr_result' in st.session_state:
             del st.session_state['ocr_result']
         st.rerun()
 st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Gemma-3 Vision!</p>', unsafe_allow_html=True)
 st.markdown("---")
 # Move upload controls to sidebar
 with st.sidebar:
     st.header("Upload Image")
     uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
     if uploaded_file is not None:
         # Display the uploaded image
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Image")
+        # Check if API key is available
+        if not HF_API_KEY:
+            st.error("Hugging Face API key is missing. Please set it as an environment variable or in Streamlit secrets.")
+        else:
+            if st.button("Extract Text 🔍", type="primary"):
+                with st.spinner("Processing image..."):
+                    try:
+                        # Get image bytes
+                        img_bytes = uploaded_file.getvalue()
+                        # Process with Hugging Face API
+                        result = process_image_with_hf(img_bytes)
+                        st.session_state['ocr_result'] = result
+                    except Exception as e:
+                        st.error(f"Error processing image: {str(e)}")
 # Main content area for results
 if 'ocr_result' in st.session_state:
     st.markdown(st.session_state['ocr_result'])
 else:
     st.info("Upload an image and click 'Extract Text' to see the results here.")
 # Footer
 st.markdown("---")
+st.markdown("Made with using Gemma-3 Vision Model | [Report an Issue](https://github.com/bulentsoykan/streamlit-OCR-app/issues)")