bulentsoykan committed on
Commit
6a1b293
·
verified ·
1 Parent(s): cbfe773

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -23
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import streamlit as st
2
- import ollama
3
  from PIL import Image
4
  import io
5
  import base64
 
 
 
6
  # Page configuration
7
  st.set_page_config(
8
  page_title="Gemma-3 OCR",
@@ -10,10 +12,49 @@ st.set_page_config(
10
  layout="wide",
11
  initial_sidebar_state="expanded"
12
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Title and description in main area
14
- st.markdown("""
15
- # <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Gemma-3 OCR
16
- """.format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
 
 
 
 
 
 
17
  # Add clear button to top right
18
  col1, col2 = st.columns([6,1])
19
  with col2:
@@ -21,40 +62,42 @@ with col2:
21
  if 'ocr_result' in st.session_state:
22
  del st.session_state['ocr_result']
23
  st.rerun()
 
24
  st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Gemma-3 Vision!</p>', unsafe_allow_html=True)
25
  st.markdown("---")
 
26
  # Move upload controls to sidebar
27
  with st.sidebar:
28
  st.header("Upload Image")
29
  uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
30
-
31
  if uploaded_file is not None:
32
  # Display the uploaded image
33
  image = Image.open(uploaded_file)
34
  st.image(image, caption="Uploaded Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- if st.button("Extract Text 🔍", type="primary"):
37
- with st.spinner("Processing image..."):
38
- try:
39
- response = ollama.chat(
40
- model='gemma3:12b',
41
- messages=[{
42
- 'role': 'user',
43
- 'content': """Analyze the text in the provided image. Extract all readable content
44
- and present it in a structured Markdown format that is clear, concise,
45
- and well-organized. Ensure proper formatting (e.g., headings, lists, or
46
- code blocks) as necessary to represent the content effectively.""",
47
- 'images': [uploaded_file.getvalue()]
48
- }]
49
- )
50
- st.session_state['ocr_result'] = response.message.content
51
- except Exception as e:
52
- st.error(f"Error processing image: {str(e)}")
53
  # Main content area for results
54
  if 'ocr_result' in st.session_state:
55
  st.markdown(st.session_state['ocr_result'])
56
  else:
57
  st.info("Upload an image and click 'Extract Text' to see the results here.")
 
58
  # Footer
59
  st.markdown("---")
60
- st.markdown("Made with ❤️ using Gemma-3 Vision Model ")
 
1
  import streamlit as st
 
2
  from PIL import Image
3
  import io
4
  import base64
5
+ import requests
6
+ import os
7
+
8
  # Page configuration
9
  st.set_page_config(
10
  page_title="Gemma-3 OCR",
 
12
  layout="wide",
13
  initial_sidebar_state="expanded"
14
  )
15
+
16
+ # Set up Hugging Face API
17
+ HF_API_KEY = os.environ.get("HF_API_KEY", "") # Get API key from environment variable
18
+ if not HF_API_KEY:
19
+ HF_API_KEY = st.secrets.get("HF_API_KEY", "") # Try getting from Streamlit secrets
20
+
21
+ # Hugging Face API function
22
+ def process_image_with_hf(image_bytes):
23
+ API_URL = "https://api-inference.huggingface.co/models/google/gemma-3-vision"
24
+ headers = {"Authorization": f"Bearer {HF_API_KEY}"}
25
+
26
+ # Convert image to base64
27
+ image_b64 = base64.b64encode(image_bytes).decode('utf-8')
28
+
29
+ # Prepare payload
30
+ payload = {
31
+ "inputs": {
32
+ "image": image_b64,
33
+ "text": """Analyze the text in the provided image. Extract all readable content
34
+ and present it in a structured Markdown format that is clear, concise,
35
+ and well-organized. Ensure proper formatting (e.g., headings, lists, or
36
+ code blocks) as necessary to represent the content effectively."""
37
+ }
38
+ }
39
+
40
+ # Make API request
41
+ response = requests.post(API_URL, headers=headers, json=payload)
42
+
43
+ if response.status_code != 200:
44
+ raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
45
+
46
+ return response.json()[0]["generated_text"]
47
+
48
  # Title and description in main area
49
+ try:
50
+ # Try to load the image from assets folder
51
+ st.markdown("""
52
+ # <img src="data:image/png;base64,{}" width="50" style="vertical-align: -12px;"> Gemma-3 OCR
53
+ """.format(base64.b64encode(open("./assets/gemma3.png", "rb").read()).decode()), unsafe_allow_html=True)
54
+ except FileNotFoundError:
55
+ # Fallback if image doesn't exist
56
+ st.title("Gemma-3 OCR")
57
+
58
  # Add clear button to top right
59
  col1, col2 = st.columns([6,1])
60
  with col2:
 
62
  if 'ocr_result' in st.session_state:
63
  del st.session_state['ocr_result']
64
  st.rerun()
65
+
66
  st.markdown('<p style="margin-top: -20px;">Extract structured text from images using Gemma-3 Vision!</p>', unsafe_allow_html=True)
67
  st.markdown("---")
68
+
69
  # Move upload controls to sidebar
70
  with st.sidebar:
71
  st.header("Upload Image")
72
  uploaded_file = st.file_uploader("Choose an image...", type=['png', 'jpg', 'jpeg'])
73
+
74
  if uploaded_file is not None:
75
  # Display the uploaded image
76
  image = Image.open(uploaded_file)
77
  st.image(image, caption="Uploaded Image")
78
+
79
+ # Check if API key is available
80
+ if not HF_API_KEY:
81
+ st.error("Hugging Face API key is missing. Please set it as an environment variable or in Streamlit secrets.")
82
+ else:
83
+ if st.button("Extract Text 🔍", type="primary"):
84
+ with st.spinner("Processing image..."):
85
+ try:
86
+ # Get image bytes
87
+ img_bytes = uploaded_file.getvalue()
88
+
89
+ # Process with Hugging Face API
90
+ result = process_image_with_hf(img_bytes)
91
+ st.session_state['ocr_result'] = result
92
+ except Exception as e:
93
+ st.error(f"Error processing image: {str(e)}")
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Main content area for results
96
  if 'ocr_result' in st.session_state:
97
  st.markdown(st.session_state['ocr_result'])
98
  else:
99
  st.info("Upload an image and click 'Extract Text' to see the results here.")
100
+
101
  # Footer
102
  st.markdown("---")
103
+ st.markdown("Made with using Gemma-3 Vision Model | [Report an Issue](https://github.com/bulentsoykan/streamlit-OCR-app/issues)")