chandini2595 committed on
Commit
39c8302
·
1 Parent(s): e1fbe20
Files changed (4)
  1. app.py +7 -1
  2. insert_dummy_data.py +0 -51
  3. packages.txt +2 -1
  4. requirements.txt +1 -0
app.py CHANGED
@@ -13,6 +13,7 @@ import pytesseract
13
  import re
14
  from openai import OpenAI
15
  import os
 
16
  from dotenv import load_dotenv
17
  from chatbot_utils import ask_receipt_chatbot
18
  import time
@@ -178,7 +179,12 @@ def main():
178
 
179
  if uploaded_file is not None:
180
  # Display uploaded image
181
- image = Image.open(uploaded_file)
 
 
 
 
 
182
  st.image(image, caption="Uploaded Document", width=600)
183
 
184
  # Process button
 
13
  import re
14
  from openai import OpenAI
15
  import os
16
+ from pdf2image import convert_from_bytes
17
  from dotenv import load_dotenv
18
  from chatbot_utils import ask_receipt_chatbot
19
  import time
 
179
 
180
  if uploaded_file is not None:
181
  # Display uploaded image
182
+ if uploaded_file.type == "application/pdf":
183
+ # Convert first page of PDF to image
184
+ images = convert_from_bytes(uploaded_file.read())
185
+ image = images[0] # Use the first page
186
+ else:
187
+ image = Image.open(uploaded_file)
188
  st.image(image, caption="Uploaded Document", width=600)
189
 
190
  # Process button
insert_dummy_data.py DELETED
@@ -1,51 +0,0 @@
1
- import boto3
2
-
3
- # Initialize DynamoDB resource (ensure AWS credentials and region are set)
4
- dynamodb = boto3.resource('dynamodb', region_name='us-east-1') # Change region if needed
5
- table = dynamodb.Table('Receipts') # Replace with your table name
6
-
7
- # List of dummy items to insert with meaningful receipt numbers
8
- dummy_items = [
9
- {
10
- 'receipt_no': 'RCPT-2024-0001',
11
- 'amount_paid': '100.00',
12
- 'date': '2024-01-01',
13
- 'name': 'John Doe',
14
- 'product': 'Widget A'
15
- },
16
- {
17
- 'receipt_no': 'RCPT-2024-0002',
18
- 'amount_paid': '250.50',
19
- 'date': '2024-02-15',
20
- 'name': 'Jane Smith',
21
- 'product': 'Gadget B'
22
- },
23
- {
24
- 'receipt_no': 'RCPT-2024-0003',
25
- 'amount_paid': '75.25',
26
- 'date': '2024-03-10',
27
- 'name': 'Alice Johnson',
28
- 'product': 'Thingamajig C'
29
- },
30
- {
31
- 'receipt_no': 'RCPT-2024-0004',
32
- 'amount_paid': '180.00',
33
- 'date': '2024-04-05',
34
- 'name': 'Bob Lee',
35
- 'product': 'Gizmo D'
36
- },
37
- {
38
- 'receipt_no': 'RCPT-2024-0005',
39
- 'amount_paid': '320.75',
40
- 'date': '2024-05-20',
41
- 'name': 'Carol King',
42
- 'product': 'Device E'
43
- }
44
- ]
45
-
46
- # Insert each item
47
- for item in dummy_items:
48
- table.put_item(Item=item)
49
- print(f"Inserted: {item['receipt_no']}")
50
-
51
- print("Dummy data inserted successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
packages.txt CHANGED
@@ -1 +1,2 @@
1
- tesseract-ocr
 
 
1
+ tesseract-ocr
2
+ poppler-utils
requirements.txt CHANGED
@@ -33,3 +33,4 @@ streamlit
33
  plotly==5.18.0
34
  matplotlib
35
  scikit-learn
 
 
33
  plotly==5.18.0
34
  matplotlib
35
  scikit-learn
36
+ pdf2image