# Spaces:
# Runtime error
# Runtime error
import os
import re

import cv2
import easyocr
import gradio as gr
from simple_salesforce import Salesforce
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
# Salesforce credentials
# Secrets are read from the environment (for example, deployment secrets)
# instead of being committed with the source. Each secret is None when its
# environment variable is unset.
# NOTE(review): the values that used to be hard-coded here were published
# together with the code and should be rotated.
Salesforce_User_Name = os.environ.get("SALESFORCE_USERNAME")
Salesforce_Password = os.environ.get("SALESFORCE_PASSWORD")
# Not a secret; the previous value is kept as the default.
SALESFORCE_INSTANCE_URL = os.environ.get(
    "SALESFORCE_INSTANCE_URL",
    "https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com",
)
# Supplied as the ``security_token`` argument when the Salesforce client is built.
SALESFORCE_ACCESS_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN")
# OCR engine, configured for English text only.
reader = easyocr.Reader(["en"])

# Pre-trained LayoutLM weights; the model and its tokenizer are loaded from
# the same checkpoint identifier.
# NOTE(review): neither object is referenced by the functions visible in this
# file -- confirm they are still needed before paying the load cost.
_LAYOUTLM_CHECKPOINT = "microsoft/layoutlm-large-uncased"
model = LayoutLMForTokenClassification.from_pretrained(_LAYOUTLM_CHECKPOINT)
tokenizer = LayoutLMTokenizer.from_pretrained(_LAYOUTLM_CHECKPOINT)

# Salesforce client, created once at import time and shared module-wide.
sf = Salesforce(
    username=Salesforce_User_Name,
    password=Salesforce_Password,
    security_token=SALESFORCE_ACCESS_TOKEN,
)
# Function to extract text using EasyOCR and register the patient in Salesforce
def extract_patient_info(image):
    """Run OCR on an uploaded image, store the parsed details, return the text.

    Args:
        image: The uploaded image as a NumPy array, or ``None`` when no image
            was supplied.

    Returns:
        The text fragments found by OCR joined with single spaces, or an
        empty string when ``image`` is ``None``.
    """
    # An empty image input arrives as None; cv2.cvtColor would raise on it,
    # so report the problem and return early instead.
    if image is None:
        print("Error: No image provided!")
        return ""

    # Swap the first and third colour channels before running OCR.
    # NOTE(review): the array from the UI is presumably already RGB, in which
    # case this produces BGR -- confirm which order the OCR reader expects.
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Each detection is indexed below as (…, text, …); only the text is kept.
    result = reader.readtext(image_rgb)
    # Debug: Print OCR result
    print("OCR Result:", result)

    extracted_text = " ".join(detection[1] for detection in result)
    # Debug: Print the extracted text
    print("Extracted Text:", extracted_text)

    # Extract relevant details (Name, Age, Gender, Phone number) from the text
    details = extract_details_from_text(extracted_text)
    # Debug: Print parsed details
    print("Parsed Details:", details)

    # Create a record in Salesforce using the extracted details
    create_salesforce_record(details)

    # Return the extracted text for display
    return extracted_text
# Function to extract details from the extracted text using regex
# Labels that can directly follow the name: the OCR fragments are joined with
# spaces, so the next field's label sits on the same "line" as the name.
_NEXT_LABEL = r"\b(?:Age|Gender|Phone)\b"

# The name is captured lazily and stops at the next field label, at the first
# character that is neither a letter nor whitespace, or at the end of the text.
_NAME_RE = re.compile(
    r"\bName[:\s]*"
    rf"(?!{_NEXT_LABEL})"
    r"([A-Za-z][A-Za-z\s]*?)"
    rf"\s*(?={_NEXT_LABEL}|[^A-Za-z\s]|$)",
    re.IGNORECASE,
)
# The leading \b keeps words such as "Page" from being read as the Age label.
_AGE_RE = re.compile(r"\bAge[:\s]*(\d+)", re.IGNORECASE)
_GENDER_RE = re.compile(r"Gender[:\s]*(Male|Female)", re.IGNORECASE)
_PHONE_RE = re.compile(r"\bPhone\s*number[:\s]*(\d+)", re.IGNORECASE)


def extract_details_from_text(extracted_text):
    """Parse Name, Age, Gender and Phone number out of OCR text.

    Args:
        extracted_text: The OCR output as one string; ``None`` is treated as
            empty text.

    Returns:
        A dict holding any of the keys ``'Name'``, ``'Age'``, ``'Gender'`` and
        ``'Phone Number'``, each mapped to the matched string. A field that
        is not found is left out and an error line is printed for it.
    """
    text = extracted_text or ""
    details = {}

    # Extract Name (without swallowing the label of the following field)
    name_match = _NAME_RE.search(text)
    if name_match:
        details['Name'] = name_match.group(1)
    else:
        print("Error: Name not found!")

    # Extract Age
    age_match = _AGE_RE.search(text)
    if age_match:
        details['Age'] = age_match.group(1)
    else:
        print("Error: Age not found!")

    # Extract Gender
    gender_match = _GENDER_RE.search(text)
    if gender_match:
        details['Gender'] = gender_match.group(1)
    else:
        print("Error: Gender not found!")

    # Extract Phone number (label matched case-insensitively)
    phone_match = _PHONE_RE.search(text)
    if phone_match:
        details['Phone Number'] = phone_match.group(1)
    else:
        print("Error: Phone number not found!")

    return details
# Function to create a record in Salesforce using the extracted details
def create_salesforce_record(details):
    """Insert a ``Patient_Registration__c`` record built from *details*.

    Args:
        details: Mapping expected to contain the keys ``'Name'``, ``'Age'``,
            ``'Gender'`` and ``'Phone Number'``. ``None`` is treated as an
            empty mapping.

    Returns:
        None. Missing fields, a non-numeric age and errors raised while
        creating the record are reported on stdout instead of being raised.
    """
    details = details or {}

    # The parser leaves out fields it could not find; indexing them directly
    # would raise KeyError, so check up front and skip the insert.
    required_fields = ('Name', 'Age', 'Gender', 'Phone Number')
    missing = [field for field in required_fields if field not in details]
    if missing:
        print(f"Error creating Salesforce record: missing fields: {', '.join(missing)}")
        return

    try:
        age = int(details['Age'])
    except (TypeError, ValueError):
        print(f"Error creating Salesforce record: invalid age: {details['Age']!r}")
        return

    # Prepare the data to be inserted into Salesforce
    data = {
        'Name__c': details['Name'],
        'Age__c': age,
        'Gender__c': details['Gender'],
        'Phone_Number__c': details['Phone Number'],
    }
    # Debug: Print the data before inserting into Salesforce
    print("Data to be inserted into Salesforce:", data)

    try:
        # Create a new record in Salesforce
        sf.Patient_Registration__c.create(data)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error creating Salesforce record: {e}")
    else:
        print("Salesforce record created successfully!")
# Gradio UI: one image input, one text output, and a button that runs the
# OCR pipeline. Components are created in the same order as before.
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration")

    # Uploaded image, handed to the callback as a NumPy array
    image_input = gr.Image(label="Upload Image", type="numpy")

    # Textbox that shows the text returned by the callback
    output_text = gr.Textbox(label="Extracted Text")

    # Clicking the button sends the image through extract_patient_info and
    # writes its return value into the textbox
    process_button = gr.Button("Process Image")
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )

# Start the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()