import gradio as gr from transformers import AutoTokenizer, AutoModelForSequenceClassification import torch import torch.nn.functional as F # Load the pre-trained model and tokenizer using gr.load model = gr.load("models/Ahmed235/roberta_classification") # Tokenizer can be loaded using transformers directly tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification") device = torch.device("cpu") model = model.to(device) # Move the model to the CPU def extract_text_from_pptx(file_path): # Assume your implementation for text extraction remains the same pass def predict_pptx_content(file_path): try: extracted_text = extract_text_from_pptx(file_path) cleaned_text = re.sub(r'\s+', ' ', extracted_text) # Tokenize and encode the cleaned text input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt") input_encoding = {key: val.to(device) for key, val in input_encoding.items()} # Move input tensor to CPU # Perform inference with torch.no_grad(): outputs = model(**input_encoding) logits = outputs.logits probabilities = F.softmax(logits, dim=1) predicted_label_id = torch.argmax(logits, dim=1).item() predicted_label = model.config.id2label[predicted_label_id] predicted_probability = probabilities[0][predicted_label_id].item() prediction = { "Predicted Label": predicted_label, "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}", } return prediction except Exception as e: # Log the error details print(f"Error in predict_pptx_content: {e}") return {"error": str(e)} # Define the Gradio interface iface = gr.Interface( fn=predict_pptx_content, inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"), outputs=["text", "text"], # Predicted Label, Evaluation live=False, # Change to True for one-time analysis title="