"""Webcam demo: locate hands with MediaPipe, classify the frame with a YOLO
model loaded from ``best.pt``, and overlay the language mapped to the
predicted letter next to a box drawn around the detected hand(s).

Press ``q`` or ``Esc`` in the preview window to quit.
"""

import pickle  # NOTE(review): unused in this script; kept in case other code relies on it.

import cv2
import mediapipe as mp
import numpy as np
from ultralytics import YOLO

# Padding, in pixels, added on every side of the hand bounding box.
BOX_PADDING = 10

# Key codes that end the capture loop.
QUIT_KEYS = (ord('q'), 27)  # 27 == Esc

# Language displayed for each predicted letter. Any other predicted label
# leaves the currently displayed language unchanged.
LANGUAGE_BY_GESTURE = {
    'A': 'Arabic',
    'B': 'Bengali',
    'C': 'Chinese',
    'D': 'Dutch',
    'E': 'English',
    'F': 'French',
    'G': 'German',
    'H': 'Hindi',
    'I': 'Italian',
    'J': 'Japanese',
    'K': 'Korean',
    'L': 'Latin',
    'M': 'Malay',
    'N': 'Norwegian',
    'O': 'Oriya',
    'P': 'Polish',
    'Q': 'Quechua',
    'R': 'Russian',
    'S': 'Spanish',
    'T': 'Turkish',
    'U': 'Urdu',
    'V': 'Vietnamese',
    'W': 'Welsh',
    'X': 'Xhosa',
    'Y': 'Yoruba',
    'Z': 'Zulu',
}


def _hand_bounding_box(hand_landmarks_list, width, height, padding=BOX_PADDING):
    """Return ``(x1, y1, x2, y2)`` pixel corners enclosing every landmark.

    Landmark ``x``/``y`` values are scaled by the frame ``width``/``height``
    and the resulting box is grown by ``padding`` pixels on each side. All
    hands in ``hand_landmarks_list`` contribute to a single box.
    """
    xs = []
    ys = []
    for hand_landmarks in hand_landmarks_list:
        for landmark in hand_landmarks.landmark:
            xs.append(landmark.x)
            ys.append(landmark.y)
    x1 = int(min(xs) * width) - padding
    y1 = int(min(ys) * height) - padding
    # The original subtracted the padding here as well, which shifted the box
    # instead of enlarging it; the far corner must move outwards.
    x2 = int(max(xs) * width) + padding
    y2 = int(max(ys) * height) + padding
    return x1, y1, x2, y2


model = YOLO('best.pt')

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# NOTE(review): static_image_mode=True is kept from the original; for a live
# video stream MediaPipe's tracking mode (False) may be preferable - confirm.
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Last language selected by a recognised gesture; persists across frames.
language = ''

# NOTE(review): not referenced anywhere in this script (class names come from
# the model's own `names` mapping). Entry 0 is '0' - possibly meant to be 'A'.
labels_dict = {0: '0', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J', 10: 'K', 11: 'L',
               12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W',
               23: 'X', 24: 'Y', 25: 'Z', 26: 'del', 27: 'nothing', 28: 'space'}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame. Exiting...")
            break

        H, W, _ = frame.shape

        # MediaPipe is fed an RGB copy; OpenCV frames are converted from BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        # Only classify and annotate when at least one hand was found, so the
        # box coordinates are always defined for the frame being drawn.
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    frame,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

            x1, y1, x2, y2 = _hand_bounding_box(results.multi_hand_landmarks, W, H)

            # As in the original, the model receives the frame after the
            # landmark overlay has been drawn on it.
            prediction = model.predict(frame, conf=0.25, iou=0.45)
            names_dict = prediction[0].names
            # .cpu() makes the conversion work when inference ran on a GPU.
            probs = prediction[0].probs.data.cpu().numpy()
            detected_gesture = names_dict[int(np.argmax(probs))]

            print(detected_gesture)
            print("Gesture:", detected_gesture)

            # Unknown labels keep the previously selected language.
            language = LANGUAGE_BY_GESTURE.get(detected_gesture, language)

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
            cv2.putText(frame, language, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                        cv2.LINE_AA)

        cv2.imshow('frame', frame)
        # waitKey also pumps the GUI event loop; its result lets the user quit.
        if cv2.waitKey(1) & 0xFF in QUIT_KEYS:
            break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()