import cv2
import base64
import numpy as np
from flask import Flask, render_template, request, jsonify, send_from_directory
import time
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions
from tflite_support.task import vision as vision2
from tflite_support.task import core, processor
from numpy.linalg import norm

# Flask app setup
app = Flask(__name__)

# Global variables for letter detection results
letter_result = 0
result_to_show = 0
cresult_to_show = 0
letterscore = 0
no_hand_flag = 1

# Initialize MediaPipe hand landmark detection
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Load your TFLite models (adjust paths if needed)
cbase_options = core.BaseOptions(file_name="./exported/model.tflite")   # New model
ccbase_options = core.BaseOptions(file_name="./exported/word.tflite")   # Old model or word model
cclassification_options = processor.ClassificationOptions(max_results=1)
coptions = vision2.ImageClassifierOptions(base_options=cbase_options,
                                          classification_options=cclassification_options)
ccoptions = vision2.ImageClassifierOptions(base_options=ccbase_options,
                                           classification_options=cclassification_options)
cclassifier = vision2.ImageClassifier.create_from_options(coptions)
ccclassifier = vision2.ImageClassifier.create_from_options(ccoptions)

# Callback to store MediaPipe detection results asynchronously
RESULT = None


def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global RESULT
    RESULT = result


options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)
detector = mp.tasks.vision.HandLandmarker.create_from_options(options)


# Utility functions for image processing
def data_uri_to_image(data_uri):
    """Decode a 'data:image/...;base64,...' URI into an OpenCV image."""
    header, encoded = data_uri.split(',', 1)
    decoded_data = base64.b64decode(encoded)
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image


def image_to_data_uri(image):
    """Encode an OpenCV image as a JPEG data URI."""
    _, buffer = cv2.imencode('.jpg', image)
    image_bytes = buffer.tobytes()
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    return f"data:image/jpeg;base64,{base64_encoded}"


def draw_landmarks_on_image(rgb_image, detection_result):
    """Draw the detected hand landmarks and connections onto a copy of the frame."""
    hand_landmarks_list = detection_result.hand_landmarks
    annotated_image = np.copy(rgb_image)
    image_height, image_width, _ = annotated_image.shape
    for idx in range(len(hand_landmarks_list)):
        hand_landmarks = hand_landmarks_list[idx]
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style()
        )
    return annotated_image


# Letter list - modify if needed
letter_list = [chr(i) for i in range(65, 91)] + ['#']  # A-Z + #

# Isẹ̀kiri dictionary (example mapping, update with real words)
isekiri_dict = {
    'A': 'Àṣẹ', 'B': 'Bí', 'C': 'Ṣe', 'D': 'Dá', 'E': 'Ẹ̀', 'F': 'Fẹ́',
    'G': 'Gba', 'H': 'Hàn', 'I': 'Ìyà', 'J': 'Jẹ', 'K': 'Kọ', 'L': 'Lá',
    'M': 'Má', 'N': 'Ná', 'O': 'Ọ̀', 'P': 'Pẹ̀',
    'Q': 'Kù',  # approximate since Q rarely used
    'R': 'Rà', 'S': 'Ṣá', 'T': 'Tẹ', 'U': 'Ú', 'V': 'Vẹ', 'W': 'Wá',
    'X': 'Ẹ́s', 'Y': 'Yá', 'Z': 'Zà', '#': '#'
}


# Routes for web UI and models
@app.route('/')
def index():
    return render_template('index.html')


@app.route('/exported/<path:filename>')
def send_model(filename):
    return send_from_directory('exported', filename)


# Video frame processing API (ASL detection)
@app.route('/api/data', methods=['POST'])
def handle_video_frame():
    global letter_result, result_to_show, cresult_to_show, letterscore, no_hand_flag

    frame_data_uri = request.json.get('key')
    if not frame_data_uri:
        return jsonify({'error': 'No frame data received'}), 400

    frame = data_uri_to_image(frame_data_uri)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

    try:
        # Detection results arrive asynchronously via print_result; the latest one is in RESULT
        detector.detect_async(mp_image, mp.Timestamp.from_seconds(time.time()).value)
        if RESULT is None:
            return jsonify({'result': '_', 'frame': frame_data_uri})

        annotated_image = draw_landmarks_on_image(frame, RESULT)

        if RESULT.handedness:
            no_hand_flag = 0
            # If a right hand is detected, classify the frame with both models
            if RESULT.handedness[0][0].display_name == 'Right':
                tf_image = vision2.TensorImage.create_from_array(frame)
                classification_result = cclassifier.classify(tf_image)
                cclassification_result = ccclassifier.classify(tf_image)
                result_to_show = classification_result.classifications[0].categories[0].category_name
                cresult_to_show = cclassification_result.classifications[0].categories[0].category_name
                # Simple decision logic: keep whichever model is more confident
                if (cclassification_result.classifications[0].categories[0].score >
                        classification_result.classifications[0].categories[0].score):
                    letter_result = cresult_to_show
                else:
                    letter_result = result_to_show
                letterscore = max(
                    classification_result.classifications[0].categories[0].score,
                    cclassification_result.classifications[0].categories[0].score
                )
            else:
                letter_result = '_'
        else:
            letter_result = '_'
    except Exception as e:
        print("Detection error:", e)
        letter_result = '_'
        annotated_image = frame

    frame_out = image_to_data_uri(annotated_image)
    return jsonify({"result": letter_result, "frame": frame_out})


# Isẹ̀kiri translation API
@app.route('/api/translate', methods=['POST'])
def translate_to_isekiri():
    data = request.json
    text = data.get('text', '')
    # Translate each letter to its Isẹ̀kiri word, or keep the character if unknown
    translated = ' '.join(isekiri_dict.get(ch.upper(), ch) for ch in text if ch.strip())
    return jsonify({'isekiri': translated})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
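

# --- Optional client sketch (not part of the original app) -------------------
# A minimal way to exercise the two JSON endpoints from another process,
# assuming the server is running locally on port 7860 and the `requests`
# package is installed; 'sample.jpg' is a hypothetical test image.
#
#   import base64
#   import cv2
#   import requests
#
#   frame = cv2.imread('sample.jpg')
#   _, buf = cv2.imencode('.jpg', frame)
#   data_uri = 'data:image/jpeg;base64,' + base64.b64encode(buf).decode('utf-8')
#
#   # Send one frame for detection; the response echoes back an annotated frame
#   r = requests.post('http://localhost:7860/api/data', json={'key': data_uri})
#   print(r.json()['result'])
#
#   # Translate a string of detected letters into Isẹ̀kiri words
#   r = requests.post('http://localhost:7860/api/translate', json={'text': 'AB'})
#   print(r.json()['isekiri'])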