import cv2
import dlib
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from predict_api import ImagePredictor


# Loaded fonts keyed by (font_path, font_size), so the font file is parsed
# once instead of on every call (the caller invokes this per face per frame).
_FONT_CACHE = {}


def _get_font(font_path, font_size):
    """Return the TrueType font for (font_path, font_size), loading it once."""
    key = (font_path, font_size)
    font = _FONT_CACHE.get(key)
    if font is None:
        font = ImageFont.truetype(font_path, font_size)
        _FONT_CACHE[key] = font
    return font


def draw_chinese_text(image, text, position, color=(0, 255, 0), *,
                      font_path="simhei.ttf", font_size=20):
    """Render text onto an OpenCV image using a TrueType font via PIL.

    The text is drawn on a transparent RGBA overlay of the same size as the
    image and alpha-composited on top of it.

    Args:
        image: Image array in OpenCV's BGR channel order. It is not modified.
        text: The string to draw.
        position: (x, y) pixel coordinates handed to PIL's ``draw.text``.
        color: Fill colour handed to PIL. NOTE: PIL interprets this on the
            RGBA overlay, i.e. in RGB order, not OpenCV's BGR order.
        font_path: TrueType font file to use. The default, "simhei.ttf",
            must be resolvable by PIL on the host system.
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        A new 3-channel BGR image array with the text drawn on it.

    Raises:
        OSError: If the font file cannot be read by PIL.
    """
    # Convert cv2 (BGR) image to a PIL RGBA image so it can be composited
    base = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert('RGBA')

    # Fully transparent overlay, same size as the original image
    overlay = Image.new('RGBA', base.size, (0, 0, 0, 0))

    # Draw the text on the overlay only
    font = _get_font(font_path, font_size)
    ImageDraw.Draw(overlay).text(position, text, fill=color, font=font)

    # Blend the overlay onto the original image
    composed = Image.alpha_composite(base, overlay)

    # The composite is RGBA; drop alpha and swap back to BGR for cv2
    return cv2.cvtColor(np.array(composed), cv2.COLOR_RGBA2BGR)


# Initialize face detector
detector = dlib.get_frontal_face_detector()

# Initialize ImagePredictor
predictor = ImagePredictor(
    model_path="./best.pth",
    class_indices_path="./class_indices.json",
)

# Open the webcam
cap = cv2.VideoCapture(0)

try:
    while True:
        # Read a frame from the webcam; stop when no frame is delivered
        ret, frame = cap.read()
        if not ret:
            break

        # The detector is run on a grayscale copy of the frame
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_h, frame_w = gray.shape[:2]

        for rect in detector(gray):
            # Detected rectangles can extend past the image border. Clamp
            # them to the frame so that a negative start index does not wrap
            # around in the numpy slice and produce an empty/wrong crop.
            x1 = max(rect.left(), 0)
            y1 = max(rect.top(), 0)
            x2 = min(rect.left() + rect.width(), frame_w)
            y2 = min(rect.top() + rect.height(), frame_h)
            if x2 <= x1 or y2 <= y1:
                # Nothing of this detection lies inside the frame
                continue

            # Crop the face from the (colour) frame
            face = frame[y1:y2, x1:x2]

            # Predict the emotion of the face and read the predicted label
            result = predictor.predict(face)
            emotion = result["result"]["name"]

            # Draw the rectangle around the face
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Draw the label at the rectangle's top-left corner. The PIL-based
            # helper is used instead of cv2.putText because OpenCV's built-in
            # Hershey fonts cannot render non-ASCII (e.g. Chinese) text.
            frame = draw_chinese_text(frame, emotion, (x1, y1))

        # Display the frame
        cv2.imshow("Emotion Recognition", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and destroy all windows, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()