Mar-31-2024, 11:35 AM
Hello all
I am trying to piece together some Python code to track 2 faces. All going OK so far, considering first effort and I have no idea what I am doing!
Basically, I am lifting code I find on the web and trying to understand how it works. Not easy, as I have not really found a clear explanation of how Mediapipe works.
So, I am sure my code is horrible, but I working my way through. I am sure some of my annotation is incorrect (like explaining Normalisation)
There is a lot of redundancy in the code at the moment... please excuse that.
ANYWAY. It picks up multiple faces fine. I am now trying to work out how close the faces are to the screen.
Lines 131-143 are grabbing the width of the eyes and that is working. The print statement prints the face number and then the width of the eye (line 143), but the eye width is only ever the one closest to the camera? (the face number changes correctly)
Can someone point to what is incorrect?
I though it might be the statement to_check = results.multi_face_landmarks[0].landmark was only looking at the first face (multi_face_landmarks[0]), but I might be wrong.
I hope I have formatted this first question correctly!
I am trying to piece together some Python code to track 2 faces. All going OK so far, considering first effort and I have no idea what I am doing!
Basically, I am lifting code I find on the web and trying to understand how it works. Not easy, as I have not really found a clear explanation of how Mediapipe works.
So, I am sure my code is horrible, but I working my way through. I am sure some of my annotation is incorrect (like explaining Normalisation)
There is a lot of redundancy in the code at the moment... please excuse that.
ANYWAY. It picks up multiple faces fine. I am now trying to work out how close the faces are to the screen.
Lines 131-143 are grabbing the width of the eyes and that is working. The print statement prints the face number and then the width of the eye (line 143), but the eye width is only ever the one closest to the camera? (the face number changes correctly)
Can someone point to what is incorrect?
I though it might be the statement to_check = results.multi_face_landmarks[0].landmark was only looking at the first face (multi_face_landmarks[0]), but I might be wrong.
I hope I have formatted this first question correctly!
import cv2
import mediapipe as mp
import serial # Serial for comms to the Arduino
import time # For delays etc
import platform # Details of the system we are running
import atexit # Clean method of exiting
import serial.tools.list_ports # Serial port information
import sys
import numpy as np # Processes numbers, strings and arrays
import keyboard # Allows use of the keyboard
from vpython import *
#import pickle # Allows you to store data
print(platform.system(), platform.release())
print("Python version " + platform.python_version())
print("")
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh
showvideo = True
faces = 0
#--------------------- Arduino communications -----------------------
def findArduinoUnoPort(): # Check all the comm ports for an Arduino
portList = list(serial.tools.list_ports.comports())
for port in portList:
if "VID:PID=2341:0043" in port[0]\
or "VID:PID=2341:0043" in port[1]\
or "VID:PID=2341:0043" in port[2]:
print(port)
print(port[0])
print(port[1])
print(port[2])
return port[0]
def doAtExit():
if serialUno.isOpen():
serialUno.close()
print("Close serial")
print("serialUno.isOpen() = " + str(serialUno.isOpen()))
atexit.register(doAtExit)
unoPort = findArduinoUnoPort()
if not unoPort:
print("No Arduino found")
#sys.exit("No Arduino found - Exiting system")
#print("Arduino found: " + unoPort)
#print()
serialArduino = serial.Serial(unoPort, 9600)
print("serialUno.isOpen() = " + str(serialArduino.isOpen()))
#-----------------------------------------------------------------------
# For webcam input:
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
#-- Select a video source -
#cap = cv2.VideoCapture('dance2.mp4') # Read local video file
cap = cv2.VideoCapture(0) # Laptop built-in webcam
#cap = cv2.VideoCapture(1, cv2.CAP_DSHOW) # External USB camera
#cap = cv2.VideoCapture('rtsp://192.168.1.64/1') # Capture from an IP camera
#cap = cv2.VideoCapture('rtsp://username:[email protected]/1') # Capture from an IP camera with password + username
with mp_face_mesh.FaceMesh(
max_num_faces=20, # Max amount of faces detectable
refine_landmarks=True, # Whether it uses all the face land marks
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as face_mesh:
while cap.isOpened():
success, image = cap.read()
width = 1280 # Resize the webcam feed
height = 720
dim = (width, height)
image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
if not success:
print("Ignoring empty camera frame.")
# If loading a video, use 'break' instead of 'continue'.
continue
# To improve performance, optionally mark the image as not writeable to
# pass by reference.
image.flags.writeable = False
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = face_mesh.process(image)
faces = 0 # Reset the faces count
black = cv2.imread("black.png") # Create a blank page for the 'video off' feed
if showvideo == True: # Show live video feed
image.flags.writeable = True # Draw the face mesh annotations on the image.
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
#mp_drawing.draw_landmarks( # Draw the entire face (mesh)
#image=image,
#landmark_list=face_landmarks,
#connections=mp_face_mesh.FACEMESH_TESSELATION,
#landmark_drawing_spec=None,
#connection_drawing_spec=mp_drawing_styles
#.get_default_face_mesh_tesselation_style())
mp_drawing.draw_landmarks( # Draw just the outlines
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
mp_drawing.draw_landmarks( # Draw the eyes
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_IRISES,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_iris_connections_style())
faces+=1 # Increase the faces count for every face identified
#---------- Track the width of the eyes (to determine range) -------------
def invert_normalization(x, y, w, h): # Re-correct the Z value (depth) for the new scale of the image. Z = Smaller = Nearer the camera
return int(x * w), int(y * h) # Normalization is the conversion of larger variables of detail to a smaller range for easier processing. E.G. 0-255 becomes 0-1
to_check = results.multi_face_landmarks[0].landmark
image_width = image.shape[1]
image_height = image.shape[0]
#up_left_x, up_left_y = invert_normalization(x=to_check[71].x, y=to_check[71].y, w=image_width, h=image_height) # https://github.com/edge7/Eye-Region-Extraction-Toolbox/blob/main/runner/main.py
#up_right_x, up_right_y = invert_normalization(x=to_check[301].x, y=to_check[71].y, w=image_width, h=image_height)
down_left_x, down_left_y = invert_normalization(x=to_check[71].x, y=to_check[123].y, w=image_width, h=image_height)
down_right_x, down_right_y = invert_normalization(x=to_check[301].x, y=to_check[123].y, w=image_width, h=image_height)
width_eyes = (down_right_x - down_left_x)
print(faces,width_eyes)
#---------- Determine action -----------
cv2.putText(image, str("Face recognition (live video)"), (25,50), cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0), 2) # Text
cv2.putText(image, str("Number of faces: "+ str(faces)), (25,80), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
if (faces == 1):
cv2.putText(image, str("You need another friend!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
if (faces == 2):
cv2.putText(image, str("Hello you two!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
cv2.rectangle(image, (12, 12), (1266, 706), (0, 255, 0), 20)
if (faces > 2):
cv2.putText(image, str("TOO MANY PEOPLE!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,255), 2) # Text
else: # Show black screen
image.flags.writeable = True
image = cv2.cvtColor(black, cv2.COLOR_RGB2BGR)
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
mp_drawing.draw_landmarks( # Draw the entire face (mesh)
image=black,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_TESSELATION,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_tesselation_style())
mp_drawing.draw_landmarks( # Draw just the outlines
image=black,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_contours_style())
mp_drawing.draw_landmarks( # Draw the eyes
image=black,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_IRISES,
landmark_drawing_spec=None,
connection_drawing_spec=mp_drawing_styles
.get_default_face_mesh_iris_connections_style())
faces+=1
#---------- Determine action -----------
cv2.putText(black, str("Face recognition (stealth)"), (25,50), cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255), 2) # Text
cv2.putText(black, str("Number of faces: "+ str(faces)), (25,80), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
if (faces == 1):
cv2.putText(black, str("You need another friend!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
if (faces == 2):
cv2.putText(black, str("Hello you two!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
cv2.rectangle(black, (12, 12), (1266, 706), (0, 255, 0), 20)
if (faces > 2):
cv2.putText(black, str("TOO MANY PEOPLE!"), (25,110), cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255), 2) # Text
#------------ Toggle video mode ---------------
if keyboard.is_pressed('v') and showvideo == True:
showvideo = False
while keyboard.is_pressed('v'):
cv2.waitKey(1)
if keyboard.is_pressed('v') and showvideo == False:
showvideo = True
while keyboard.is_pressed('v'):
cv2.waitKey(1)
if showvideo == True:
cv2.imshow('MediaPipe Face Mesh', image) # Display the live video feed (window name, image)
#cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1)) # Horizontally flip the image (selfie mode)
else:
cv2.imshow('MediaPipe Face Mesh', black) # Display the blank background image
#cv2.imshow('MediaPipe Face Mesh', cv2.flip(black, 1)) # Horizontally flip the image (selfie mode)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
