# version 5.4 # All rights reserved © e-Learning Development Laboratory, The University of Hong Kong 2023 def take_photo(name="photo.jpg", sec=0.3, show_result=True): from IPython.display import clear_output, Javascript, Image from google.colab import output from base64 import b64decode #clear_output() display(Javascript(""" const sleep = time => new Promise(resolve => setTimeout(resolve, time)) async function takephotojs(sec) { const video = document.createElement("video"); video.style.display = "block"; const stream = await navigator.mediaDevices.getUserMedia({video: true}); document.body.appendChild(video); video.srcObject = stream; await video.play(); google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true); await sleep(sec) const canvas = document.createElement("canvas"); canvas.width = video.videoWidth; canvas.height = video.videoHeight; canvas.getContext("2d").drawImage(video, 0, 0); stream.getVideoTracks()[0].stop(); video.remove(); return canvas.toDataURL("image/jpeg", 0.8); } """)) data = output.eval_js("takephotojs({})".format(sec*1000)) binary = b64decode(data.split(",")[1]) with open(name, "wb") as f: f.write(binary) if show_result: display(Image(name)) def recognise(image, detection_type, show_result=True): if detection_type == "FACE_DETECTION" or detection_type == "FACE": return detect_faces(image, show_result) elif detection_type == "OBJECT_LOCALIZATION" or detection_type == "OBJECT": return detect_objects(image, show_result) elif detection_type == "TEXT_DETECTION" or detection_type == "TEXT": return detect_texts(image, show_result) elif detection_type == "LANDMARK_DETECTION" or detection_type == "LANDMARK": return detect_landmarks(image, show_result) elif detection_type == "LABEL_DETECTION" or detection_type == "LABEL": return detect_labels(image, show_result) else: print("Error. No such detection type") def detect_faces(filename, show_result): from google.cloud import vision import io import PIL from PIL import ImageDraw with io.open(filename, "rb") as image_file: client = vision.ImageAnnotatorClient() image = vision.Image(content=image_file.read()) response = client.face_detection(image=image) image = PIL.Image.open(filename) draw = PIL.ImageDraw.Draw(image) likelihood_name = ('UNKNOWN', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY') result = [] for face in response.face_annotations: face_result = dict(detection_confidence = format(face.detection_confidence, '.3f'), joy_likelihood = likelihood_name[face.joy_likelihood], anger_likelihood = likelihood_name[face.anger_likelihood], surprise_likelihood = likelihood_name[face.surprise_likelihood]) result = result + [face_result] box = [(int(vertex.x), int(vertex.y)) for vertex in face.bounding_poly.vertices] draw.text((box[0][0], box[0][1] - 10), "ID:" + str(len(result)-1) + " (" + format(face.detection_confidence, '.3f') + ")") draw.rectangle([box[0], box[2]],outline="green", width=3) image.save(filename.split(".")[0]+"_result.png") if show_result: from IPython.display import Image display(Image(filename.split(".")[0]+"_result.png")) if len(result) == 0: return "No result" else: return dict(no_of_faces=len(result), faces=result) def detect_objects(filename, show_result): from google.cloud import vision import io import PIL from PIL import ImageDraw with io.open(filename, "rb") as image_file: client = vision.ImageAnnotatorClient() image = vision.Image(content=image_file.read()) objects = client.object_localization(image=image).localized_object_annotations image = PIL.Image.open(filename) draw = PIL.ImageDraw.Draw(image) result = [] for theobject in objects: object_result = dict(detected_object = theobject.name, detection_confidence = theobject.score) result = result + [object_result] box = [(vertex.x*image.width, vertex.y*image.height) for vertex in theobject.bounding_poly.normalized_vertices] draw.text((box[0][0], box[0][1] - 10), "ID:" + str(len(result)-1) + " (" + theobject.name + ")") draw.rectangle([box[0], box[2]],outline="green", width=3) image.save(filename.split(".")[0]+"_result.png") if show_result: from IPython.display import Image display(Image(filename.split(".")[0]+"_result.png")) if len(result) == 0: return "No result" else: return dict(no_of_objects=len(result), objects=result) def detect_texts(filename, show_result): from google.cloud import vision import io import PIL from PIL import ImageDraw with io.open(filename, "rb") as image_file: client = vision.ImageAnnotatorClient() image = vision.Image(content=image_file.read()) texts = client.text_detection(image=image).text_annotations image = PIL.Image.open(filename) draw = PIL.ImageDraw.Draw(image) result = [] for text in texts[1:]: text_result = dict(detected_text = text.description) result = result + [text_result] box = [(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices] draw.rectangle([box[0], box[2]],outline="green", width=3) image.save(filename.split(".")[0]+"_result.png") if show_result: from IPython.display import Image display(Image(filename.split(".")[0]+"_result.png")) if len(result) == 0: return "No result" else: return dict(detected_text=texts[0].description, texts=result) def detect_landmarks(filename, show_result): from google.cloud import vision import io import PIL from PIL import ImageDraw with io.open(filename, "rb") as image_file: client = vision.ImageAnnotatorClient() image = vision.Image(content=image_file.read()) landmarks = client.landmark_detection(image=image).landmark_annotations image = PIL.Image.open(filename) draw = PIL.ImageDraw.Draw(image) result = [] for landmark in landmarks: for location in landmark.locations: landmark_location = dict(latitude=location.lat_lng.latitude, longitude=location.lat_lng.longitude) landmark_result = dict(detected_landmark = landmark.description, location=landmark_location) result = result + [landmark_result] box = [(vertex.x, vertex.y) for vertex in landmark.bounding_poly.vertices] draw.rectangle([box[0], box[2]],outline="green", width=3) image.save(filename.split(".")[0]+"_result.png") if show_result: from IPython.display import Image display(Image(filename.split(".")[0]+"_result.png")) if len(result) == 0: return "No result" else: return result def detect_labels(filename, show_result): from google.cloud import vision import io import PIL from PIL import ImageDraw with io.open(filename, "rb") as image_file: client = vision.ImageAnnotatorClient() image = vision.Image(content=image_file.read()) labels = client.label_detection(image=image).label_annotations image = PIL.Image.open(filename) draw = PIL.ImageDraw.Draw(image) result = [] for label in labels: label_result = dict(label=label.description) result = result + [label_result] box = [(int(vertex.x), int(vertex.y)) for vertex in label.bounding_poly.vertices] image.save(filename.split(".")[0]+"_result.png") if show_result: from IPython.display import Image display(Image(filename.split(".")[0]+"_result.png")) if len(result) == 0: return "No result" else: return result def getLabels_inString(result): try: labels = "" if result != "No result": for all_ in result: labels = labels + all_["label"] + "," else: labels = "No result" return labels[:-1] except: return "Error. Wrong detection type" def getLabels(result): return getLabels_inString(result) def getLabels_inList(result): return getLabels_inString(result).split(",") def getObjects_inString(result): try: objects = "" if result != "No result": for all_ in result["objects"]: objects = objects + all_["detected_object"] + "," else: objects = "No result" return objects[:-1] except: return "Error. Wrong detection type" def getObjects(result): return getObjects_inString(result) def getObjects_inList(result): return getObjects_inString(result).split(",") def getTexts_inString(result): try: texts = "" if result != "No result": return result["detected_text"] else: texts = "No result" return texts[:-1] except: return "Error. Wrong detection type" def getTexts(result): return getTexts_inString(result) def getTexts_inList(result): return getTexts_inString(result).split("\n") def getLandmarks_inString(result): try: landmarks = "" if result != "No result": for all in result: landmarks = landmarks + all["detected_landmark"] + "," else: landmarks = "No result" return landmarks[:-1] except: return "Error. Wrong detection type" def getLandmarks(result): return getLandmarks_inString(result) def getLandmarks_inList(result): return getLandmarks_inString(result).split(",") def getFaces(result): try: if result != "No result": faces = result["no_of_faces"] else: faces = 0 return faces except: return "Error. Wrong detection type" def isJoy(result, id): try: if result != "No result": joy = result["faces"][id]["joy_likelihood"] else: joy = "No result" return joy except: return "Error. Wrong detection type" def isAnger(result, id): try: if result != "No result": joy = result["faces"][id]["anger_likelihood"] else: joy = "No result" return joy except: return "Error. Wrong detection type" def isSurprise(result, id): try: if result != "No result": joy = result["faces"][id]["surprise_likelihood"] else: joy = "No result" return joy except: return "Error. Wrong detection type" model = "" class_names = "" def load_tmmodel(): from keras.models import load_model global model model = load_model("keras_model.h5", compile=False) global class_names class_names = open("labels.txt", "r").readlines() def predict(name="photo.jpg"): from PIL import Image, ImageOps import numpy as np data = np.ndarray(shape=(1, 224, 224, 3), dtype=np.float32) image = Image.open(name).convert("RGB") image = ImageOps.fit(image, (224, 224), Image.LANCZOS) image_array = np.asarray(image) normalized_image_array = (image_array.astype(np.float32) / 127.5) - 1 data[0] = normalized_image_array prediction = model.predict(data) index = np.argmax(prediction) class_name = class_names[index] confidence_score = prediction[0][index] from IPython.display import clear_output, Image clear_output() display(Image(name)) import time time.sleep(2) return class_name.split(" ")[1].replace("\n", ""), confidence_score