GestureME¶
Functions to open the camera stream, capture and save an image, and close the stream¶
In [1]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
from IPython.display import display, Image
import threading
# Shared state for the webcam preview. These are module-level so the worker
# thread running view() and the notebook cells operate on the same values.
running = False  # loop flag polled by view(); set by start_stream() / stop_stream()
cap = None  # capture object assigned by start_stream() and read by view()
display_handle = None  # display slot created inside view() for frame updates
def view():
    """Stream mirrored webcam frames into the notebook until stopped.

    Meant to run as the target of the worker thread created by
    ``start_stream``. A display slot is created, then frames are read from
    the capture object found in the global ``cap`` at entry, flipped
    horizontally, JPEG-encoded and pushed to the display slot.

    The loop ends when the global ``running`` flag becomes false, when a frame
    cannot be read, or when ``cap`` has been replaced by a newer capture.

    On exit -- normal or through an exception -- the capture object this call
    was using is released and its display slot is cleared. ``running`` is
    reset to ``False`` (unless a newer capture has taken over) so that
    ``start_stream`` can be called again after a failed read.
    """
    global running, cap, display_handle
    handle = display(None, display_id=True)
    display_handle = handle
    # Bind this worker to the device/slot that existed when it started, so a
    # quick stop/start cannot make it release or clear the next stream's ones.
    capture = cap
    try:
        while running and cap is capture:
            ret, frame = capture.read()
            if not ret:
                # No frame available (camera failed or went away): stop.
                break
            frame = cv2.flip(frame, 1)  # 1 = flip around the vertical axis (mirror view)
            encoded_ok, frame_encoded = cv2.imencode('.jpeg', frame)
            if not encoded_ok:
                # Skip a frame that could not be encoded instead of displaying garbage.
                continue
            handle.update(Image(data=frame_encoded.tobytes()))
    finally:
        if cap is capture:
            # Without this reset a failed read would leave running == True and
            # every later start_stream() call would silently do nothing.
            running = False
        if capture is not None:
            capture.release()
        handle.update(None)
def start_stream():
    """Open the default webcam and start the live preview in a background thread.

    Does nothing when a stream is already marked as running. If the camera
    cannot be opened, a message is printed, the failed capture object is
    released and no thread is started, so ``running`` stays ``False`` and the
    call can simply be retried.

    Returns:
        None.
    """
    global running, cap
    if running:
        return

    capture = cv2.VideoCapture(0)  # index 0: the system's default camera
    if not capture.isOpened():
        capture.release()
        print("Could not open the webcam (device 0).")
        return

    # Publish the state only once the device is known to be usable.
    cap = capture
    running = True
    # Daemon thread: a forgotten stop_stream() must not keep the process alive.
    thread = threading.Thread(target=view, daemon=True)
    thread.start()
def stop_stream():
    """Request the preview to stop by clearing the module-level ``running`` flag.

    Only the flag is changed here; the loop in ``view`` polls it and performs
    the actual shutdown.
    """
    global running
    running = False
def capture_image(filename='captured_image.jpg'):
    """Grab one frame from the default webcam, save it and show it inline.

    The function opens its own capture object (it does not use the global
    ``cap``), reads a single frame and releases the device again, even if the
    read raises.

    Args:
        filename: Path of the image file to write. Defaults to
            ``'captured_image.jpg'``, the name that used to be hard-coded.

    Returns:
        None. Success and failure are both reported with ``print``.
    """
    camera = cv2.VideoCapture(0)
    try:
        ret, frame = camera.read()
    finally:
        camera.release()

    if not ret:
        # Previously this case produced no output at all.
        print("Could not capture an image from the webcam.")
        return

    # imwrite signals failure through its return value, so check it before
    # claiming the file was saved.
    if not cv2.imwrite(filename, frame):
        print(f"Could not save the image to {filename}")
        return
    print(f"Image saved as {filename}")

    # Display the captured image straight from memory; the frame is converted
    # from BGR to RGB channel order before being handed to matplotlib.
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()
In [2]:
start_stream() # To start the video stream
None
In [3]:
capture_image() # To capture an image from the webcam
Image saved as captured_image.jpg
In [4]:
# stop_stream()  # Uncomment and run to stop the video stream and release the camera
In [ ]:
# Adapted from OpenAI's Vision example
from openai import OpenAI
import base64
import mimetypes
import requests

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

# Ask the user for a path on the filesystem:
path = input("Enter a local filepath to an image: ")
# Drop surrounding whitespace and wrapping quotes (as added when a path is
# pasted from a shell) without touching apostrophes inside the path itself.
image_path = path.strip().strip("'\"")

# Label the payload with the image type guessed from the file name; keep the
# previous hard-coded JPEG label when no image type can be guessed.
mime_type = mimetypes.guess_type(image_path)[0]
if not (mime_type and mime_type.startswith("image/")):
    mime_type = "image/jpeg"

# Read the image and encode it to base64:
base64_image = ""
try:
    # The context manager closes the file handle even if reading fails.
    with open(image_path, "rb") as image_file:
        image = image_file.read()
    base64_image = base64.b64encode(image).decode("utf-8")
except (OSError, ValueError) as err:
    # OSError: missing file, directory, permissions. ValueError: malformed path
    # (e.g. embedded NUL). Anything else is a real bug and should surface.
    print("Couldn't read the image. Make sure the path is correct and the file exists.")
    print(f"Details: {err}")
else:
    # The request lives in the else-branch so that a failed read ends the cell
    # here. The earlier exit() call does not raise in IPython/Jupyter, so the
    # request was still sent with an empty image.
    completion = client.chat.completions.create(
        model="cjpais/llava-1.6-mistral-7b-gguf",
        messages=[
            {
                "role": "system",
                "content": "This is a chat between a user and an assistant. The assistant is helping the user to describe an image.",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What does this hand gesture image show?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
        max_tokens=1000,
        stream=True
    )

    # Print the streamed answer as it arrives.
    for chunk in completion:
        # Some stream chunks carry no choices; skip them instead of indexing into an empty list.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
In [ ]: