yolov8-pose: Keypoint Pose Detection

Environment & Installation

Same as the previous post, yolov8: fire detection.
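
(For reference, the core dependencies used below can typically be installed with pip install ultralytics opencv-python tqdm; follow that post for the exact setup.)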

The model used is yolov8n-pose.

Data Annotation

Annotation tool: labelme

Annotate the targets (people) and their keypoints in each image: 1 object class and 17 keypoint classes.

Data Format Conversion

Convert the labelme annotations to YOLO format. Generic conversion code:

# TODO:
# see the yolov8 fire detection post; to be continued
...
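
In the meantime, here is a minimal conversion sketch, assuming each labelme JSON contains one person rectangle plus point shapes named after the 17 keypoints (the label names, paths, and visibility handling below are assumptions to adapt to your own annotations):

import json

# keypoint order must match kpt_shape and the drawing config later in this post (assumed label names)
KPT_NAMES = ['nose', 'right_eye', 'left_eye', 'right_ear', 'left_ear',
             'right_shoulder', 'left_shoulder', 'right_elbow', 'left_elbow',
             'right_wrist', 'left_wrist', 'right_hip', 'left_hip',
             'right_knee', 'left_knee', 'right_ankle', 'left_ankle']

def labelme_to_yolo_pose(json_path, txt_path):
    with open(json_path, encoding='utf-8') as f:
        data = json.load(f)
    w, h = data['imageWidth'], data['imageHeight']

    # collect the person rectangle and the labeled keypoints
    box, kpts = None, {}
    for shape in data['shapes']:
        if shape['shape_type'] == 'rectangle':
            (x1, y1), (x2, y2) = shape['points']
            box = (min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
        elif shape['shape_type'] == 'point':
            kpts[shape['label']] = shape['points'][0]

    x1, y1, x2, y2 = box  # assumes exactly one person rectangle per image
    # YOLO pose line: class cx cy bw bh, then x y visibility per keypoint, all normalized
    vals = [0, (x1 + x2) / 2 / w, (y1 + y2) / 2 / h, (x2 - x1) / w, (y2 - y1) / h]
    for name in KPT_NAMES:
        if name in kpts:
            x, y = kpts[name]
            vals += [x / w, y / h, 2]  # 2 = visible; labelme points carry no visibility flag
        else:
            vals += [0, 0, 0]          # 0 = keypoint not labeled
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(' '.join(f'{v:.6f}' if isinstance(v, float) else str(v) for v in vals) + '\n')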

Create the Training YAML File

Modeled on yolov8n-pose.yaml:

train: /exp/work/video/yolov8/datasets/human-pose/images/train  # training set folder
val: /exp/work/video/yolov8/datasets/human-pose/images/val  # validation set folder
test: /exp/work/video/yolov8/datasets/human-pose/images/val  # test set folder
nc: 1  # number of classes

# Keypoints: each keypoint has three values: x, y, visibility
# visibility: 2 = visible, 1 = labeled but occluded, 0 = not labeled
kpt_shape: [17, 3]

# Box classes (for keypoint detection there is only one)
names:
  0: people
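
Note: the official Ultralytics pose data configs (e.g. coco8-pose.yaml) additionally define which keypoint mirrors which for left/right flip augmentation; for the ordering above that entry would be `flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]`.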

Training

Code

from ultralytics import YOLO

# Load the pretrained pose model (note: the -pose weights, not the plain detection model yolov8n.pt)
model = YOLO('yolov8n-pose.pt')

# Train on two GPUs
model.train(
    data='datasets/human-pose/data/human-pose.yaml',
    epochs=300,
    device=[0, 1])

# Run validation
model.val()
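
Ultralytics writes the run artifacts (metrics, plots, and the weights best.pt / last.pt) under runs/pose/train by default; to run inference with the fine-tuned model, load runs/pose/train/weights/best.pt instead of the stock weights.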

Prediction

Imports, Model Loading & GPU Setup

import os
import cv2
import numpy as np
import time
import torch
from tqdm import tqdm

from ultralytics import YOLO
import matplotlib.pyplot as plt

# GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print('using device:', device)

model = YOLO('yolov8n-pose.pt')
# model.to(device)

Visualization Style Parameters

# bounding box (rectangle) visualization settings
bbox_color = (150, 0, 0)  # BGR color of the box
bbox_thickness = 2        # line width of the box

# box label text settings
bbox_labelstr = {
    'font_size': 1,       # font size
    'font_thickness': 2,  # font weight
    'offset_x': 0,        # horizontal text offset, positive = right
    'offset_y': -10,      # vertical text offset, positive = down
}

# keypoint BGR colors
kpt_color_map = {
    0: {'name': 'Nose', 'color': [0, 0, 255], 'radius': 6},
    1: {'name': 'Right Eye', 'color': [255, 0, 0], 'radius': 6},
    2: {'name': 'Left Eye', 'color': [255, 0, 0], 'radius': 6},
    3: {'name': 'Right Ear', 'color': [0, 255, 0], 'radius': 6},
    4: {'name': 'Left Ear', 'color': [0, 255, 0], 'radius': 6},
    5: {'name': 'Right Shoulder', 'color': [193, 182, 255], 'radius': 6},
    6: {'name': 'Left Shoulder', 'color': [193, 182, 255], 'radius': 6},
    7: {'name': 'Right Elbow', 'color': [16, 144, 247], 'radius': 6},
    8: {'name': 'Left Elbow', 'color': [16, 144, 247], 'radius': 6},
    9: {'name': 'Right Wrist', 'color': [1, 240, 255], 'radius': 6},
    10: {'name': 'Left Wrist', 'color': [1, 240, 255], 'radius': 6},
    11: {'name': 'Right Hip', 'color': [140, 47, 240], 'radius': 6},
    12: {'name': 'Left Hip', 'color': [140, 47, 240], 'radius': 6},
    13: {'name': 'Right Knee', 'color': [223, 155, 60], 'radius': 6},
    14: {'name': 'Left Knee', 'color': [223, 155, 60], 'radius': 6},
    15: {'name': 'Right Ankle', 'color': [139, 0, 0], 'radius': 6},
    16: {'name': 'Left Ankle', 'color': [139, 0, 0], 'radius': 6},
}

# keypoint label text settings
kpt_labelstr = {
    'font_size': 0.5,     # font size
    'font_thickness': 1,  # font weight
    'offset_x': 10,       # horizontal text offset, positive = right
    'offset_y': 0,        # vertical text offset, positive = down
}

# skeleton connection BGR colors
skeleton_map = [
    {'srt_kpt_id': 15, 'dst_kpt_id': 13, 'color': [0, 100, 255], 'thickness': 2},   # right ankle - right knee
    {'srt_kpt_id': 13, 'dst_kpt_id': 11, 'color': [0, 255, 0], 'thickness': 2},     # right knee - right hip
    {'srt_kpt_id': 16, 'dst_kpt_id': 14, 'color': [255, 0, 0], 'thickness': 2},     # left ankle - left knee
    {'srt_kpt_id': 14, 'dst_kpt_id': 12, 'color': [0, 0, 255], 'thickness': 2},     # left knee - left hip
    {'srt_kpt_id': 11, 'dst_kpt_id': 12, 'color': [122, 160, 255], 'thickness': 2}, # right hip - left hip
    {'srt_kpt_id': 5, 'dst_kpt_id': 11, 'color': [139, 0, 139], 'thickness': 2},    # right shoulder - right hip
    {'srt_kpt_id': 6, 'dst_kpt_id': 12, 'color': [237, 149, 100], 'thickness': 2},  # left shoulder - left hip
    {'srt_kpt_id': 5, 'dst_kpt_id': 6, 'color': [152, 251, 152], 'thickness': 2},   # right shoulder - left shoulder
    {'srt_kpt_id': 5, 'dst_kpt_id': 7, 'color': [148, 0, 69], 'thickness': 2},      # right shoulder - right elbow
    {'srt_kpt_id': 6, 'dst_kpt_id': 8, 'color': [0, 75, 255], 'thickness': 2},      # left shoulder - left elbow
    {'srt_kpt_id': 7, 'dst_kpt_id': 9, 'color': [56, 230, 25], 'thickness': 2},     # right elbow - right wrist
    {'srt_kpt_id': 8, 'dst_kpt_id': 10, 'color': [0, 240, 240], 'thickness': 2},    # left elbow - left wrist
    {'srt_kpt_id': 1, 'dst_kpt_id': 2, 'color': [224, 255, 255], 'thickness': 2},   # right eye - left eye
    {'srt_kpt_id': 0, 'dst_kpt_id': 1, 'color': [47, 255, 173], 'thickness': 2},    # nose - right eye
    {'srt_kpt_id': 0, 'dst_kpt_id': 2, 'color': [203, 192, 255], 'thickness': 2},   # nose - left eye
    {'srt_kpt_id': 1, 'dst_kpt_id': 3, 'color': [196, 75, 255], 'thickness': 2},    # right eye - right ear
    {'srt_kpt_id': 2, 'dst_kpt_id': 4, 'color': [86, 0, 25], 'thickness': 2},       # left eye - left ear
    {'srt_kpt_id': 3, 'dst_kpt_id': 5, 'color': [255, 255, 0], 'thickness': 2},     # right ear - right shoulder
    {'srt_kpt_id': 4, 'dst_kpt_id': 6, 'color': [255, 18, 200], 'thickness': 2}     # left ear - left shoulder
]

Frame Processing Function

def process_frame(img_bgr):
    """
    Take one BGR frame (e.g. from a camera) and return the annotated BGR frame.
    """
    results = model(img_bgr, verbose=False)  # verbose=False: don't print per-frame results

    # number of predicted boxes
    num_bbox = len(results[0].boxes.cls)
    # xyxy coordinates of the predicted boxes (int32 so the negative label offset can't underflow)
    bboxes_xyxy = results[0].boxes.xyxy.cpu().numpy().astype('int32')
    # keypoint coordinates and confidences, one set of 17 keypoints per box
    bboxes_keypoints = results[0].keypoints.data.cpu().numpy()

    for idx in range(num_bbox):  # iterate over the boxes
        # coordinates of this box
        bbox_xyxy = bboxes_xyxy[idx]
        # predicted class of the box (keypoint detection has a single class)
        bbox_label = results[0].names[0]
        # draw the box
        img_bgr = cv2.rectangle(img_bgr, (bbox_xyxy[0], bbox_xyxy[1]), (bbox_xyxy[2], bbox_xyxy[3]),
                                bbox_color, bbox_thickness)
        # draw the class label: image, text, top-left corner, font, font size, color, thickness
        img_bgr = cv2.putText(img_bgr, bbox_label,
                              (bbox_xyxy[0] + bbox_labelstr['offset_x'], bbox_xyxy[1] + bbox_labelstr['offset_y']),
                              cv2.FONT_HERSHEY_SIMPLEX, bbox_labelstr['font_size'], bbox_color,
                              bbox_labelstr['font_thickness'])

        bbox_keypoints = bboxes_keypoints[idx]  # all keypoint coordinates and confidences of this box

        # draw the skeleton connections of this box
        for skeleton in skeleton_map:
            # start point coordinates and confidence
            srt_kpt_id = skeleton['srt_kpt_id']
            srt_kpt_x = round(bbox_keypoints[srt_kpt_id][0])
            srt_kpt_y = round(bbox_keypoints[srt_kpt_id][1])
            srt_kpt_conf = bbox_keypoints[srt_kpt_id][2]
            # end point coordinates and confidence
            dst_kpt_id = skeleton['dst_kpt_id']
            dst_kpt_x = round(bbox_keypoints[dst_kpt_id][0])
            dst_kpt_y = round(bbox_keypoints[dst_kpt_id][1])
            dst_kpt_conf = bbox_keypoints[dst_kpt_id][2]
            # connection color and line width
            skeleton_color = skeleton['color']
            skeleton_thickness = skeleton['thickness']
            # draw the connection only if both endpoints clear the confidence threshold
            if srt_kpt_conf > 0.5 and dst_kpt_conf > 0.5:
                img_bgr = cv2.line(img_bgr, (srt_kpt_x, srt_kpt_y), (dst_kpt_x, dst_kpt_y),
                                   color=skeleton_color, thickness=skeleton_thickness)

        # draw the keypoints of this box
        for kpt_id in kpt_color_map:
            # color, radius and xy coordinates of this keypoint
            kpt_color = kpt_color_map[kpt_id]['color']
            kpt_radius = kpt_color_map[kpt_id]['radius']
            kpt_x = round(bbox_keypoints[kpt_id][0])
            kpt_y = round(bbox_keypoints[kpt_id][1])
            kpt_conf = bbox_keypoints[kpt_id][2]  # confidence of this keypoint
            if kpt_conf > 0.5:
                # draw a circle: image, xy, radius, color, thickness (-1 = filled)
                img_bgr = cv2.circle(img_bgr, (kpt_x, kpt_y), kpt_radius, kpt_color, -1)

    return img_bgr
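
As a quick sanity check before processing a whole video, process_frame can be run on a single image (test.jpg / out-test.jpg are placeholder paths):

img = cv2.imread('test.jpg')   # placeholder input path
img = process_frame(img)
cv2.imwrite('out-test.jpg', img)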

cv2 Video Processing Function

def generate_video(input_path='video/robot.mp4'):
    file_head = input_path.split('/')[-1]
    output_path = "out-" + file_head

    print('Processing video:', input_path)

    # count the total number of frames
    cap = cv2.VideoCapture(input_path)
    frame_count = 0
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break
        frame_count += 1
    cap.release()
    print('Total frames:', frame_count)

    # cv2.namedWindow('Crack Detection and Measurement Video Processing')
    cap = cv2.VideoCapture(input_path)
    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    # fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    fps = cap.get(cv2.CAP_PROP_FPS)

    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    # bind the progress bar to the total frame count
    with tqdm(total=frame_count) as pbar:
        try:
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break

                try:
                    frame = process_frame(frame)
                except Exception:
                    print('error')

                # cv2.imshow('Video Processing', frame)
                out.write(frame)

                # advance the progress bar by one frame
                pbar.update(1)

                # if cv2.waitKey(1) & 0xFF == ord('q'):
                #     break
        except Exception:
            print('interrupted midway')

    cv2.destroyAllWindows()
    out.release()
    cap.release()
    print('Video saved:', output_path)

Run the Prediction

Put the pieces together and run:

if __name__ == '__main__':
generate_video(input_path='video/test.mp4')
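
Since process_frame takes a single BGR frame, the same pipeline also works on a live camera; a minimal sketch, assuming camera index 0 (press q to quit):

cap = cv2.VideoCapture(0)  # assumed camera index
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    cv2.imshow('yolov8-pose', process_frame(frame))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()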
