First Commit

commit 6228af7a17
Date: 2026-02-08 07:04:22 +07:00
7 changed files with 728 additions and 0 deletions

README.md

@@ -0,0 +1,88 @@
<h1 align="center"><span>YOLOv9-OpenVINO</span></h1>
C++ and Python implementation of [YOLOv9](https://github.com/WongKinYiu/yolov9) using the OpenVINO backend.
<p align="center">
<img src="result.jpg"/>
</p>
## 🤖 Model
- Download the yolov9-c OpenVINO model: [yolov9-c-converted](https://drive.google.com/file/d/1eBs2zlPmPoa-K2N4enTG3srXmesKQyM9/view?usp=sharing)
- Or convert your custom YOLOv9 model to OpenVINO format:
``` shell
ovc yolov9-c-converted.onnx --compress_to_fp16 True --input images[1,3,640,640]
```
`ovc` is OpenVINO's command-line model converter; it takes a trained model (e.g. in ONNX format) and produces an OpenVINO IR model, consisting of an `.xml` file (topology) and a `.bin` file (weights).
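The conversion can also be done from Python. A minimal sketch using the `openvino.convert_model` API, assuming a single input of shape `[1, 3, 640, 640]` as in the `ovc` command above:
``` python
import openvino as ov

# Convert the ONNX model to an in-memory OpenVINO model
ov_model = ov.convert_model("yolov9-c-converted.onnx", input=[1, 3, 640, 640])

# Save as IR (.xml + .bin); weights are compressed to FP16 by default
ov.save_model(ov_model, "yolov9-c-converted.xml")
```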
## ⚙️ Build
#### Python
The following commands install the OpenVINO Python package, which also provides the `ovc` converter:
``` shell
cd python
pip install -r requirements.txt
```
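You can verify the installation with `python -c "import openvino as ov; print(ov.__version__)"`.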
#### C++
1. Download [OpenVINO](https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/windows/) and install it following [this guide](https://docs.openvino.ai/2023.3/openvino_docs_install_guides_installing_openvino_from_archive_windows.html)
2. Modify the OpenVINO and OpenCV paths in [CMakeLists.txt](https://github.com/spacewalk01/yolov9-openvino/blob/main/cpp/CMakeLists.txt)
3. Run the following commands to build the project (on Windows with MSVC, replace `make` with `cmake --build . --config Release`):
``` shell
cd cpp
mkdir build
cd build
cmake ..
make
```
## 🚀 Inference
#### Python
Usage:
``` shell
python main.py --model=<model path> --data_path=<data path> --score_thr=<score> --nms_thr=<nms>
```
Examples:
``` shell
# infer an image
python main.py --model=yolov9-c-converted.xml --data_path=test.jpg
# infer a folder of images
python main.py --model=yolov9-c-converted.xml --data_path=data
# infer a video
python main.py --model=yolov9-c-converted.xml --data_path=test.mp4
```
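The `Yolov9` class in `python/main.py` can also be used directly from your own script; a minimal sketch (run from the `python` directory):
``` python
from main import Yolov9, process_image

# Load the OpenVINO IR model with custom thresholds
model = Yolov9("yolov9-c-converted.xml", conf=0.3, nms=0.4)

# Detect objects in a single image and display the result
process_image(model, "test.jpg")
```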
#### C++
Usage:
``` shell
yolov9-openvino.exe <xml model path> <data path> [confidence threshold] [nms threshold]
```
Examples:
``` shell
# infer an image
yolov9-openvino.exe yolov9-c-converted.xml test.jpg
# infer a folder of images
yolov9-openvino.exe yolov9-c-converted.xml data
# infer a video
yolov9-openvino.exe yolov9-c-converted.xml test.mp4
```
## 🖥️ Requirements
- OpenVINO™ 2023.3.0
- OpenCV
## 🔗 Acknowledgement
This repo is based on the following projects:
- [yolov5-openvino](https://github.com/dacquaviva/yolov5-openvino-cpp-python) - Example of using Ultralytics YOLOv5 with OpenVINO in C++ and Python
- [YOLOv9](https://github.com/WongKinYiu/yolov9) - Learning What You Want to Learn Using Programmable Gradient Information

cpp/CMakeLists.txt

@@ -0,0 +1,41 @@
cmake_minimum_required(VERSION 3.12)
project(yolov9-openvino)
# Set C++ standard
set(CMAKE_CXX_STANDARD 17)
# Add source files
set(SOURCES
main.cpp
yolov9_openvino.cpp
)
# Add headers
set(HEADERS
yolov9_openvino.h
)
# Set your OpenCV path
set(OpenCV_DIR "C:\\opencv490\\build")
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# Set your OpenVINO path
set(OPENVINO_DIR "C:\\Program Files (x86)\\Intel\\openvino_2023\\runtime")
# Include OpenVINO headers and link directories
include_directories(${OPENVINO_DIR}/include)
link_directories(${OPENVINO_DIR}/lib/intel64/release)
set(OPENVINO_LIBS openvino openvino_c)
# Create an executable
add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})
# Link libraries
target_link_libraries(${PROJECT_NAME}
${OpenCV_LIBS}
${OPENVINO_LIBS}
)

cpp/main.cpp

@@ -0,0 +1,149 @@
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <iostream>
#include <string>
#include "yolov9_openvino.h"
bool IsPathExist(const string& path) {
#ifdef _WIN32
DWORD fileAttributes = GetFileAttributesA(path.c_str());
return (fileAttributes != INVALID_FILE_ATTRIBUTES);
#else
return (access(path.c_str(), F_OK) == 0);
#endif
}
bool IsFile(const string& path) {
if (!IsPathExist(path)) {
printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
return false;
}
#ifdef _WIN32
DWORD fileAttributes = GetFileAttributesA(path.c_str());
return ((fileAttributes != INVALID_FILE_ATTRIBUTES) && ((fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0));
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
#endif
}
int main(int argc, char** argv)
{
    assert(argc >= 3);

    const string model_file_path{ argv[1] };
    const string path{ argv[2] };
    vector<string> imagePathList;
    bool isVideo{ false };

    // Optional thresholds: argv[3] = confidence, argv[4] = NMS
    float conf_thresh = 0.2f;
    float nms_thresh = 0.3f;
    if (argc > 3)
    {
        conf_thresh = std::stof(argv[3]);
    }
    if (argc > 4)
    {
        nms_thresh = std::stof(argv[4]);
    }
if (IsFile(path))
{
string suffix = path.substr(path.find_last_of('.') + 1);
if (suffix == "jpg" || suffix == "jpeg" || suffix == "png")
{
imagePathList.push_back(path);
}
else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov" || suffix == "mkv" || suffix == "webm")
{
isVideo = true;
}
else {
printf("suffix %s is wrong !!!\n", suffix.c_str());
abort();
}
}
    else if (IsPathExist(path))
    {
        // Assume it's a folder: collect all .jpg images inside it
        glob(path + "/*.jpg", imagePathList);
    }
// init model
Yolov9 model(model_file_path);
model.setConf(conf_thresh);
model.setNMS(nms_thresh);
if (isVideo) {
//path to video
string VideoPath = path;
// open cap
VideoCapture cap(VideoPath);
int width = cap.get(CAP_PROP_FRAME_WIDTH);
int height = cap.get(CAP_PROP_FRAME_HEIGHT);
// Create a VideoWriter object to save the processed video
VideoWriter output_video("output_video.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), 30, Size(width, height));
while (1)
{
Mat frame;
cap >> frame;
if (frame.empty()) break;
Resize res = model.resize_and_pad(frame);
vector<Detection> bboxes;
model.predict(res.resized_image, bboxes);
model.draw(frame, bboxes, res.dw, res.dh);
cv::imshow("prediction", frame);
output_video.write(frame);
cv::waitKey(1);
}
// Release resources
cv::destroyAllWindows();
cap.release();
output_video.release();
}
else {
// path to folder saves images
string imageFolderPath_out = "results/";
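        // NOTE: the results/ directory must already exist; cv::imwrite fails (returns false) otherwise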
for (const auto& imagePath : imagePathList)
{
// open image
Mat frame = imread(imagePath);
if (frame.empty())
{
cerr << "Error reading image: " << imagePath << endl;
continue;
}
Resize res = model.resize_and_pad(frame);
vector<Detection> bboxes;
model.predict(res.resized_image, bboxes);
model.draw(frame, bboxes, res.dw, res.dh);
            // Extract the file name (the part after the last '/') for the output path
            istringstream iss(imagePath);
            string token;
            while (getline(iss, token, '/'))
            {
            }
imwrite(imageFolderPath_out + token, frame);
std::cout << imageFolderPath_out + token << endl;
cv::imshow("prediction", frame);
cv::waitKey(0);
}
}
return 0;
}

cpp/yolov9_openvino.cpp

@@ -0,0 +1,170 @@
#include "yolov9_openvino.h"
#include <opencv2/dnn.hpp>
const vector<string> coconame = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
"truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
"laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush" };
Resize Yolov9::resize_and_pad(cv::Mat& img)
{
    // Input tensor layout is NHWC, so shape = {1, H, W, C}
    ov::Shape input_shape = compiled_model.input().get_shape();
    const float model_h = static_cast<float>(input_shape[1]);
    const float model_w = static_cast<float>(input_shape[2]);
    float width = img.cols;
    float height = img.rows;
    // Scale so the longer side fits the model input, preserving aspect ratio
    float r = std::min(model_w / width, model_h / height);
    int new_unpadW = int(round(width * r));
    int new_unpadH = int(round(height * r));
    Resize resize;
    cv::resize(img, resize.resized_image, cv::Size(new_unpadW, new_unpadH), 0, 0, cv::INTER_AREA);
    resize.dw = int(model_w) - new_unpadW;
    resize.dh = int(model_h) - new_unpadH;
    cv::Scalar color = cv::Scalar(100, 100, 100);
    cv::copyMakeBorder(resize.resized_image, resize.resized_image, 0, resize.dh, 0, resize.dw, cv::BORDER_CONSTANT, color);
    return resize;
}
Yolov9::Yolov9(const string &model_path)
{
// Step 1. Initialize OpenVINO Runtime core
ov::Core core;
// Step 2. Read a model
std::shared_ptr<ov::Model> model = core.read_model(model_path);
    // Step 3. Initialize preprocessing for the model
ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
// Specify input image format
ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
// Specify preprocess pipeline to input image without resizing
ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255., 255., 255. });
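    // Note: scale({255, 255, 255}) divides each channel by 255, normalizing pixel values to [0, 1]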
// Specify model's input layout
ppp.input().model().set_layout("NCHW");
// Specify output results format
ppp.output().tensor().set_element_type(ov::element::f32);
// Embed above steps in the graph
model = ppp.build();
compiled_model = core.compile_model(model, "CPU");
// Create random colors
random_device rd;
mt19937 gen(rd());
uniform_int_distribution<int> dis(100, 255);
for (int i = 0; i < coconame.size(); i++)
{
Scalar color = Scalar(dis(gen), dis(gen), dis(gen));
colors.push_back(color);
}
}
void Yolov9::predict(cv::Mat &img, std::vector<Detection> &output)
{
    // Step 5. Create tensor from image (the input element type is u8 after the PrePostProcessor)
    uint8_t* input_data = img.data;
    ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data);
// Step 6. Create an infer request for model inference
ov::InferRequest infer_request = compiled_model.create_infer_request();
infer_request.set_input_tensor(input_tensor);
infer_request.infer();
//Step 7. Retrieve inference results
const ov::Tensor& output_tensor = infer_request.get_output_tensor();
ov::Shape output_shape = output_tensor.get_shape();
float* detections = output_tensor.data<float>();
// Step 8. Postprocessing including NMS
vector<Rect> boxes;
vector<int> class_ids;
vector<float> confidences;
const Mat det_output(output_shape[1], output_shape[2], CV_32F, detections);
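    // det_output is (4 + num_classes) x num_candidates, e.g. 84 x 8400: rows 0-3 hold (cx, cy, w, h), the remaining rows hold per-class scores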
for (int i = 0; i < det_output.cols; ++i) {
const Mat classes_scores = det_output.col(i).rowRange(4, output_shape[1]);
Point class_id_point;
double score;
minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
if (score > CONFIDENCE_THRESHOLD) {
const float cx = det_output.at<float>(0, i);
const float cy = det_output.at<float>(1, i);
const float ow = det_output.at<float>(2, i);
const float oh = det_output.at<float>(3, i);
Rect box;
box.x = static_cast<int>((cx - 0.5 * ow));
box.y = static_cast<int>((cy - 0.5 * oh));
box.width = static_cast<int>(ow);
box.height = static_cast<int>(oh);
boxes.push_back(box);
class_ids.push_back(class_id_point.y);
confidences.push_back(score);
}
}
vector<int> nms_result;
dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++)
{
Detection result;
int idx = nms_result[i];
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
void Yolov9::draw(Mat& img, vector<Detection>& output, float dw, float dh)
{
// Step 9. Print results and save Figure with detections
ov::Shape input_shape = compiled_model.input().get_shape();
for (int i = 0; i < output.size(); i++)
{
auto detection = output[i];
auto box = detection.box;
auto classId = detection.class_id;
auto confidence = detection.confidence;
        // Undo the letterbox: dw/dh are the width/height padding; input layout is NHWC = {1, H, W, C}
        float rx = (float)img.cols / (float)(input_shape[2] - dw);
        float ry = (float)img.rows / (float)(input_shape[1] - dh);
box.x = rx * box.x;
box.y = ry * box.y;
box.width = rx * box.width;
box.height = ry * box.height;
float xmax = box.x + box.width;
float ymax = box.y + box.height;
rectangle(img, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), colors[classId], 3);
// Detection box text
string class_string = coconame[classId] + ' ' + to_string(confidence).substr(0, 4);
Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0);
Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20);
rectangle(img, text_rect, colors[classId], FILLED);
putText(img, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0);
}
}
void Yolov9::setConf(float conf)
{
CONFIDENCE_THRESHOLD = conf;
}
void Yolov9::setNMS(float nms)
{
NMS_THRESHOLD = nms;
}

cpp/yolov9_openvino.h

@@ -0,0 +1,46 @@
#pragma once
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>
#include <random>
using namespace std;
using namespace cv;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
struct Resize
{
cv::Mat resized_image;
int dw;
int dh;
};
class Yolov9
{
public:
Yolov9(const string& model_path);
~Yolov9() {};
Resize resize_and_pad(cv::Mat& img);
void predict(cv::Mat& img, std::vector<Detection>& output);
void draw(Mat& img, vector<Detection>& output, float dw, float dh);
void setConf(float conf);
void setNMS(float nms);
private:
ov::CompiledModel compiled_model;
float NMS_THRESHOLD = 0.4;
float CONFIDENCE_THRESHOLD = 0.4;
vector<Scalar> colors;
};

python/main.py

@@ -0,0 +1,232 @@
from pathlib import Path
import openvino as ov
from openvino.preprocess import PrePostProcessor
from openvino.preprocess import ColorFormat
from openvino import Layout, Type
import numpy as np
import cv2
import argparse
import os
coconame = [
"karung", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
"truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
"laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush" ]
class Yolov9:
def __init__(self, xml_model_path="./model/yolov9-c-converted.xml", conf=0.2, nms=0.4):
# Step 1. Initialize OpenVINO Runtime core
core = ov.Core()
# Step 2. Read a model
model = core.read_model(str(Path(xml_model_path)))
        # Step 3. Initialize preprocessing for the model
ppp = PrePostProcessor(model)
# Specify input image format
ppp.input().tensor().set_element_type(Type.u8).set_layout(Layout("NHWC")).set_color_format(ColorFormat.BGR)
# Specify preprocess pipeline to input image without resizing
ppp.input().preprocess().convert_element_type(Type.f32).convert_color(ColorFormat.RGB).scale([255., 255., 255.])
# Specify model's input layout
ppp.input().model().set_layout(Layout("NCHW"))
# Specify output results format
ppp.output().tensor().set_element_type(Type.f32)
# Embed above steps in the graph
model = ppp.build()
self.compiled_model = core.compile_model(model, "CPU")
self.input_width = 640
self.input_height = 640
self.conf_thresh = conf
self.nms_thresh = nms
self.colors = []
# Create random colors
np.random.seed(42) # Setting seed for reproducibility
for i in range(len(coconame)):
color = tuple(np.random.randint(100, 256, size=3))
self.colors.append(color)
def resize_and_pad(self, image):
old_size = image.shape[:2]
        ratio = float(self.input_width / max(old_size))  # fit the longer side, so rectangular images are handled too
new_size = tuple([int(x*ratio) for x in old_size])
image = cv2.resize(image, (new_size[1], new_size[0]))
delta_w = self.input_width - new_size[1]
delta_h = self.input_height - new_size[0]
color = [100, 100, 100]
new_im = cv2.copyMakeBorder(image, 0, delta_h, 0, delta_w, cv2.BORDER_CONSTANT, value=color)
return new_im, delta_w, delta_h
def predict(self, img):
# Step 4. Create tensor from image
input_tensor = np.expand_dims(img, 0)
# Step 5. Create an infer request for model inference
infer_request = self.compiled_model.create_infer_request()
infer_request.infer({0: input_tensor})
# Step 6. Retrieve inference results
output = infer_request.get_output_tensor()
detections = output.data[0].T
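        # Output shape is [1, 84, 8400]; transposing gives [8400, 84], so each row is (cx, cy, w, h) + 80 class scores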
# Step 7. Postprocessing including NMS
boxes = []
class_ids = []
confidences = []
for prediction in detections:
classes_scores = prediction[4:]
_, _, _, max_indx = cv2.minMaxLoc(classes_scores)
class_id = max_indx[1]
if (classes_scores[class_id] > self.conf_thresh):
confidences.append(classes_scores[class_id])
class_ids.append(class_id)
x, y, w, h = prediction[0].item(), prediction[1].item(), prediction[2].item(), prediction[3].item()
xmin = x - (w / 2)
ymin = y - (h / 2)
box = np.array([xmin, ymin, w, h])
boxes.append(box)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, self.conf_thresh, self.nms_thresh)
detections = []
for i in indexes:
j = i.item()
detections.append({"class_index": class_ids[j], "confidence": confidences[j], "box": boxes[j]})
return detections
def draw(self, img, detections, dw, dh):
# Step 8. Print results and save Figure with detections
for detection in detections:
box = detection["box"]
classId = detection["class_index"]
confidence = detection["confidence"]
rx = img.shape[1] / (self.input_width - dw)
ry = img.shape[0] / (self.input_height - dh)
box[0] = rx * box[0]
box[1] = ry * box[1]
box[2] = rx * box[2]
box[3] = ry * box[3]
xmax = box[0] + box[2]
ymax = box[1] + box[3]
# Drawing detection box
cv2.rectangle(img, (int(box[0]), int(box[1])), (int(xmax), int(ymax)), tuple(map(int, self.colors[classId])), 3)
# Detection box text
class_string = coconame[classId] + ' ' + str(confidence)[:4]
text_size, _ = cv2.getTextSize(class_string, cv2.FONT_HERSHEY_DUPLEX, 1, 2)
text_rect = (box[0], box[1] - 40, text_size[0] + 10, text_size[1] + 20)
cv2.rectangle(img,
(int(text_rect[0]), int(text_rect[1])),
(int(text_rect[0] + text_rect[2]), int(text_rect[1] + text_rect[3])),
tuple(map(int, self.colors[classId])), cv2.FILLED)
cv2.putText(img, class_string, (int(box[0] + 5), int(box[1] - 10)), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
def make_parser():
    parser = argparse.ArgumentParser("openvino inference")
parser.add_argument(
"-m",
"--model",
type=str,
default="yolov9-c-converted.onnx",
help="Input your onnx model.",
)
parser.add_argument(
"-i",
"--data_path",
type=str,
default='videos/palace.mp4',
help="Path to your input image.",
)
parser.add_argument(
"-s",
"--score_thr",
type=float,
default=0.1,
help="Score threshould to filter the result.",
)
parser.add_argument(
"-n",
"--nms_thr",
type=float,
default=0.3,
help="NMS threshould.",
)
return parser
# Process a single image
def process_image(model, image_path):
img = cv2.imread(image_path)
img_resized, dw, dh = model.resize_and_pad(img)
results = model.predict(img_resized)
model.draw(img, results, dw, dh)
cv2.imshow("result", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Process a folder of images
def process_folder(model, folder_path):
for filename in os.listdir(folder_path):
if filename.endswith(".jpg") or filename.endswith(".png"):
image_path = os.path.join(folder_path, filename)
process_image(model, image_path)
# Process a video
def process_video(model, video_path):
cap = cv2.VideoCapture(video_path)
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
img_resized, dw, dh = model.resize_and_pad(frame)
results = model.predict(img_resized)
model.draw(frame, results, dw, dh)
cv2.imshow("result", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
    args = make_parser().parse_args()
    # Initialize the YOLOv9 model (expects an OpenVINO .xml model)
    model = Yolov9(args.model, conf=args.score_thr, nms=args.nms_thr)
if args.data_path.endswith('.jpg') or args.data_path.endswith('.png'):
process_image(model, args.data_path)
elif os.path.isdir(args.data_path):
process_folder(model, args.data_path)
    elif args.data_path.endswith('.mp4'):  # extend here to support other video formats
process_video(model, args.data_path)
else:
print("Error: Unsupported file format")
if __name__ == "__main__":
main()

python/requirements.txt

@@ -0,0 +1,2 @@
openvino
opencv-python