First Commit
This commit is contained in:
88
README.md
Normal file
88
README.md
Normal file
@@ -0,0 +1,88 @@
|
||||
<h1 align="center"><span>YOLOv9-OpenVINO</span></h1>
|
||||
|
||||
C++ and python implementation of [YOLOv9](https://github.com/WongKinYiu/yolov9) using Openvino Backend.
|
||||
|
||||
<p align="center" style="margin: 0 auto;">
|
||||
<img src="result.jpg"/>
|
||||
</p>
|
||||
|
||||
## 🤖 Model
|
||||
|
||||
- Download yolov9-c openvino model: [yolov9-c-converted](https://drive.google.com/file/d/1eBs2zlPmPoa-K2N4enTG3srXmesKQyM9/view?usp=sharing)
|
||||
- Or convert your custom yolov9 model to openvino format:
|
||||
``` shell
|
||||
ovc yolov9-c-converted.onnx --compress_to_fp16 True --input images[1,3,640,640]
|
||||
```
|
||||
`ovc` is a command-line model converter that converts trained models in onnx or pytorch format to an OpenVINO model in bin, xml format.
|
||||
|
||||
|
||||
## ⚙️ Build
|
||||
|
||||
#### Python
|
||||
|
||||
The following command will install openvino python with the `ovc` api:
|
||||
|
||||
``` shell
|
||||
cd python
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
#### C++
|
||||
|
||||
1. Download [openvino](https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/windows/) and install it following [this guide](https://docs.openvino.ai/2023.3/openvino_docs_install_guides_installing_openvino_from_archive_windows.html)
|
||||
2. Modify your openvino and opencv paths in [CMakeLists.txt](https://github.com/spacewalk01/yolov9-openvino/blob/main/cpp/CMakeLists.txt)
|
||||
3. Run the following command to build the project
|
||||
|
||||
``` shell
|
||||
cd cpp
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
## 🚀 Inference
|
||||
|
||||
#### Python
|
||||
|
||||
Usage:
|
||||
``` shell
|
||||
python main.py --model=<model path> --data_path=<data path> --score_thr=<score> --nms_thr=<nms>
|
||||
```
|
||||
|
||||
Examples:
|
||||
``` shell
|
||||
# infer an image
|
||||
python main.py --model=yolov9-c-converted.xml --data_path=test.jpg
|
||||
# infer a folder(images)
|
||||
python main.py --model=yolov9-c-converted.xml --data_path=data
|
||||
# infer a video
|
||||
python main.py --model=yolov9-c-converted.xml --data_path=test.mp4
|
||||
```
|
||||
|
||||
#### C++
|
||||
|
||||
Usage:
|
||||
``` shell
|
||||
yolov9-openvino.exe <xml model path> <data> <confidence threshold> <nms threshold>
|
||||
```
|
||||
|
||||
Examples:
|
||||
``` shell
|
||||
# infer an image
|
||||
yolov9-openvino.exe yolov9-c-converted.xml test.jpg
|
||||
# infer a folder(images)
|
||||
yolov9-openvino.exe yolov9-c-converted.xml data
|
||||
# infer a video
|
||||
yolov9-openvino.exe yolov9-c-converted.xml test.mp4 # the video path
|
||||
```
|
||||
|
||||
## 🖥️ Requirement
|
||||
|
||||
- OpenVINO™ 2023.3.0
|
||||
- OpenCV
|
||||
|
||||
## 🔗 Acknowledgement
|
||||
This repo is based on the following projects:
|
||||
- [yolov5-openvino](https://github.com/dacquaviva/yolov5-openvino-cpp-python) - Example of using ultralytics YOLOv5 with Openvino in C++ and Python
|
||||
- [YOLOv9](https://github.com/WongKinYiu/yolov9) - Learning What You Want to Learn Using Programmable Gradient Information
|
||||
41
cpp/CMakeLists.txt
Normal file
41
cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,41 @@
|
||||
cmake_minimum_required(VERSION 3.12)
project(yolov9-openvino)

# Set C++ standard
set(CMAKE_CXX_STANDARD 17)

# Add source files
set(SOURCES
    main.cpp
    yolov9_openvino.cpp
)

# Add headers
set(HEADERS
    yolov9_openvino.h
)

# Set your OpenCV path
set(OpenCV_DIR "C:\\opencv490\\build")
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

# Set your OpenVINO path
set(OPENVINO_DIR "C:\\Program Files (x86)\\Intel\\openvino_2023\\runtime")

# Include OpenVINO headers and link against its runtime libraries
include_directories(${OPENVINO_DIR}/include)
link_directories(${OPENVINO_DIR}/lib/intel64/release)
set(OPENVINO_LIBS openvino openvino_c)

# Create an executable
add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})

# Link libraries
target_link_libraries(${PROJECT_NAME}
    ${OpenCV_LIBS}
    ${OPENVINO_LIBS}
)
|
||||
149
cpp/main.cpp
Normal file
149
cpp/main.cpp
Normal file
@@ -0,0 +1,149 @@
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <cstdio>
#include <filesystem>
#include <iostream>
#include <string>

#include "yolov9_openvino.h"
|
||||
|
||||
|
||||
// Return true if `path` names any existing filesystem entry (file or folder).
bool IsPathExist(const std::string& path) {
#ifdef _WIN32
    const DWORD attrs = GetFileAttributesA(path.c_str());
    return attrs != INVALID_FILE_ATTRIBUTES;
#else
    return access(path.c_str(), F_OK) == 0;
#endif
}

// Return true if `path` exists and is a regular file (not a directory).
// Logs a diagnostic to stdout when the path is missing.
bool IsFile(const std::string& path) {
    if (!IsPathExist(path)) {
        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
        return false;
    }

#ifdef _WIN32
    const DWORD attrs = GetFileAttributesA(path.c_str());
    return attrs != INVALID_FILE_ATTRIBUTES && (attrs & FILE_ATTRIBUTE_DIRECTORY) == 0;
#else
    struct stat info;
    return stat(path.c_str(), &info) == 0 && S_ISREG(info.st_mode);
#endif
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const string model_file_path{ argv[1] };
|
||||
const string path{ argv[2] };
|
||||
vector<string> imagePathList;
|
||||
bool isVideo{ false };
|
||||
assert(argc >= 3);
|
||||
|
||||
float conf_thresh = 0.2;
|
||||
float nms_thresh = 0.3;
|
||||
if (argc > 3)
|
||||
{
|
||||
conf_thresh = std::stof(argv[3]);
|
||||
}
|
||||
else if (argc > 4)
|
||||
{
|
||||
nms_thresh = std::stof(argv[3]);
|
||||
}
|
||||
|
||||
if (IsFile(path))
|
||||
{
|
||||
string suffix = path.substr(path.find_last_of('.') + 1);
|
||||
if (suffix == "jpg" || suffix == "jpeg" || suffix == "png")
|
||||
{
|
||||
imagePathList.push_back(path);
|
||||
}
|
||||
else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov" || suffix == "mkv" || suffix == "webm")
|
||||
{
|
||||
isVideo = true;
|
||||
}
|
||||
else {
|
||||
printf("suffix %s is wrong !!!\n", suffix.c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
else if (IsPathExist(path))
|
||||
{
|
||||
glob(path + "/*.jpg", imagePathList);
|
||||
}
|
||||
|
||||
// Assume it's a folder, add logic to handle folders
|
||||
// init model
|
||||
Yolov9 model(model_file_path);
|
||||
model.setConf(conf_thresh);
|
||||
model.setNMS(nms_thresh);
|
||||
|
||||
if (isVideo) {
|
||||
//path to video
|
||||
string VideoPath = path;
|
||||
// open cap
|
||||
VideoCapture cap(VideoPath);
|
||||
|
||||
int width = cap.get(CAP_PROP_FRAME_WIDTH);
|
||||
int height = cap.get(CAP_PROP_FRAME_HEIGHT);
|
||||
|
||||
// Create a VideoWriter object to save the processed video
|
||||
VideoWriter output_video("output_video.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), 30, Size(width, height));
|
||||
while (1)
|
||||
{
|
||||
Mat frame;
|
||||
cap >> frame;
|
||||
|
||||
if (frame.empty()) break;
|
||||
|
||||
Resize res = model.resize_and_pad(frame);
|
||||
|
||||
vector<Detection> bboxes;
|
||||
model.predict(res.resized_image, bboxes);
|
||||
model.draw(frame, bboxes, res.dw, res.dh);
|
||||
|
||||
cv::imshow("prediction", frame);
|
||||
output_video.write(frame);
|
||||
cv::waitKey(1);
|
||||
}
|
||||
|
||||
// Release resources
|
||||
cv::destroyAllWindows();
|
||||
cap.release();
|
||||
output_video.release();
|
||||
}
|
||||
else {
|
||||
// path to folder saves images
|
||||
string imageFolderPath_out = "results/";
|
||||
for (const auto& imagePath : imagePathList)
|
||||
{
|
||||
// open image
|
||||
Mat frame = imread(imagePath);
|
||||
if (frame.empty())
|
||||
{
|
||||
cerr << "Error reading image: " << imagePath << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
Resize res = model.resize_and_pad(frame);
|
||||
|
||||
vector<Detection> bboxes;
|
||||
model.predict(res.resized_image, bboxes);
|
||||
model.draw(frame, bboxes, res.dw, res.dh);
|
||||
|
||||
istringstream iss(imagePath);
|
||||
string token;
|
||||
while (getline(iss, token, '/'))
|
||||
{
|
||||
}
|
||||
imwrite(imageFolderPath_out + token, frame);
|
||||
std::cout << imageFolderPath_out + token << endl;
|
||||
|
||||
cv::imshow("prediction", frame);
|
||||
cv::waitKey(0);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
170
cpp/yolov9_openvino.cpp
Normal file
170
cpp/yolov9_openvino.cpp
Normal file
@@ -0,0 +1,170 @@
|
||||
#include "yolov9_openvino.h"
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
// The 80 COCO class labels, indexed by the model's class id (0 = "person").
// Must stay in sync with the Python implementation's table.
const vector<string> coconame = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
    "teddy bear", "hair drier", "toothbrush" };
|
||||
|
||||
|
||||
// Letterbox `img` into the network input resolution: scale so the longer side
// fits, then pad the bottom/right with grey (100,100,100) up to the full size.
// Returns the padded image plus the padding amounts (dw, dh) that draw()
// later uses to undo the mapping.
// NOTE(review): input_shape[1]/input_shape[2] are indexed as width/height;
// both are equal for the stock 640x640 model — confirm for non-square inputs.
Resize Yolov9::resize_and_pad(cv::Mat& img)
{
    const ov::Shape input_shape = compiled_model.input().get_shape();

    const float src_w = img.cols;
    const float src_h = img.rows;
    const float scale = float(input_shape[1] / max(src_w, src_h));
    const int scaled_w = int(round(src_w * scale));
    const int scaled_h = int(round(src_h * scale));

    Resize out;
    cv::resize(img, out.resized_image, cv::Size(scaled_w, scaled_h), 0, 0, cv::INTER_AREA);

    out.dw = input_shape[1] - scaled_w;
    out.dh = input_shape[2] - scaled_h;

    const cv::Scalar pad_color = cv::Scalar(100, 100, 100);
    cv::copyMakeBorder(out.resized_image, out.resized_image, 0, out.dh, 0, out.dw,
                       cv::BORDER_CONSTANT, pad_color);

    return out;
}
|
||||
|
||||
// Build the OpenVINO pipeline for a YOLOv9 IR model: read the model, embed
// u8-BGR -> f32-RGB/255 preprocessing into the graph, compile it for CPU,
// and generate one random draw colour per COCO class.
Yolov9::Yolov9(const string &model_path)
{
    // Step 1. Initialize OpenVINO Runtime core
    ov::Core core;
    // Step 2. Read a model
    std::shared_ptr<ov::Model> network = core.read_model(model_path);

    // Step 3. Bake preprocessing into the graph so predict() can feed
    // raw BGR frames directly.
    ov::preprocess::PrePostProcessor preproc(network);
    // Incoming tensor: u8, NHWC, BGR (a plain OpenCV image)
    preproc.input().tensor()
        .set_element_type(ov::element::u8)
        .set_layout("NHWC")
        .set_color_format(ov::preprocess::ColorFormat::BGR);
    // Convert to f32 RGB and scale to [0, 1]; no resizing here
    preproc.input().preprocess()
        .convert_element_type(ov::element::f32)
        .convert_color(ov::preprocess::ColorFormat::RGB)
        .scale({ 255., 255., 255. });
    // The network itself is NCHW
    preproc.input().model().set_layout("NCHW");
    // Results come back as f32
    preproc.output().tensor().set_element_type(ov::element::f32);
    network = preproc.build();

    compiled_model = core.compile_model(network, "CPU");

    // One random (bright-ish) colour per class, used by draw().
    random_device rd;
    mt19937 gen(rd());
    uniform_int_distribution<int> dis(100, 255);
    for (size_t c = 0; c < coconame.size(); ++c)
    {
        colors.push_back(Scalar(dis(gen), dis(gen), dis(gen)));
    }
}
|
||||
|
||||
// Run the compiled model on a letterboxed BGR image and append the
// NMS-filtered detections to `output`. Boxes are expressed in
// network-input pixel coordinates (draw() maps them back to the image).
void Yolov9::predict(cv::Mat &img, std::vector<Detection> &output)
{
    // Step 5. Create tensor from image
    // The tensor wraps img's buffer without copying; the element type is u8
    // (preprocessing is embedded in the graph), so the float* cast only
    // satisfies the constructor's pointer parameter.
    float* input_data = (float*)img.data;
    ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data);

    // Step 6. Create an infer request for model inference
    ov::InferRequest infer_request = compiled_model.create_infer_request();
    infer_request.set_input_tensor(input_tensor);
    infer_request.infer();

    //Step 7. Retrieve inference results
    const ov::Tensor& output_tensor = infer_request.get_output_tensor();
    ov::Shape output_shape = output_tensor.get_shape();

    float* detections = output_tensor.data<float>();

    // Step 8. Postprocessing including NMS
    vector<Rect> boxes;
    vector<int> class_ids;
    vector<float> confidences;

    // View the raw output as a rows x cols matrix; each COLUMN is one
    // candidate: [cx, cy, w, h, per-class scores...].
    const Mat det_output(output_shape[1], output_shape[2], CV_32F, detections);

    for (int i = 0; i < det_output.cols; ++i) {
        // Class scores for candidate i: rows 4..end of column i.
        const Mat classes_scores = det_output.col(i).rowRange(4, output_shape[1]);
        Point class_id_point;
        double score;
        minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);

        if (score > CONFIDENCE_THRESHOLD) {
            // Convert centre/size to top-left/size for cv::Rect.
            const float cx = det_output.at<float>(0, i);
            const float cy = det_output.at<float>(1, i);
            const float ow = det_output.at<float>(2, i);
            const float oh = det_output.at<float>(3, i);
            Rect box;
            box.x = static_cast<int>((cx - 0.5 * ow));
            box.y = static_cast<int>((cy - 0.5 * oh));
            box.width = static_cast<int>(ow);
            box.height = static_cast<int>(oh);

            boxes.push_back(box);
            // For an Nx1 score column, maxLoc.y is the row index = class id.
            class_ids.push_back(class_id_point.y);
            confidences.push_back(score);
        }
    }

    // Suppress overlapping boxes, keeping the highest-scoring ones.
    vector<int> nms_result;
    dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);

    for (int i = 0; i < nms_result.size(); i++)
    {
        Detection result;
        int idx = nms_result[i];
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        output.push_back(result);
    }
}
|
||||
|
||||
// Draw `output` detections onto `img`, rescaling boxes from the padded
// network-input space back to the original image resolution.
// `dw`/`dh` are the padding amounts reported by resize_and_pad().
void Yolov9::draw(Mat& img, vector<Detection>& output, float dw, float dh)
{
    // Step 9. Print results and save Figure with detections
    ov::Shape input_shape = compiled_model.input().get_shape();

    // size_t index avoids the signed/unsigned comparison of the original.
    for (size_t i = 0; i < output.size(); i++)
    {
        auto detection = output[i];
        auto box = detection.box;
        auto classId = detection.class_id;
        auto confidence = detection.confidence;

        // Undo the letterboxing: network-input coords -> image coords.
        // NOTE(review): input_shape[1]/[2] used as width/height; equal for
        // the stock 640x640 model.
        float rx = (float)img.cols / (float)(input_shape[1] - dw);
        float ry = (float)img.rows / (float)(input_shape[2] - dh);
        box.x = rx * box.x;
        box.y = ry * box.y;
        box.width = rx * box.width;
        box.height = ry * box.height;
        // (Removed unused xmax/ymax locals; the corner is computed inline.)

        rectangle(img, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), colors[classId], 3);

        // Detection box text: "<class> <confidence to ~2dp>"
        string class_string = coconame[classId] + ' ' + to_string(confidence).substr(0, 4);
        Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0);
        Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20);
        rectangle(img, text_rect, colors[classId], FILLED);
        putText(img, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0);
    }
}
|
||||
|
||||
// Set the minimum class score a detection must reach to be kept by predict().
void Yolov9::setConf(float conf)
{
    CONFIDENCE_THRESHOLD = conf;
}
|
||||
|
||||
// Set the overlap threshold used by non-maximum suppression in predict().
void Yolov9::setNMS(float nms)
{
    NMS_THRESHOLD = nms;
}
|
||||
46
cpp/yolov9_openvino.h
Normal file
46
cpp/yolov9_openvino.h
Normal file
@@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
#include <random>
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
// One detected object as produced by Yolov9::predict().
struct Detection
{
    int class_id;       // index into the COCO class-name table
    float confidence;   // best class score for this box
    cv::Rect box;       // box in network-input pixel coordinates
};
|
||||
|
||||
|
||||
// Result of letterboxing an image for inference (Yolov9::resize_and_pad).
struct Resize
{
    cv::Mat resized_image;  // scaled + grey-padded image at network input size
    int dw;                 // horizontal padding added on the right, in pixels
    int dh;                 // vertical padding added at the bottom, in pixels
};
|
||||
|
||||
// YOLOv9 object detector running on the OpenVINO runtime (CPU device).
class Yolov9
{
public:
    // Reads the IR model at `model_path`, embeds u8-BGR preprocessing in the
    // graph, and compiles it for CPU.
    Yolov9(const string& model_path);
    ~Yolov9() {};

    // Letterbox `img` to the network input size; returns the padding info.
    Resize resize_and_pad(cv::Mat& img);
    // Run inference and append NMS-filtered detections to `output`.
    void predict(cv::Mat& img, std::vector<Detection>& output);
    // Draw `output` on `img`, undoing the (dw, dh) letterbox padding.
    void draw(Mat& img, vector<Detection>& output, float dw, float dh);

    // Minimum class score for a detection to be kept.
    void setConf(float conf);
    // IoU threshold used by non-maximum suppression.
    void setNMS(float nms);

private:

    ov::CompiledModel compiled_model;

    float NMS_THRESHOLD = 0.4;         // default NMS IoU threshold
    float CONFIDENCE_THRESHOLD = 0.4;  // default score threshold

    vector<Scalar> colors;             // per-class draw colours
};
|
||||
232
python/main.py
Normal file
232
python/main.py
Normal file
@@ -0,0 +1,232 @@
|
||||
from pathlib import Path
|
||||
|
||||
import openvino as ov
|
||||
from openvino.preprocess import PrePostProcessor
|
||||
from openvino.preprocess import ColorFormat
|
||||
from openvino import Layout, Type
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
import argparse
|
||||
import os
|
||||
|
||||
# The 80 COCO class labels, indexed by the model's class id.
# Fix: index 0 read "karung"; restored to "person" to match the C++
# implementation and the standard COCO label set.
coconame = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
    "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
    "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
    "teddy bear", "hair drier", "toothbrush"]
|
||||
|
||||
|
||||
class Yolov9:
    """YOLOv9 detector on the OpenVINO CPU backend.

    u8-BGR -> f32-RGB/255 preprocessing is baked into the compiled graph,
    so predict() can be fed the raw (letterboxed) BGR image from OpenCV.
    """

    def __init__(self, xml_model_path="./model/yolov9-c-converted.xml", conf=0.2, nms=0.4):
        # Step 1. Initialize OpenVINO Runtime core
        core = ov.Core()
        # Step 2. Read a model
        model = core.read_model(str(Path(xml_model_path)))

        # Step 3. Initialize preprocessing for the model
        ppp = PrePostProcessor(model)
        # Input arrives as u8 NHWC BGR (a plain OpenCV image batch)
        ppp.input().tensor().set_element_type(Type.u8).set_layout(Layout("NHWC")).set_color_format(ColorFormat.BGR)
        # Convert to f32 RGB and scale to [0, 1] inside the graph (no resize)
        ppp.input().preprocess().convert_element_type(Type.f32).convert_color(ColorFormat.RGB).scale([255., 255., 255.])
        # The network itself is NCHW
        ppp.input().model().set_layout(Layout("NCHW"))
        # Results come back as f32
        ppp.output().tensor().set_element_type(Type.f32)
        # Embed the above steps in the graph
        model = ppp.build()

        self.compiled_model = core.compile_model(model, "CPU")

        # Network input resolution; assumes the exported 640x640 model.
        self.input_width = 640
        self.input_height = 640
        self.conf_thresh = conf
        self.nms_thresh = nms

        # One reproducible random draw colour per class (seeded RNG).
        np.random.seed(42)
        self.colors = [tuple(np.random.randint(100, 256, size=3)) for _ in coconame]

    def resize_and_pad(self, image):
        """Letterbox `image` to the network input size.

        Scales so the longer side fits (handles rectangular images), pads the
        bottom/right with grey, and returns (padded_image, delta_w, delta_h),
        where the deltas are the paddings draw() needs to undo the mapping.
        """
        old_size = image.shape[:2]
        ratio = float(self.input_width / max(old_size))
        new_size = tuple(int(x * ratio) for x in old_size)

        image = cv2.resize(image, (new_size[1], new_size[0]))

        delta_w = self.input_width - new_size[1]
        delta_h = self.input_height - new_size[0]

        color = [100, 100, 100]
        new_im = cv2.copyMakeBorder(image, 0, delta_h, 0, delta_w, cv2.BORDER_CONSTANT, value=color)

        return new_im, delta_w, delta_h

    def predict(self, img):
        """Run inference on a letterboxed BGR image.

        Returns a list of {"class_index", "confidence", "box"} dicts, where
        box = [xmin, ymin, w, h] in network-input pixel coordinates.
        """
        # Step 4. Create a batch-of-one tensor from the image
        input_tensor = np.expand_dims(img, 0)

        # Step 5. Create an infer request for model inference
        infer_request = self.compiled_model.create_infer_request()
        infer_request.infer({0: input_tensor})

        # Step 6. Retrieve inference results.
        # One row per candidate, columns = [cx, cy, w, h, 80 class scores].
        output = infer_request.get_output_tensor()
        predictions = output.data[0].T

        # Step 7. Postprocessing including NMS.
        # Vectorized score/class extraction (replaces a slow per-row
        # cv2.minMaxLoc Python loop over ~8400 candidates).
        scores = predictions[:, 4:]
        best_class = np.argmax(scores, axis=1)
        best_score = scores[np.arange(scores.shape[0]), best_class]
        keep = best_score > self.conf_thresh

        boxes = []
        class_ids = []
        confidences = []
        for row, class_id, score in zip(predictions[keep], best_class[keep], best_score[keep]):
            x, y, w, h = (float(v) for v in row[:4])
            # Convert centre/size to top-left/size
            boxes.append(np.array([x - w / 2, y - h / 2, w, h]))
            class_ids.append(int(class_id))
            confidences.append(float(score))

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, self.conf_thresh, self.nms_thresh)

        detections = []
        for i in indexes:
            j = i.item() if hasattr(i, "item") else int(i)
            detections.append({"class_index": class_ids[j], "confidence": confidences[j], "box": boxes[j]})

        return detections

    def draw(self, img, detections, dw, dh):
        """Draw detections onto `img`, mapping boxes from the padded
        network-input space back to the original image resolution."""
        for detection in detections:
            box = detection["box"]
            class_id = detection["class_index"]
            confidence = detection["confidence"]

            # Undo the letterboxing
            rx = img.shape[1] / (self.input_width - dw)
            ry = img.shape[0] / (self.input_height - dh)
            box[0] = rx * box[0]
            box[1] = ry * box[1]
            box[2] = rx * box[2]
            box[3] = ry * box[3]

            xmax = box[0] + box[2]
            ymax = box[1] + box[3]

            # Drawing detection box
            color = tuple(map(int, self.colors[class_id]))
            cv2.rectangle(img, (int(box[0]), int(box[1])), (int(xmax), int(ymax)), color, 3)

            # Detection box text: "<class> <confidence to ~2dp>"
            class_string = coconame[class_id] + ' ' + str(confidence)[:4]
            text_size, _ = cv2.getTextSize(class_string, cv2.FONT_HERSHEY_DUPLEX, 1, 2)
            text_rect = (box[0], box[1] - 40, text_size[0] + 10, text_size[1] + 20)
            cv2.rectangle(img,
                          (int(text_rect[0]), int(text_rect[1])),
                          (int(text_rect[0] + text_rect[2]), int(text_rect[1] + text_rect[3])),
                          color, cv2.FILLED)
            cv2.putText(img, class_string, (int(box[0] + 5), int(box[1] - 10)), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
|
||||
|
||||
def make_parser():
    """Build the CLI parser for OpenVINO YOLOv9 inference.

    Returns:
        argparse.ArgumentParser exposing --model, --data_path, --score_thr
        and --nms_thr (fixes the "onnxruntime" prog name and "threshould"
        typos in the user-facing help text).
    """
    parser = argparse.ArgumentParser("openvino inference")
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default="yolov9-c-converted.onnx",
        help="Path to your OpenVINO (.xml) model.",
    )
    parser.add_argument(
        "-i",
        "--data_path",
        type=str,
        default='videos/palace.mp4',
        help="Path to your input image, folder or video.",
    )
    parser.add_argument(
        "-s",
        "--score_thr",
        type=float,
        default=0.1,
        help="Score threshold to filter the result.",
    )
    parser.add_argument(
        "-n",
        "--nms_thr",
        type=float,
        default=0.3,
        help="NMS threshold.",
    )

    return parser
|
||||
|
||||
# Process a single image
|
||||
def process_image(model, image_path):
    """Run detection on a single image and display the annotated result."""
    frame = cv2.imread(image_path)
    padded, dw, dh = model.resize_and_pad(frame)
    detections = model.predict(padded)
    model.draw(frame, detections, dw, dh)
    cv2.imshow("result", frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
|
||||
|
||||
# Process a folder of images
|
||||
def process_folder(model, folder_path):
    """Run detection on every image file directly inside `folder_path`.

    Accepts .jpg, .jpeg and .png (adds .jpeg, which the single-image path
    and the C++ implementation already accept); other files are skipped.
    """
    for filename in os.listdir(folder_path):
        if filename.endswith((".jpg", ".jpeg", ".png")):
            image_path = os.path.join(folder_path, filename)
            process_image(model, image_path)
|
||||
|
||||
# Process a video
|
||||
def process_video(model, video_path):
    """Run detection frame-by-frame on a video; press 'q' to stop early."""
    capture = cv2.VideoCapture(video_path)
    while capture.isOpened():
        ok, frame = capture.read()
        if not ok:
            break
        padded, dw, dh = model.resize_and_pad(frame)
        detections = model.predict(padded)
        model.draw(frame, detections, dw, dh)
        cv2.imshow("result", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    capture.release()
    cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: route the data path to image / folder / video handling."""
    args = make_parser().parse_args()

    # Initialize YOLOv9 model (expects an OpenVINO .xml model).
    # Bug fix: --score_thr / --nms_thr were parsed but never passed on.
    model = Yolov9(args.model, conf=args.score_thr, nms=args.nms_thr)

    # Same extension sets as the C++ implementation; case-insensitive.
    image_exts = ('.jpg', '.jpeg', '.png')
    video_exts = ('.mp4', '.avi', '.m4v', '.mpeg', '.mov', '.mkv', '.webm')

    data_path = args.data_path
    if os.path.isdir(data_path):
        process_folder(model, data_path)
    elif data_path.lower().endswith(image_exts):
        process_image(model, data_path)
    elif data_path.lower().endswith(video_exts):
        process_video(model, data_path)
    else:
        print("Error: Unsupported file format")


if __name__ == "__main__":
    main()
|
||||
2
python/requirements.txt
Normal file
2
python/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
openvino
|
||||
opencv-python
|
||||
Reference in New Issue
Block a user