First Commit

2026-02-08 07:04:22 +07:00
commit 6228af7a17
7 changed files with 728 additions and 0 deletions
@@ -0,0 +1,41 @@
+cmake_minimum_required(VERSION 3.12)
+project(yolov9-openvino)
+
+# Set C++ standard
+set(CMAKE_CXX_STANDARD 17)
+
+
+# Include CUDA directories
+
+# Add source files
+set(SOURCES
+    main.cpp
+    yolov9_openvino.cpp
+)
+
+# Add headers
+set(HEADERS
+    yolov9_openvino.h
+)
+
+# Set your OpenCV path 
+set(OpenCV_DIR "C:\\opencv490\\build")
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+# Set your OpenVINO path 
+set(OPENVINO_DIR "C:\\Program Files (x86)\\Intel\\openvino_2023\\runtime")
+
+# Include TensorRT
+include_directories(${OPENVINO_DIR}/include)
+link_directories(${OPENVINO_DIR}/lib/intel64/release)
+set(OPENVINO_LIBS openvino openvino_c)
+
+# Create an executable
+add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})
+
+# Link libraries
+target_link_libraries(${PROJECT_NAME}
+    ${OpenCV_LIBS}
+    ${OPENVINO_LIBS}
+)
@@ -0,0 +1,149 @@
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+#include <iostream>
+#include <string>
+#include "yolov9_openvino.h"
+
+
+bool IsPathExist(const string& path) {
+#ifdef _WIN32
+    DWORD fileAttributes = GetFileAttributesA(path.c_str());
+    return (fileAttributes != INVALID_FILE_ATTRIBUTES);
+#else
+    return (access(path.c_str(), F_OK) == 0);
+#endif
+}
+bool IsFile(const string& path) {
+    if (!IsPathExist(path)) {
+        printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
+        return false;
+    }
+
+#ifdef _WIN32
+    DWORD fileAttributes = GetFileAttributesA(path.c_str());
+    return ((fileAttributes != INVALID_FILE_ATTRIBUTES) && ((fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0));
+#else
+    struct stat buffer;
+    return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
+#endif
+}
+
+int main(int argc, char** argv)
+{
+    const string model_file_path{ argv[1] };
+    const string path{ argv[2] };
+    vector<string> imagePathList;
+    bool                     isVideo{ false };
+    assert(argc >= 3);
+
+    float conf_thresh = 0.2;
+    float nms_thresh = 0.3;
+    if (argc > 3)
+    {
+        conf_thresh = std::stof(argv[3]);
+    }
+    else if (argc > 4)
+    {
+        nms_thresh = std::stof(argv[3]);
+    }
+
+    if (IsFile(path))
+    {
+        string suffix = path.substr(path.find_last_of('.') + 1);
+        if (suffix == "jpg" || suffix == "jpeg" || suffix == "png")
+        {
+            imagePathList.push_back(path);
+        }
+        else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov" || suffix == "mkv" || suffix == "webm")
+        {
+            isVideo = true;
+        }
+        else {
+            printf("suffix %s is wrong !!!\n", suffix.c_str());
+            abort();
+        }
+    }
+    else if (IsPathExist(path))
+    {
+        glob(path + "/*.jpg", imagePathList);
+    }
+
+    // Assume it's a folder, add logic to handle folders
+    // init model
+    Yolov9 model(model_file_path);
+    model.setConf(conf_thresh);
+    model.setNMS(nms_thresh);
+
+    if (isVideo) {
+        //path to video
+        string VideoPath = path;
+        // open cap
+        VideoCapture cap(VideoPath);
+
+        int width = cap.get(CAP_PROP_FRAME_WIDTH);
+        int height = cap.get(CAP_PROP_FRAME_HEIGHT);
+
+        // Create a VideoWriter object to save the processed video
+        VideoWriter output_video("output_video.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), 30, Size(width, height));
+        while (1)
+        {
+            Mat frame;
+            cap >> frame;
+
+            if (frame.empty()) break;
+
+            Resize res = model.resize_and_pad(frame);
+
+            vector<Detection> bboxes;
+            model.predict(res.resized_image, bboxes);
+            model.draw(frame, bboxes, res.dw, res.dh);
+
+            cv::imshow("prediction", frame);
+            output_video.write(frame);
+            cv::waitKey(1);
+        }
+
+        // Release resources
+        cv::destroyAllWindows();
+        cap.release();
+        output_video.release();
+    }
+    else {
+        // path to folder saves images
+        string imageFolderPath_out = "results/";
+        for (const auto& imagePath : imagePathList)
+        {
+            // open image
+            Mat frame = imread(imagePath);
+            if (frame.empty())
+            {
+                cerr << "Error reading image: " << imagePath << endl;
+                continue;
+            }
+
+            Resize res = model.resize_and_pad(frame);
+
+            vector<Detection> bboxes;
+            model.predict(res.resized_image, bboxes);
+            model.draw(frame, bboxes, res.dw, res.dh);
+
+            istringstream iss(imagePath);
+            string token;
+            while (getline(iss, token, '/'))
+            {
+            }
+            imwrite(imageFolderPath_out + token, frame);
+            std::cout << imageFolderPath_out + token << endl;
+
+            cv::imshow("prediction", frame);
+            cv::waitKey(0);
+        }
+    }
+
+    return 0;
+}
@@ -0,0 +1,170 @@
+#include "yolov9_openvino.h"
+#include <opencv2/dnn.hpp>
+
+const vector<string> coconame = {
+    "person",         "bicycle",    "car",           "motorcycle",    "airplane",     "bus",           "train",
+    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",    "parking meter", "bench",
+    "bird",           "cat",        "dog",           "horse",         "sheep",        "cow",           "elephant",
+    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",     "handbag",       "tie",
+    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball",  "kite",          "baseball bat",
+    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",       "wine glass",    "cup",
+    "fork",           "knife",      "spoon",         "bowl",          "banana",       "apple",         "sandwich",
+    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",        "donut",         "cake",
+    "chair",          "couch",      "potted plant",  "bed",           "dining table", "toilet",        "tv",
+    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",   "microwave",     "oven",
+    "toaster",        "sink",       "refrigerator",  "book",          "clock",        "vase",          "scissors",
+    "teddy bear",     "hair drier", "toothbrush" };
+
+
+Resize Yolov9::resize_and_pad(cv::Mat& img) 
+{
+    ov::Shape input_shape = compiled_model.input().get_shape();
+    
+    float width = img.cols;
+    float height = img.rows;
+    float r = float(input_shape[1] / max(width, height));
+    int new_unpadW = int(round(width * r));
+    int new_unpadH = int(round(height * r));
+    Resize resize;
+    cv::resize(img, resize.resized_image, cv::Size(new_unpadW, new_unpadH), 0, 0, cv::INTER_AREA);
+
+    resize.dw = input_shape[1] - new_unpadW;
+    resize.dh = input_shape[2] - new_unpadH;
+    cv::Scalar color = cv::Scalar(100, 100, 100);
+    cv::copyMakeBorder(resize.resized_image, resize.resized_image, 0, resize.dh, 0, resize.dw, cv::BORDER_CONSTANT, color);
+
+    return resize;
+}
+
+Yolov9::Yolov9(const string &model_path)
+{
+    // Step 1. Initialize OpenVINO Runtime core
+    ov::Core core;
+    // Step 2. Read a model
+    std::shared_ptr<ov::Model> model = core.read_model(model_path);
+    // Step 3. Inizialize Preprocessing for the model
+    ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
+    // Specify input image format
+    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
+    // Specify preprocess pipeline to input image without resizing
+    ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255., 255., 255. });
+    //  Specify model's input layout
+    ppp.input().model().set_layout("NCHW");
+    // Specify output results format
+    ppp.output().tensor().set_element_type(ov::element::f32);
+    // Embed above steps in the graph
+    model = ppp.build();
+    compiled_model = core.compile_model(model, "CPU");
+
+    // Create random colors
+    random_device rd;
+    mt19937 gen(rd());
+    uniform_int_distribution<int> dis(100, 255);
+    for (int i = 0; i < coconame.size(); i++)
+    {
+        Scalar color = Scalar(dis(gen), dis(gen), dis(gen));
+        colors.push_back(color);
+    }
+}
+
+void Yolov9::predict(cv::Mat &img, std::vector<Detection> &output)
+{
+    // Step 5. Create tensor from image
+    float* input_data = (float*)img.data;
+    ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data);
+
+    // Step 6. Create an infer request for model inference 
+    ov::InferRequest infer_request = compiled_model.create_infer_request();
+    infer_request.set_input_tensor(input_tensor);
+    infer_request.infer();
+
+    //Step 7. Retrieve inference results 
+    const ov::Tensor& output_tensor = infer_request.get_output_tensor();
+    ov::Shape output_shape = output_tensor.get_shape();
+
+    float* detections = output_tensor.data<float>();
+
+    // Step 8. Postprocessing including NMS  
+    vector<Rect> boxes;
+    vector<int> class_ids;
+    vector<float> confidences;
+
+    const Mat det_output(output_shape[1], output_shape[2], CV_32F, detections);
+
+    for (int i = 0; i < det_output.cols; ++i) {
+        const Mat classes_scores = det_output.col(i).rowRange(4, output_shape[1]);
+        Point class_id_point;
+        double score;
+        minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
+
+        if (score > CONFIDENCE_THRESHOLD) {
+            const float cx = det_output.at<float>(0, i);
+            const float cy = det_output.at<float>(1, i);
+            const float ow = det_output.at<float>(2, i);
+            const float oh = det_output.at<float>(3, i);
+            Rect box;
+            box.x = static_cast<int>((cx - 0.5 * ow));
+            box.y = static_cast<int>((cy - 0.5 * oh));
+            box.width = static_cast<int>(ow);
+            box.height = static_cast<int>(oh);
+
+            boxes.push_back(box);
+            class_ids.push_back(class_id_point.y);
+            confidences.push_back(score);
+        }
+    }
+
+    vector<int> nms_result;
+    dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
+
+    for (int i = 0; i < nms_result.size(); i++)
+    {
+        Detection result;
+        int idx = nms_result[i];
+        result.class_id = class_ids[idx];
+        result.confidence = confidences[idx];
+        result.box = boxes[idx];
+        output.push_back(result);
+    }
+}
+
+void Yolov9::draw(Mat& img, vector<Detection>& output, float dw, float dh)
+{
+    // Step 9. Print results and save Figure with detections
+    ov::Shape input_shape = compiled_model.input().get_shape();
+
+    for (int i = 0; i < output.size(); i++)
+    {
+        auto detection = output[i];
+        auto box = detection.box;
+        auto classId = detection.class_id;
+        auto confidence = detection.confidence;
+        float rx = (float)img.cols / (float)(input_shape[1] - dw);
+        float ry = (float)img.rows / (float)(input_shape[2] - dh);
+        box.x = rx * box.x;
+        box.y = ry * box.y;
+        box.width = rx * box.width;
+        box.height = ry * box.height;
+        float xmax = box.x + box.width;
+        float ymax = box.y + box.height;
+
+        rectangle(img, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), colors[classId], 3);
+
+        // Detection box text
+        string class_string = coconame[classId] + ' ' + to_string(confidence).substr(0, 4);
+        Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0);
+        Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20);
+        rectangle(img, text_rect, colors[classId], FILLED);
+        putText(img, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0);
+    }
+}
+
+void Yolov9::setConf(float conf)
+{
+    CONFIDENCE_THRESHOLD = conf;
+}
+
+void Yolov9::setNMS(float nms)
+{
+    NMS_THRESHOLD = nms;
+}
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <openvino/openvino.hpp>
+#include <random>
+
+using namespace std;
+using namespace cv;
+
+struct Detection
+{
+    int class_id;
+    float confidence;
+    cv::Rect box;
+};
+
+
+struct Resize
+{
+    cv::Mat resized_image;
+    int dw;
+    int dh;
+};
+
+class Yolov9
+{
+public:
+    Yolov9(const string& model_path);
+    ~Yolov9() {};
+
+    Resize resize_and_pad(cv::Mat& img);
+    void predict(cv::Mat& img, std::vector<Detection>& output);
+    void draw(Mat& img, vector<Detection>& output, float dw, float dh);
+
+    void setConf(float conf);
+    void setNMS(float nms);
+
+private:
+    
+    ov::CompiledModel compiled_model;
+
+    float NMS_THRESHOLD = 0.4;
+    float CONFIDENCE_THRESHOLD = 0.4;
+
+    vector<Scalar> colors;
+};