First Commit

commit 6228af7a17
Date: 2026-02-08 07:04:22 +07:00
7 changed files with 728 additions and 0 deletions

README.md

@@ -0,0 +1,88 @@
<h1 align="center"><span>YOLOv9-OpenVINO</span></h1>
C++ and Python implementation of [YOLOv9](https://github.com/WongKinYiu/yolov9) using the OpenVINO backend.
<p align="center">
<img src="result.jpg"/>
</p>
## 🤖 Model
- Download the yolov9-c OpenVINO model: [yolov9-c-converted](https://drive.google.com/file/d/1eBs2zlPmPoa-K2N4enTG3srXmesKQyM9/view?usp=sharing)
- Or convert your custom YOLOv9 model to OpenVINO format:
``` shell
ovc yolov9-c-converted.onnx --compress_to_fp16 True --input images[1,3,640,640]
```
`ovc` is OpenVINO's command-line model converter; it takes a trained model (e.g. in ONNX format) and produces an OpenVINO IR model, consisting of an `.xml` file (topology) and a `.bin` file (weights).
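The conversion can also be done from Python. A minimal sketch using the `openvino.convert_model` API, assuming a single input of shape `[1, 3, 640, 640]` as in the `ovc` command above:
``` python
import openvino as ov

# Convert the ONNX model to an in-memory OpenVINO model
ov_model = ov.convert_model("yolov9-c-converted.onnx", input=[1, 3, 640, 640])

# Save as IR (.xml + .bin); weights are compressed to FP16 by default
ov.save_model(ov_model, "yolov9-c-converted.xml")
```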
## ⚙️ Build
#### Python
The following commands install the OpenVINO Python package, which also provides the `ovc` converter:
``` shell
cd python
pip install -r requirements.txt
```
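You can verify the installation with `python -c "import openvino as ov; print(ov.__version__)"`.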
#### C++
1. Download [OpenVINO](https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/windows/) and install it following [this guide](https://docs.openvino.ai/2023.3/openvino_docs_install_guides_installing_openvino_from_archive_windows.html)
2. Modify the OpenVINO and OpenCV paths in [CMakeLists.txt](https://github.com/spacewalk01/yolov9-openvino/blob/main/cpp/CMakeLists.txt)
3. Run the following commands to build the project (on Windows with MSVC, replace `make` with `cmake --build . --config Release`):
``` shell
cd cpp
mkdir build
cd build
cmake ..
make
```
## 🚀 Inference
#### Python
Usage:
``` shell
python main.py --model=<model path> --data_path=<data path> --score_thr=<score> --nms_thr=<nms>
```
Examples:
``` shell
# infer an image
python main.py --model=yolov9-c-converted.xml --data_path=test.jpg
# infer a folder of images
python main.py --model=yolov9-c-converted.xml --data_path=data
# infer a video
python main.py --model=yolov9-c-converted.xml --data_path=test.mp4
```
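The `Yolov9` class in `python/main.py` can also be used directly from your own script; a minimal sketch (run from the `python` directory):
``` python
from main import Yolov9, process_image

# Load the OpenVINO IR model with custom thresholds
model = Yolov9("yolov9-c-converted.xml", conf=0.3, nms=0.4)

# Detect objects in a single image and display the result
process_image(model, "test.jpg")
```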
#### C++
Usage:
``` shell
yolov9-openvino.exe <xml model path> <data path> [confidence threshold] [nms threshold]
```
Examples:
``` shell
# infer an image
yolov9-openvino.exe yolov9-c-converted.xml test.jpg
# infer a folder of images
yolov9-openvino.exe yolov9-c-converted.xml data
# infer a video
yolov9-openvino.exe yolov9-c-converted.xml test.mp4
```
## 🖥️ Requirements
- OpenVINO™ 2023.3.0
- OpenCV
## 🔗 Acknowledgement
This repo is based on the following projects:
- [yolov5-openvino](https://github.com/dacquaviva/yolov5-openvino-cpp-python) - Example of using Ultralytics YOLOv5 with OpenVINO in C++ and Python
- [YOLOv9](https://github.com/WongKinYiu/yolov9) - Learning What You Want to Learn Using Programmable Gradient Information

cpp/CMakeLists.txt

@@ -0,0 +1,41 @@
cmake_minimum_required(VERSION 3.12)
project(yolov9-openvino)
# Set C++ standard
set(CMAKE_CXX_STANDARD 17)
# Add source files
set(SOURCES
main.cpp
yolov9_openvino.cpp
)
# Add headers
set(HEADERS
yolov9_openvino.h
)
# Set your OpenCV path
set(OpenCV_DIR "C:\\opencv490\\build")
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
# Set your OpenVINO path
set(OPENVINO_DIR "C:\\Program Files (x86)\\Intel\\openvino_2023\\runtime")
# Include OpenVINO headers and link directories
include_directories(${OPENVINO_DIR}/include)
link_directories(${OPENVINO_DIR}/lib/intel64/release)
set(OPENVINO_LIBS openvino openvino_c)
# Create an executable
add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})
# Link libraries
target_link_libraries(${PROJECT_NAME}
${OpenCV_LIBS}
${OPENVINO_LIBS}
)

cpp/main.cpp

@@ -0,0 +1,149 @@
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <iostream>
#include <string>
#include "yolov9_openvino.h"
bool IsPathExist(const string& path) {
#ifdef _WIN32
DWORD fileAttributes = GetFileAttributesA(path.c_str());
return (fileAttributes != INVALID_FILE_ATTRIBUTES);
#else
return (access(path.c_str(), F_OK) == 0);
#endif
}
bool IsFile(const string& path) {
if (!IsPathExist(path)) {
printf("%s:%d %s not exist\n", __FILE__, __LINE__, path.c_str());
return false;
}
#ifdef _WIN32
DWORD fileAttributes = GetFileAttributesA(path.c_str());
return ((fileAttributes != INVALID_FILE_ATTRIBUTES) && ((fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0));
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0 && S_ISREG(buffer.st_mode));
#endif
}
int main(int argc, char** argv)
{
    assert(argc >= 3);

    const string model_file_path{ argv[1] };
    const string path{ argv[2] };
    vector<string> imagePathList;
    bool isVideo{ false };

    // Optional thresholds: argv[3] = confidence, argv[4] = NMS
    float conf_thresh = 0.2f;
    float nms_thresh = 0.3f;
    if (argc > 3)
    {
        conf_thresh = std::stof(argv[3]);
    }
    if (argc > 4)
    {
        nms_thresh = std::stof(argv[4]);
    }
if (IsFile(path))
{
string suffix = path.substr(path.find_last_of('.') + 1);
if (suffix == "jpg" || suffix == "jpeg" || suffix == "png")
{
imagePathList.push_back(path);
}
else if (suffix == "mp4" || suffix == "avi" || suffix == "m4v" || suffix == "mpeg" || suffix == "mov" || suffix == "mkv" || suffix == "webm")
{
isVideo = true;
}
else {
printf("suffix %s is wrong !!!\n", suffix.c_str());
abort();
}
}
    else if (IsPathExist(path))
    {
        // Assume it's a folder: collect all .jpg images inside it
        glob(path + "/*.jpg", imagePathList);
    }
// init model
Yolov9 model(model_file_path);
model.setConf(conf_thresh);
model.setNMS(nms_thresh);
if (isVideo) {
//path to video
string VideoPath = path;
// open cap
VideoCapture cap(VideoPath);
int width = cap.get(CAP_PROP_FRAME_WIDTH);
int height = cap.get(CAP_PROP_FRAME_HEIGHT);
// Create a VideoWriter object to save the processed video
VideoWriter output_video("output_video.avi", VideoWriter::fourcc('M', 'J', 'P', 'G'), 30, Size(width, height));
while (1)
{
Mat frame;
cap >> frame;
if (frame.empty()) break;
Resize res = model.resize_and_pad(frame);
vector<Detection> bboxes;
model.predict(res.resized_image, bboxes);
model.draw(frame, bboxes, res.dw, res.dh);
cv::imshow("prediction", frame);
output_video.write(frame);
cv::waitKey(1);
}
// Release resources
cv::destroyAllWindows();
cap.release();
output_video.release();
}
else {
// path to folder saves images
string imageFolderPath_out = "results/";
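        // NOTE: the results/ directory must already exist; cv::imwrite fails (returns false) otherwise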
for (const auto& imagePath : imagePathList)
{
// open image
Mat frame = imread(imagePath);
if (frame.empty())
{
cerr << "Error reading image: " << imagePath << endl;
continue;
}
Resize res = model.resize_and_pad(frame);
vector<Detection> bboxes;
model.predict(res.resized_image, bboxes);
model.draw(frame, bboxes, res.dw, res.dh);
            // Extract the file name (the part after the last '/') for the output path
            istringstream iss(imagePath);
            string token;
            while (getline(iss, token, '/'))
            {
            }
imwrite(imageFolderPath_out + token, frame);
std::cout << imageFolderPath_out + token << endl;
cv::imshow("prediction", frame);
cv::waitKey(0);
}
}
return 0;
}

cpp/yolov9_openvino.cpp

@@ -0,0 +1,170 @@
#include "yolov9_openvino.h"
#include <opencv2/dnn.hpp>
const vector<string> coconame = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
"truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
"laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush" };
Resize Yolov9::resize_and_pad(cv::Mat& img)
{
    // Input tensor layout is NHWC, so shape = {1, H, W, C}
    ov::Shape input_shape = compiled_model.input().get_shape();
    const float model_h = static_cast<float>(input_shape[1]);
    const float model_w = static_cast<float>(input_shape[2]);
    float width = img.cols;
    float height = img.rows;
    // Scale so the longer side fits the model input, preserving aspect ratio
    float r = std::min(model_w / width, model_h / height);
    int new_unpadW = int(round(width * r));
    int new_unpadH = int(round(height * r));
    Resize resize;
    cv::resize(img, resize.resized_image, cv::Size(new_unpadW, new_unpadH), 0, 0, cv::INTER_AREA);
    resize.dw = int(model_w) - new_unpadW;
    resize.dh = int(model_h) - new_unpadH;
    cv::Scalar color = cv::Scalar(100, 100, 100);
    cv::copyMakeBorder(resize.resized_image, resize.resized_image, 0, resize.dh, 0, resize.dw, cv::BORDER_CONSTANT, color);
    return resize;
}
Yolov9::Yolov9(const string &model_path)
{
// Step 1. Initialize OpenVINO Runtime core
ov::Core core;
// Step 2. Read a model
std::shared_ptr<ov::Model> model = core.read_model(model_path);
    // Step 3. Initialize preprocessing for the model
ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
// Specify input image format
ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
// Specify preprocess pipeline to input image without resizing
ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255., 255., 255. });
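    // Note: scale({255, 255, 255}) divides each channel by 255, normalizing pixel values to [0, 1]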
// Specify model's input layout
ppp.input().model().set_layout("NCHW");
// Specify output results format
ppp.output().tensor().set_element_type(ov::element::f32);
// Embed above steps in the graph
model = ppp.build();
compiled_model = core.compile_model(model, "CPU");
// Create random colors
random_device rd;
mt19937 gen(rd());
uniform_int_distribution<int> dis(100, 255);
for (int i = 0; i < coconame.size(); i++)
{
Scalar color = Scalar(dis(gen), dis(gen), dis(gen));
colors.push_back(color);
}
}
void Yolov9::predict(cv::Mat &img, std::vector<Detection> &output)
{
    // Step 5. Create tensor from image (the input element type is u8 after the PrePostProcessor)
    uint8_t* input_data = img.data;
    ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data);
// Step 6. Create an infer request for model inference
ov::InferRequest infer_request = compiled_model.create_infer_request();
infer_request.set_input_tensor(input_tensor);
infer_request.infer();
//Step 7. Retrieve inference results
const ov::Tensor& output_tensor = infer_request.get_output_tensor();
ov::Shape output_shape = output_tensor.get_shape();
float* detections = output_tensor.data<float>();
// Step 8. Postprocessing including NMS
vector<Rect> boxes;
vector<int> class_ids;
vector<float> confidences;
const Mat det_output(output_shape[1], output_shape[2], CV_32F, detections);
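    // det_output is (4 + num_classes) x num_candidates, e.g. 84 x 8400: rows 0-3 hold (cx, cy, w, h), the remaining rows hold per-class scores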
for (int i = 0; i < det_output.cols; ++i) {
const Mat classes_scores = det_output.col(i).rowRange(4, output_shape[1]);
Point class_id_point;
double score;
minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
if (score > CONFIDENCE_THRESHOLD) {
const float cx = det_output.at<float>(0, i);
const float cy = det_output.at<float>(1, i);
const float ow = det_output.at<float>(2, i);
const float oh = det_output.at<float>(3, i);
Rect box;
box.x = static_cast<int>((cx - 0.5 * ow));
box.y = static_cast<int>((cy - 0.5 * oh));
box.width = static_cast<int>(ow);
box.height = static_cast<int>(oh);
boxes.push_back(box);
class_ids.push_back(class_id_point.y);
confidences.push_back(score);
}
}
vector<int> nms_result;
dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
for (int i = 0; i < nms_result.size(); i++)
{
Detection result;
int idx = nms_result[i];
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
output.push_back(result);
}
}
void Yolov9::draw(Mat& img, vector<Detection>& output, float dw, float dh)
{
// Step 9. Print results and save Figure with detections
ov::Shape input_shape = compiled_model.input().get_shape();
for (int i = 0; i < output.size(); i++)
{
auto detection = output[i];
auto box = detection.box;
auto classId = detection.class_id;
auto confidence = detection.confidence;
        // Undo the letterbox: dw/dh are the width/height padding; input layout is NHWC = {1, H, W, C}
        float rx = (float)img.cols / (float)(input_shape[2] - dw);
        float ry = (float)img.rows / (float)(input_shape[1] - dh);
box.x = rx * box.x;
box.y = ry * box.y;
box.width = rx * box.width;
box.height = ry * box.height;
float xmax = box.x + box.width;
float ymax = box.y + box.height;
rectangle(img, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), colors[classId], 3);
// Detection box text
string class_string = coconame[classId] + ' ' + to_string(confidence).substr(0, 4);
Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0);
Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20);
rectangle(img, text_rect, colors[classId], FILLED);
putText(img, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0);
}
}
void Yolov9::setConf(float conf)
{
CONFIDENCE_THRESHOLD = conf;
}
void Yolov9::setNMS(float nms)
{
NMS_THRESHOLD = nms;
}

cpp/yolov9_openvino.h

@@ -0,0 +1,46 @@
#pragma once
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>
#include <random>
using namespace std;
using namespace cv;
struct Detection
{
int class_id;
float confidence;
cv::Rect box;
};
struct Resize
{
cv::Mat resized_image;
int dw;
int dh;
};
class Yolov9
{
public:
Yolov9(const string& model_path);
~Yolov9() {};
Resize resize_and_pad(cv::Mat& img);
void predict(cv::Mat& img, std::vector<Detection>& output);
void draw(Mat& img, vector<Detection>& output, float dw, float dh);
void setConf(float conf);
void setNMS(float nms);
private:
ov::CompiledModel compiled_model;
float NMS_THRESHOLD = 0.4;
float CONFIDENCE_THRESHOLD = 0.4;
vector<Scalar> colors;
};

python/main.py

@@ -0,0 +1,232 @@
from pathlib import Path
import openvino as ov
from openvino.preprocess import PrePostProcessor
from openvino.preprocess import ColorFormat
from openvino import Layout, Type
import numpy as np
import cv2
import argparse
import os
coconame = [
"karung", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
"truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
"laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush" ]
class Yolov9:
def __init__(self, xml_model_path="./model/yolov9-c-converted.xml", conf=0.2, nms=0.4):
# Step 1. Initialize OpenVINO Runtime core
core = ov.Core()
# Step 2. Read a model
model = core.read_model(str(Path(xml_model_path)))
        # Step 3. Initialize preprocessing for the model
ppp = PrePostProcessor(model)
# Specify input image format
ppp.input().tensor().set_element_type(Type.u8).set_layout(Layout("NHWC")).set_color_format(ColorFormat.BGR)
# Specify preprocess pipeline to input image without resizing
ppp.input().preprocess().convert_element_type(Type.f32).convert_color(ColorFormat.RGB).scale([255., 255., 255.])
# Specify model's input layout
ppp.input().model().set_layout(Layout("NCHW"))
# Specify output results format
ppp.output().tensor().set_element_type(Type.f32)
# Embed above steps in the graph
model = ppp.build()
self.compiled_model = core.compile_model(model, "CPU")
self.input_width = 640
self.input_height = 640
self.conf_thresh = conf
self.nms_thresh = nms
self.colors = []
# Create random colors
np.random.seed(42) # Setting seed for reproducibility
for i in range(len(coconame)):
color = tuple(np.random.randint(100, 256, size=3))
self.colors.append(color)
def resize_and_pad(self, image):
old_size = image.shape[:2]
        ratio = float(self.input_width / max(old_size))  # fit the longer side, so rectangular images are handled too
new_size = tuple([int(x*ratio) for x in old_size])
image = cv2.resize(image, (new_size[1], new_size[0]))
delta_w = self.input_width - new_size[1]
delta_h = self.input_height - new_size[0]
color = [100, 100, 100]
new_im = cv2.copyMakeBorder(image, 0, delta_h, 0, delta_w, cv2.BORDER_CONSTANT, value=color)
return new_im, delta_w, delta_h
def predict(self, img):
# Step 4. Create tensor from image
input_tensor = np.expand_dims(img, 0)
# Step 5. Create an infer request for model inference
infer_request = self.compiled_model.create_infer_request()
infer_request.infer({0: input_tensor})
# Step 6. Retrieve inference results
output = infer_request.get_output_tensor()
detections = output.data[0].T
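        # Output shape is [1, 84, 8400]; transposing gives [8400, 84], so each row is (cx, cy, w, h) + 80 class scores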
# Step 7. Postprocessing including NMS
boxes = []
class_ids = []
confidences = []
for prediction in detections:
classes_scores = prediction[4:]
_, _, _, max_indx = cv2.minMaxLoc(classes_scores)
class_id = max_indx[1]
if (classes_scores[class_id] > self.conf_thresh):
confidences.append(classes_scores[class_id])
class_ids.append(class_id)
x, y, w, h = prediction[0].item(), prediction[1].item(), prediction[2].item(), prediction[3].item()
xmin = x - (w / 2)
ymin = y - (h / 2)
box = np.array([xmin, ymin, w, h])
boxes.append(box)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, self.conf_thresh, self.nms_thresh)
detections = []
for i in indexes:
j = i.item()
detections.append({"class_index": class_ids[j], "confidence": confidences[j], "box": boxes[j]})
return detections
def draw(self, img, detections, dw, dh):
# Step 8. Print results and save Figure with detections
for detection in detections:
box = detection["box"]
classId = detection["class_index"]
confidence = detection["confidence"]
rx = img.shape[1] / (self.input_width - dw)
ry = img.shape[0] / (self.input_height - dh)
box[0] = rx * box[0]
box[1] = ry * box[1]
box[2] = rx * box[2]
box[3] = ry * box[3]
xmax = box[0] + box[2]
ymax = box[1] + box[3]
# Drawing detection box
cv2.rectangle(img, (int(box[0]), int(box[1])), (int(xmax), int(ymax)), tuple(map(int, self.colors[classId])), 3)
# Detection box text
class_string = coconame[classId] + ' ' + str(confidence)[:4]
text_size, _ = cv2.getTextSize(class_string, cv2.FONT_HERSHEY_DUPLEX, 1, 2)
text_rect = (box[0], box[1] - 40, text_size[0] + 10, text_size[1] + 20)
cv2.rectangle(img,
(int(text_rect[0]), int(text_rect[1])),
(int(text_rect[0] + text_rect[2]), int(text_rect[1] + text_rect[3])),
tuple(map(int, self.colors[classId])), cv2.FILLED)
cv2.putText(img, class_string, (int(box[0] + 5), int(box[1] - 10)), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
def make_parser():
    parser = argparse.ArgumentParser("openvino inference")
parser.add_argument(
"-m",
"--model",
type=str,
default="yolov9-c-converted.onnx",
help="Input your onnx model.",
)
parser.add_argument(
"-i",
"--data_path",
type=str,
default='videos/palace.mp4',
help="Path to your input image.",
)
parser.add_argument(
"-s",
"--score_thr",
type=float,
default=0.1,
help="Score threshould to filter the result.",
)
parser.add_argument(
"-n",
"--nms_thr",
type=float,
default=0.3,
help="NMS threshould.",
)
return parser
# Process a single image
def process_image(model, image_path):
img = cv2.imread(image_path)
img_resized, dw, dh = model.resize_and_pad(img)
results = model.predict(img_resized)
model.draw(img, results, dw, dh)
cv2.imshow("result", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Process a folder of images
def process_folder(model, folder_path):
for filename in os.listdir(folder_path):
if filename.endswith(".jpg") or filename.endswith(".png"):
image_path = os.path.join(folder_path, filename)
process_image(model, image_path)
# Process a video
def process_video(model, video_path):
cap = cv2.VideoCapture(video_path)
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
img_resized, dw, dh = model.resize_and_pad(frame)
results = model.predict(img_resized)
model.draw(frame, results, dw, dh)
cv2.imshow("result", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
    args = make_parser().parse_args()
    # Initialize the YOLOv9 model (expects an OpenVINO .xml model)
    model = Yolov9(args.model, conf=args.score_thr, nms=args.nms_thr)
if args.data_path.endswith('.jpg') or args.data_path.endswith('.png'):
process_image(model, args.data_path)
elif os.path.isdir(args.data_path):
process_folder(model, args.data_path)
    elif args.data_path.endswith('.mp4'):  # extend here to support other video formats
process_video(model, args.data_path)
else:
print("Error: Unsupported file format")
if __name__ == "__main__":
main()

python/requirements.txt

@@ -0,0 +1,2 @@
openvino
opencv-python