From 4fa9af81b49e4908802164572786f8f24647478e Mon Sep 17 00:00:00 2001 From: root Date: Tue, 9 Jun 2026 01:37:16 +0800 Subject: [PATCH 1/3] [NPU:rknn] initial commit --- CMakeLists.txt | 8 + docs/inference/npu.md | 100 +++++ source/backend/rknn/CMakeLists.txt | 15 + source/backend/rknn/backend/RKNNBackend.cpp | 379 +++++++++++++++++++ source/backend/rknn/backend/RKNNBackend.hpp | 56 +++ source/core/Backend.cpp | 6 + tools/converter/CMakeLists.txt | 4 + tools/converter/include/config.hpp | 6 + tools/converter/source/MNNConverter.cpp | 5 +- tools/converter/source/common/RKNNBundle.cpp | 300 +++++++++++++++ tools/converter/source/common/RKNNBundle.hpp | 23 ++ tools/converter/source/common/cli.cpp | 47 ++- 12 files changed, 940 insertions(+), 9 deletions(-) create mode 100644 source/backend/rknn/CMakeLists.txt create mode 100644 source/backend/rknn/backend/RKNNBackend.cpp create mode 100644 source/backend/rknn/backend/RKNNBackend.hpp create mode 100644 tools/converter/source/common/RKNNBundle.cpp create mode 100644 tools/converter/source/common/RKNNBundle.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e292c1379b..24313ace46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -282,6 +282,7 @@ option(MNN_COREML "Enable CoreML" OFF) option(MNN_NNAPI "Enable NNAPI" OFF) option(MNN_QNN "Enable QNN" OFF) option(MNN_QNN_ONLINE_FINALIZE "Enable QNN Online Finalize" ON) +option(MNN_RKNN "Enable RKNN runtime backend" OFF) option(MNN_GPU_TIME_PROFILE "Enable time profiling for the OpenCL backend and Vulkan backend." OFF) option(MNN_GPU_PROFILE_SILENT "When GPU time profiling is enabled, only accumulate total time without printing per-kernel details." 
OFF) @@ -680,6 +681,13 @@ IF(MNN_QNN) list(APPEND MNN_OBJECTS_TO_LINK $) ENDIF() +# RKNN +IF(MNN_RKNN) + add_definitions(-DMNN_RKNN_ENABLED=1) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/source/backend/rknn/) + list(APPEND MNN_OBJECTS_TO_LINK $) +ENDIF() + # NEUROPILOT IF(MNN_NEUROPILOT) target_compile_options(MNNCore PRIVATE -DMNN_NEUROPILOT=1) diff --git a/docs/inference/npu.md b/docs/inference/npu.md index b740343914..a239d58ad8 100644 --- a/docs/inference/npu.md +++ b/docs/inference/npu.md @@ -5,6 +5,7 @@ - CoreML - NNAPI - HIAI +- RKNN ## QNN @@ -178,3 +179,102 @@ cp -r ${DDK}/include ${MNN}/source/backend/hiai/3rdParty/include 1. cmake 参数打开npu开关: -DMNN_NPU=ON 2. backend type设置成:MNN_FORWARD_USER_0 3. 执行可执行程序(需动态加载:libMNN_NPU.so, libhiai_ir_build.so, libhiai_ir.so, libhiai.so) + +## RKNN +适用于 Rockchip RKNPU 平台。当前接入方式不是在线逐算子构图,而是同一份 ONNX 在 Host 侧同时生成: +- 包装后的 `.mnn` +- sidecar `.rknn` + +其中 `.mnn` 内部只保留 `Input + Extra(type="RKNN")` 包装图,运行时由 MNN 的 RKNN backend 调用 RKNN C API 执行 `.rknn`。 + +### RKNN 后端整体介绍 + +- Host 侧通过 `MNNConvert --rknn` 完成双产物生成,不走 `compilefornpu` 的 `MNN -> NPU` 逐算子编译链路。 +- Device 侧通过 RKNN C API 加载 `.rknn` 并执行,当前 backend 注册为 `MNN_FORWARD_USER_2`。 +- RKNN backend 读取 runtime 库路径、转换脚本路径、目标平台等信息时,不做硬编码,全部从环境变量读取;缺失时直接报 `MNN_ERROR`。 + +### 编译 + +#### Host,编译带 RKNN 转换能力的 MNNConvert + +需要开启: +- `-DMNN_BUILD_CONVERTER=ON` +- `-DMNN_RKNN_CONVERT_MODE=ON` + +示例: + +```bash +cmake -S ${MNN_ROOT} -B ${BUILD_DIR} \ + -DMNN_BUILD_CONVERTER=ON \ + -DMNN_RKNN_CONVERT_MODE=ON + +cmake --build ${BUILD_DIR} --target MNNConvert -j8 +``` + +#### Device/Runtime,编译带 RKNN backend 的 MNN + +需要开启: +- `-DMNN_RKNN=ON` +- `-DRKNN_API_INCLUDE_DIR=/path/to/rknn_api/include` + +示例: + +```bash +cmake -S ${MNN_ROOT} -B ${BUILD_DIR} \ + -DMNN_RKNN=ON \ + -DRKNN_API_INCLUDE_DIR=/path/to/rknn_api/include + +cmake --build ${BUILD_DIR} --target MNN -j8 +``` + +### Host,生成 RKNN 包装模型 + +调用 `MNNConvert --rknn` 前,必须设置以下环境变量: + +- `MNN_RKNN_TARGET` + - 例如 `rv1126b` +- `MNN_RKNN_PYTHON` + - 
RKNN Toolkit 所在 Python 解释器 +- `MNN_RKNN_SCRIPT` + - ONNX 转 `.rknn` 的脚本路径 +- `MNN_RKNN_OUTPUT_DIR` + - `.rknn` 和 manifest 的输出目录 + +示例: + +```bash +export MNN_RKNN_TARGET=rv1126b +export MNN_RKNN_PYTHON=/path/to/python +export MNN_RKNN_SCRIPT=/path/to/to_rknn.py +export MNN_RKNN_OUTPUT_DIR=/path/to/output/sidecar + +${BUILD_DIR}/MNNConvert \ + -f ONNX \ + --modelFile model.onnx \ + --MNNModel model.mnn \ + --rknn +``` + +执行成功后会生成: +- `model.mnn` + - RKNN wrapper 模型 +- `${MNN_RKNN_OUTPUT_DIR}/model_${MNN_RKNN_TARGET}.rknn` +- `${MNN_RKNN_OUTPUT_DIR}/model.rknn.bundle.json` + +### Device,运行 + +运行时必须设置: +- `MNN_RKNN_RUNTIME_LIB` + - 指向目标板上的 `librknnrt.so` + +并在创建 Session 时选择: +- backend type = `MNN_FORWARD_USER_2` + +如果 `.rknn` 路径在 wrapper `.mnn` 中是相对路径,则需要确保模型外部路径设置正确,使 MNN 能解析 sidecar 所在目录。 + +### 当前限制 + +- 当前 RKNN backend 只执行 `Extra(type="RKNN")` 节点,不支持逐算子 RKNN backend。 +- 当前实现走 host buffer copy 路径,尚未做 zero-copy。 +- 当前输出路径按 `float32` 处理。 +- 当前主目标是板端运行;PC 侧如果没有可用的 x86 `librknnrt.so`,则不能直接用 MNN runtime 在 Host 上模拟执行 RKNN backend。 diff --git a/source/backend/rknn/CMakeLists.txt b/source/backend/rknn/CMakeLists.txt new file mode 100644 index 0000000000..bb95975eb1 --- /dev/null +++ b/source/backend/rknn/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB BACKEND_SRCS ${CMAKE_CURRENT_LIST_DIR}/backend/*.cpp) +set(MNN_RKNN_SRCS ${BACKEND_SRCS}) + +set(_RKNN_API_INCLUDE "$ENV{RKNN_API_INCLUDE_DIR}") +if (DEFINED RKNN_API_INCLUDE_DIR AND NOT "${RKNN_API_INCLUDE_DIR}" STREQUAL "") + set(_RKNN_API_INCLUDE "${RKNN_API_INCLUDE_DIR}") +endif() + +if ("${_RKNN_API_INCLUDE}" STREQUAL "") + message(FATAL_ERROR "MNN_RKNN=ON requires RKNN_API_INCLUDE_DIR (or env RKNN_API_INCLUDE_DIR) to point to the directory containing rknn_api.h") +endif() + +add_library(MNN_RKNN OBJECT ${MNN_RKNN_SRCS}) +target_include_directories(MNN_RKNN PRIVATE ${CMAKE_CURRENT_LIST_DIR}/backend/) +target_include_directories(MNN_RKNN PRIVATE ${_RKNN_API_INCLUDE}) diff --git a/source/backend/rknn/backend/RKNNBackend.cpp
b/source/backend/rknn/backend/RKNNBackend.cpp new file mode 100644 index 0000000000..a9999ff726 --- /dev/null +++ b/source/backend/rknn/backend/RKNNBackend.cpp @@ -0,0 +1,379 @@ +#include "RKNNBackend.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "MNN_generated.h" +#include "core/MNNFileUtils.h" +#include "core/Macro.h" +#include "core/TensorUtils.hpp" +#include "rknn_api.h" + +namespace MNN { +namespace RKNN { +namespace { + +static const char* kRuntimeLibEnv = "MNN_RKNN_RUNTIME_LIB"; +static const char* kExtraTypeName = "RKNN"; +static const char* kModelPathAttr = "model_path"; + +class HostMemObj : public Backend::MemObj { +public: + explicit HostMemObj(size_t size) : mPtr(std::malloc(size)) { + } + ~HostMemObj() override { + std::free(mPtr); + } + MemChunk chunk() override { + return MemChunk(mPtr, 0); + } + bool valid() const { + return nullptr != mPtr; + } +private: + void* mPtr = nullptr; +}; + +struct RKNNApi { + using Init = int (*)(rknn_context*, void*, uint32_t, uint32_t, rknn_init_extend*); + using Destroy = int (*)(rknn_context); + using Query = int (*)(rknn_context, rknn_query_cmd, void*, uint32_t); + using InputsSet = int (*)(rknn_context, uint32_t, rknn_input[]); + using Run = int (*)(rknn_context, rknn_run_extend*); + using OutputsGet = int (*)(rknn_context, uint32_t, rknn_output[], rknn_output_extend*); + using OutputsRelease = int (*)(rknn_context, uint32_t, rknn_output[]); + + bool loaded = false; + void* handle = nullptr; + Init init = nullptr; + Destroy destroy = nullptr; + Query query = nullptr; + InputsSet inputsSet = nullptr; + Run run = nullptr; + OutputsGet outputsGet = nullptr; + OutputsRelease outputsRelease = nullptr; +}; + +static const RKNNApi* loadApi() { + static std::once_flag once; + static RKNNApi api; + std::call_once(once, []() { + auto libPath = std::getenv(kRuntimeLibEnv); + if (nullptr == libPath || libPath[0] == '\0') { + MNN_ERROR("MNN_RKNN: missing environment variable %s\n", 
kRuntimeLibEnv); + return; + } + api.handle = dlopen(libPath, RTLD_NOW | RTLD_LOCAL); + if (nullptr == api.handle) { + MNN_ERROR("MNN_RKNN: dlopen failed for %s, error: %s\n", libPath, dlerror()); + return; + } +#define MNN_RKNN_LOAD_SYMBOL(typeName, field, symbol) \ + api.field = reinterpret_cast(dlsym(api.handle, symbol)); \ + if (nullptr == api.field) { \ + MNN_ERROR("MNN_RKNN: dlsym failed for %s\n", symbol); \ + return; \ + } + MNN_RKNN_LOAD_SYMBOL(Init, init, "rknn_init"); + MNN_RKNN_LOAD_SYMBOL(Destroy, destroy, "rknn_destroy"); + MNN_RKNN_LOAD_SYMBOL(Query, query, "rknn_query"); + MNN_RKNN_LOAD_SYMBOL(InputsSet, inputsSet, "rknn_inputs_set"); + MNN_RKNN_LOAD_SYMBOL(Run, run, "rknn_run"); + MNN_RKNN_LOAD_SYMBOL(OutputsGet, outputsGet, "rknn_outputs_get"); + MNN_RKNN_LOAD_SYMBOL(OutputsRelease, outputsRelease, "rknn_outputs_release"); +#undef MNN_RKNN_LOAD_SYMBOL + api.loaded = true; + }); + return api.loaded ? &api : nullptr; +} + +static std::string getStringAttr(const Extra* extra, const char* key) { + if (nullptr == extra || nullptr == extra->attr()) { + return ""; + } + for (int i = 0; i < extra->attr()->size(); ++i) { + auto attr = extra->attr()->GetAs(i); + if (nullptr == attr || nullptr == attr->key()) { + continue; + } + if (attr->key()->str() == key && nullptr != attr->s()) { + return attr->s()->str(); + } + } + return ""; +} + +static std::string resolveModelPath(const Backend* backend, const std::string& path) { + if (path.empty()) { + return ""; + } + if (!path.empty() && path[0] == '/') { + return path; + } + return MNNFilePathConcat(backend->pNPUModelDirPath, path); +} + +static rknn_tensor_type mapTensorType(const Tensor* tensor) { + auto type = tensor->getType(); + if (type.code == halide_type_float && type.bits == 32) { + return RKNN_TENSOR_FLOAT32; + } + if (type.code == halide_type_uint && type.bits == 8) { + return RKNN_TENSOR_UINT8; + } + if (type.code == halide_type_int && type.bits == 8) { + return RKNN_TENSOR_INT8; + } + if (type.code 
== halide_type_int && type.bits == 32) { + return RKNN_TENSOR_INT32; + } + return RKNN_TENSOR_FLOAT32; +} + +static rknn_tensor_format mapTensorFormat(const Tensor* tensor) { + auto format = TensorUtils::getDescribe(tensor)->dimensionFormat; + if (format == MNN_DATA_FORMAT_NHWC) { + return RKNN_TENSOR_NHWC; + } + return RKNN_TENSOR_NCHW; +} + +static Tensor::DimensionType getHostTensorDimType(const Tensor* tensor) { + return tensor->getDimensionType(); +} + +/* Execution for a single Extra(type="RKNN") op: drives the sidecar .rknn model through the RKNNApi function table. */ +class RKNNExecution : public Execution { +public: + /* Resolves the 'model_path' attr of the Extra op, initializes an RKNN context from that file and queries the input/output count and per-tensor attrs. Every failure is logged and leaves mValid == false. */ + RKNNExecution(Backend* backend, const Op* op, const RKNNApi* api) : Execution(backend), mApi(api) { + /* The destructor already treats mApi as nullable; refuse to go on without it instead of dereferencing null below. */ + if (nullptr == mApi) { + MNN_ERROR("MNN_RKNN: RKNN runtime api is not loaded\n"); + mValid = false; + return; + } + if (nullptr == op || op->type() != OpType_Extra || nullptr == op->main_as_Extra()) { + MNN_ERROR("MNN_RKNN: invalid op for RKNN execution\n"); + mValid = false; + return; + } + auto extra = op->main_as_Extra(); + /* The type string accessor yields nullptr when the field is absent from the model; check it before calling str(). */ + if (nullptr == extra->type() || extra->type()->str() != kExtraTypeName) { + MNN_ERROR("MNN_RKNN: unsupported Extra type\n"); + mValid = false; + return; + } + mModelPath = resolveModelPath(backend, getStringAttr(extra, kModelPathAttr)); + if (mModelPath.empty()) { + MNN_ERROR("MNN_RKNN: Extra(%s) requires attr '%s'\n", kExtraTypeName, kModelPathAttr); + mValid = false; + return; + } + if (!MNNFileExist(mModelPath.c_str())) { + MNN_ERROR("MNN_RKNN: model file does not exist: %s\n", mModelPath.c_str()); + mValid = false; + return; + } + /* size == 0 is passed together with the path string, i.e. the runtime is asked to load the model from the file path. */ + if (mApi->init(&mContext, (void*)mModelPath.c_str(), 0, 0, nullptr) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: rknn_init failed for %s\n", mModelPath.c_str()); + mValid = false; + return; + } + if (mApi->query(mContext, RKNN_QUERY_IN_OUT_NUM, &mIoNum, sizeof(mIoNum)) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: query in/out num failed\n"); + mValid = false; + return; + } + mInputAttrs.resize(mIoNum.n_input); + mOutputAttrs.resize(mIoNum.n_output); + for (uint32_t i = 0; i < mIoNum.n_input; ++i) { + std::memset(&mInputAttrs[i], 0, sizeof(rknn_tensor_attr)); + mInputAttrs[i].index = i; + if (mApi->query(mContext, RKNN_QUERY_INPUT_ATTR, &mInputAttrs[i],
sizeof(rknn_tensor_attr)) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: query input attr failed: %u\n", i); + mValid = false; + return; + } + } + for (uint32_t i = 0; i < mIoNum.n_output; ++i) { + std::memset(&mOutputAttrs[i], 0, sizeof(rknn_tensor_attr)); + mOutputAttrs[i].index = i; + if (mApi->query(mContext, RKNN_QUERY_OUTPUT_ATTR, &mOutputAttrs[i], sizeof(rknn_tensor_attr)) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: query output attr failed: %u\n", i); + mValid = false; + return; + } + } + } + + ~RKNNExecution() override { + if (mContext != 0 && nullptr != mApi) { + mApi->destroy(mContext); + } + } + + ErrorCode onResize(const std::vector& inputs, const std::vector& outputs) override { + if ((uint32_t)inputs.size() != mIoNum.n_input || (uint32_t)outputs.size() != mIoNum.n_output) { + MNN_ERROR("MNN_RKNN: input/output count mismatch, expect %u/%u, got %zu/%zu\n", + mIoNum.n_input, mIoNum.n_output, inputs.size(), outputs.size()); + return INVALID_VALUE; + } + return NO_ERROR; + } + + ErrorCode onExecute(const std::vector& inputs, const std::vector& outputs) override { + std::vector> hostInputs; + std::vector rknnInputs(inputs.size()); + for (size_t i = 0; i < inputs.size(); ++i) { + hostInputs.emplace_back(new Tensor(inputs[i], getHostTensorDimType(inputs[i]))); + if (!MNNCPUCopyBuffer(inputs[i], hostInputs.back().get())) { + MNN_ERROR("MNN_RKNN: failed to copy input tensor %zu to host\n", i); + return INVALID_VALUE; + } + std::memset(&rknnInputs[i], 0, sizeof(rknn_input)); + rknnInputs[i].index = (uint32_t)i; + rknnInputs[i].buf = hostInputs.back()->buffer().host; + rknnInputs[i].size = hostInputs.back()->size(); + rknnInputs[i].pass_through = 0; + rknnInputs[i].type = mapTensorType(hostInputs.back().get()); + rknnInputs[i].fmt = mapTensorFormat(hostInputs.back().get()); + } + if (mApi->inputsSet(mContext, (uint32_t)rknnInputs.size(), rknnInputs.data()) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: rknn_inputs_set failed\n"); + return INVALID_VALUE; + } + if (mApi->run(mContext, 
nullptr) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: rknn_run failed\n"); + return INVALID_VALUE; + } + + std::vector rknnOutputs(outputs.size()); + for (size_t i = 0; i < outputs.size(); ++i) { + std::memset(&rknnOutputs[i], 0, sizeof(rknn_output)); + rknnOutputs[i].index = (uint32_t)i; + rknnOutputs[i].want_float = 1; + rknnOutputs[i].is_prealloc = 0; + } + if (mApi->outputsGet(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data(), nullptr) != RKNN_SUCC) { + MNN_ERROR("MNN_RKNN: rknn_outputs_get failed\n"); + return INVALID_VALUE; + } + + for (size_t i = 0; i < outputs.size(); ++i) { + if (outputs[i]->getType().code != halide_type_float || outputs[i]->getType().bits != 32) { + MNN_ERROR("MNN_RKNN: only float32 outputs are supported in the first runtime version\n"); + mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); + return NOT_SUPPORT; + } + Tensor hostOutput(outputs[i], getHostTensorDimType(outputs[i])); + auto copySize = ALIMIN((int)hostOutput.size(), (int)rknnOutputs[i].size); + std::memcpy(hostOutput.buffer().host, rknnOutputs[i].buf, copySize); + if (!MNNCPUCopyBuffer(&hostOutput, outputs[i])) { + MNN_ERROR("MNN_RKNN: failed to copy output tensor %zu from host\n", i); + mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); + return INVALID_VALUE; + } + } + mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); + return NO_ERROR; + } + +private: + const RKNNApi* mApi = nullptr; + std::string mModelPath; + rknn_context mContext = 0; + rknn_input_output_num mIoNum{}; + std::vector mInputAttrs; + std::vector mOutputAttrs; +}; + +} // namespace + +RKNNBackend::RKNNBackend(const RKNNRuntime* runtime) : Backend(MNN_FORWARD_USER_2), mRuntime(runtime) { +} + +Execution* RKNNBackend::onCreate(const std::vector& inputs, const std::vector& outputs, const MNN::Op* op) { + auto api = loadApi(); + if (nullptr == api) { + return nullptr; + } + if (nullptr == op || op->type() != 
OpType_Extra || nullptr == op->main_as_Extra()) { + return nullptr; + } + auto extra = op->main_as_Extra(); + /* Only Extra ops whose type string is kExtraTypeName are handled; the string accessor yields nullptr when the field is absent, so check it before calling str(). */ + if (nullptr == extra->type() || extra->type()->str() != kExtraTypeName) { + return nullptr; + } + /* The execution reports construction failures through valid(); discard it in that case so the caller sees nullptr. */ + auto exe = new RKNNExecution(this, op, api); + if (!exe->valid()) { + delete exe; + return nullptr; + } + return exe; +} + +void RKNNBackend::onResizeBegin() { +} + +ErrorCode RKNNBackend::onResizeEnd() { + return NO_ERROR; +} + +void RKNNBackend::onExecuteBegin() const { +} + +void RKNNBackend::onExecuteEnd() const { +} + +/* Tensors owned by this backend live in plain host memory sized by tensor->size(). */ +Backend::MemObj* RKNNBackend::onAcquire(const Tensor* tensor, StorageType storageType) { + auto mem = new HostMemObj(tensor->size()); + if (!mem->valid()) { + delete mem; + return nullptr; + } + return mem; +} + +bool RKNNBackend::onClearBuffer() { + return true; +} + +void RKNNBackend::onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const { + MNNCPUCopyBuffer(srcTensor, dstTensor); +} + +const Runtime* RKNNBackend::getRuntime() { + return mRuntime; +} + +RKNNRuntime::RKNNRuntime(const Backend::Info& info) : mInfo(info) { +} + +Backend* RKNNRuntime::onCreate(const BackendConfig* config, Backend* origin) const { + return new RKNNBackend(this); +} + +void RKNNRuntime::onGabageCollect(int level) { +} + +Runtime::CompilerType RKNNRuntime::onGetCompilerType() const { + return Runtime::Compiler_Origin; +} + +/* A runtime is only created when the RKNN runtime library and all required symbols could be loaded. */ +Runtime* RKNNRuntimeCreator::onCreate(const Backend::Info& info) const { + if (nullptr == loadApi()) { + return nullptr; + } + return new RKNNRuntime(info); +} + +bool RKNNRuntimeCreator::onValid(Backend::Info& info) const { + info.mode = Backend::Info::DIRECT; + return true; +} + +} // namespace RKNN + +void registerRKNNRuntimeCreator() { + MNNInsertExtraRuntimeCreator(MNN_FORWARD_USER_2, new RKNN::RKNNRuntimeCreator, false); +} + +} // namespace MNN diff --git a/source/backend/rknn/backend/RKNNBackend.hpp new file mode 100644 index 0000000000..fcc5f02fa0 --- /dev/null +++
b/source/backend/rknn/backend/RKNNBackend.hpp @@ -0,0 +1,56 @@ +#ifndef MNN_RKNNBACKEND_HPP +#define MNN_RKNNBACKEND_HPP + +#include "core/Backend.hpp" +#include "core/Execution.hpp" + +namespace MNN { +namespace RKNN { + +class RKNNRuntime; + +class RKNNBackend : public Backend { +public: + explicit RKNNBackend(const RKNNRuntime* runtime); + ~RKNNBackend() override = default; + + Execution* onCreate(const std::vector& inputs, const std::vector& outputs, + const MNN::Op* op) override; + void onResizeBegin() override; + ErrorCode onResizeEnd() override; + void onExecuteBegin() const override; + void onExecuteEnd() const override; + MemObj* onAcquire(const Tensor* tensor, StorageType storageType) override; + bool onClearBuffer() override; + void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const override; + const Runtime* getRuntime() override; + +private: + const RKNNRuntime* mRuntime; +}; + +class RKNNRuntime : public Runtime { +public: + explicit RKNNRuntime(const Backend::Info& info); + ~RKNNRuntime() override = default; + + Backend* onCreate(const BackendConfig* config = nullptr, Backend* origin = nullptr) const override; + void onGabageCollect(int level) override; + CompilerType onGetCompilerType() const override; + +private: + Backend::Info mInfo; +}; + +class RKNNRuntimeCreator : public RuntimeCreator { +public: + Runtime* onCreate(const Backend::Info& info) const override; + bool onValid(Backend::Info& info) const override; +}; + +} // namespace RKNN + +void registerRKNNRuntimeCreator(); +} // namespace MNN + +#endif diff --git a/source/core/Backend.cpp b/source/core/Backend.cpp index f5140b35ab..4b89e321f9 100644 --- a/source/core/Backend.cpp +++ b/source/core/Backend.cpp @@ -48,6 +48,9 @@ extern void registerNNAPIRuntimeCreator(); #if MNN_QNN_ENABLED extern void registerQNNRuntimeCreator(); #endif +#if MNN_RKNN_ENABLED +extern void registerRKNNRuntimeCreator(); +#endif #ifdef MNN_NEUROPILOT extern void registerNeuroPilot(); #endif @@ 
-71,6 +74,9 @@ void registerBackend() { #if MNN_QNN_ENABLED registerQNNRuntimeCreator(); #endif +#if MNN_RKNN_ENABLED + registerRKNNRuntimeCreator(); +#endif #if MNN_OPENCL_ENABLED OpenCL::registerOpenCLRuntimeCreator(); #endif diff --git a/tools/converter/CMakeLists.txt b/tools/converter/CMakeLists.txt index 1e757c9f82..6d078f2cf4 100644 --- a/tools/converter/CMakeLists.txt +++ b/tools/converter/CMakeLists.txt @@ -1,6 +1,7 @@ IF(MNN_BUILD_CONVERTER) SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../) option(MNN_BUILD_TORCH "Build Converter support TorchScript." OFF) + option(MNN_RKNN_CONVERT_MODE "Enable RKNN sidecar generation in MNNConvert." OFF) IF(MNN_BUILD_PROTOBUFFER) SET(Protobuf_LIBRARIES libprotobuf) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../3rd_party/protobuf/src) @@ -21,6 +22,9 @@ IF(MNN_BUILD_CONVERTER) include_directories(${CMAKE_CURRENT_LIST_DIR}/include) include_directories(${CMAKE_CURRENT_LIST_DIR}/source/tflite/schema) include_directories(${CMAKE_CURRENT_BINARY_DIR}) + if (MNN_RKNN_CONVERT_MODE) + add_definitions(-DENABLE_RKNN_CONVERT_MODE) + endif() include(${CMAKE_CURRENT_LIST_DIR}/source/compression/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/source/tensorflow/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/source/onnx/CMakeLists.txt) diff --git a/tools/converter/include/config.hpp b/tools/converter/include/config.hpp index 5f2c9931d6..9fe97393a4 100644 --- a/tools/converter/include/config.hpp +++ b/tools/converter/include/config.hpp @@ -69,6 +69,11 @@ class MNN_PUBLIC modelConfig { bool mnn2json = false; bool dumpInfo = false; bool saveExternalData = false; + bool rknnSidecar = false; + std::string rknnTarget = ""; + std::string rknnPython = ""; + std::string rknnScript = ""; + std::string rknnOutputDir = ""; bool inSubGraph = false; // using external data when convert int64_t externalTreshold = 1024 * 64; @@ -79,6 +84,7 @@ class MNN_PUBLIC modelConfig { bool splitQuantBlock = false; // Enable verbose 
output for each optimization pass (like LLVM's -debug-pass) bool dumpPass = false; + int cliExitCode = 1; }; #endif // CONFIG_HPP diff --git a/tools/converter/source/MNNConverter.cpp b/tools/converter/source/MNNConverter.cpp index c81c9f2f78..5c2b4ed7ae 100644 --- a/tools/converter/source/MNNConverter.cpp +++ b/tools/converter/source/MNNConverter.cpp @@ -14,9 +14,8 @@ int main(int argc, char *argv[]) { // parser command line arg auto res = MNN::Cli::initializeMNNConvertArgs(modelPath, argc, argv); if (!res) { - return 0; + return modelPath.cliExitCode; } // Convert - MNN::Cli::convertModel(modelPath); - return 0; + return MNN::Cli::convertModel(modelPath) ? 0 : 1; } diff --git a/tools/converter/source/common/RKNNBundle.cpp b/tools/converter/source/common/RKNNBundle.cpp new file mode 100644 index 0000000000..4ff59cb4d8 --- /dev/null +++ b/tools/converter/source/common/RKNNBundle.cpp @@ -0,0 +1,300 @@ +#include "RKNNBundle.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "CaffeOp_generated.h" +#include "CommonUtils.hpp" +#include "MNN/ErrorCode.hpp" +#include "MNN_generated.h" +#include "core/MNNFileUtils.h" +#include "logkit.h" + +namespace { +static const char* MNN_RKNN_TARGET_ENV = "MNN_RKNN_TARGET"; +static const char* MNN_RKNN_PYTHON_ENV = "MNN_RKNN_PYTHON"; +static const char* MNN_RKNN_SCRIPT_ENV = "MNN_RKNN_SCRIPT"; +static const char* MNN_RKNN_OUTPUT_DIR_ENV = "MNN_RKNN_OUTPUT_DIR"; + +static std::string getEnvValue(const char* name) { + auto value = std::getenv(name); + if (nullptr == value) { + return ""; + } + return value; +} + +static bool loadRequiredEnv(std::string& dst, const char* name) { + dst = getEnvValue(name); + if (dst.empty()) { + MNN_ERROR("RKNN sidecar requires environment variable %s\n", name); + return false; + } + return true; +} + +static std::string shellEscape(const std::string& input) { + std::string escaped = "'"; + for (char c : input) { + if ('\'' == c) { + escaped += "'\\''"; + } else { + 
escaped.push_back(c); + } + } + escaped.push_back('\''); + return escaped; +} + +static std::string basenameWithoutExtension(const std::string& path) { + auto slash = path.find_last_of("/\\"); + std::string name = (slash == std::string::npos) ? path : path.substr(slash + 1); + auto dot = name.find_last_of('.'); + if (dot == std::string::npos) { + return name; + } + return name.substr(0, dot); +} + +struct InputInfo { + std::string name; + std::vector dims; + MNN::DataType dtype = MNN::DataType_DT_FLOAT; + MNN::MNN_DATA_FORMAT dformat = MNN::MNN_DATA_FORMAT_NC4HW4; +}; + +static std::vector collectInputInfos(const MNN::NetT& net) { + std::vector inputs; + for (const auto& op : net.oplists) { + if (nullptr == op || op->type != MNN::OpType_Input || op->outputIndexes.empty()) { + continue; + } + auto input = op->main.AsInput(); + if (nullptr == input) { + continue; + } + const auto outputIndex = op->outputIndexes[0]; + if (outputIndex < 0 || outputIndex >= net.tensorName.size()) { + MNN_ERROR("RKNN wrapper: invalid input tensor index %d\n", outputIndex); + return {}; + } + InputInfo info; + info.name = net.tensorName[outputIndex]; + info.dims.assign(input->dims.begin(), input->dims.end()); + info.dtype = input->dtype; + info.dformat = input->dformat; + inputs.emplace_back(std::move(info)); + } + return inputs; +} + +static std::vector collectOutputNames(const MNN::NetT& net) { + if (!net.outputName.empty()) { + return net.outputName; + } + std::set inputIndexes; + std::set outputIndexes; + std::vector outputNames; + for (const auto& op : net.oplists) { + if (nullptr == op) { + continue; + } + for (auto inputIndex : op->inputIndexes) { + inputIndexes.insert(inputIndex); + } + for (auto outputIndex : op->outputIndexes) { + outputIndexes.insert(outputIndex); + } + } + for (auto outputIndex : outputIndexes) { + if (inputIndexes.find(outputIndex) != inputIndexes.end()) { + continue; + } + if (outputIndex < 0 || outputIndex >= net.tensorName.size()) { + continue; + } + 
outputNames.emplace_back(net.tensorName[outputIndex]); + } + return outputNames; +} + +static std::unique_ptr makeStringAttr(const std::string& key, const std::string& value) { + std::unique_ptr attr(new MNN::AttributeT); + attr->key = key; + attr->s = value; + attr->type = MNN::DataType_DT_STRING; + return attr; +} + +static int ensureTensorIndex(const std::string& name, std::map* tensorMap, + std::vector* tensorNames) { + auto iter = tensorMap->find(name); + if (iter != tensorMap->end()) { + return iter->second; + } + const int index = static_cast(tensorNames->size()); + tensorNames->emplace_back(name); + tensorMap->insert(std::make_pair(name, index)); + return index; +} +} + +namespace MNN { + +bool PopulateRKNNConfigFromEnv(modelConfig& modelPath) { + if (!loadRequiredEnv(modelPath.rknnTarget, MNN_RKNN_TARGET_ENV)) { + return false; + } + if (!loadRequiredEnv(modelPath.rknnPython, MNN_RKNN_PYTHON_ENV)) { + return false; + } + if (!loadRequiredEnv(modelPath.rknnScript, MNN_RKNN_SCRIPT_ENV)) { + return false; + } + if (!loadRequiredEnv(modelPath.rknnOutputDir, MNN_RKNN_OUTPUT_DIR_ENV)) { + return false; + } + if (!CommonKit::FileIsExist(modelPath.rknnScript)) { + MNN_ERROR("RKNN script does not exist: %s\n", modelPath.rknnScript.c_str()); + return false; + } + return true; +} + +bool GenerateRKNNBundle(const modelConfig& modelPath, RKNNBundlePaths* bundlePaths) { + if (modelPath.model != modelConfig::ONNX) { + MNN_ERROR("RKNN sidecar only supports ONNX source models\n"); + return false; + } + if (modelPath.modelFile.empty() || modelPath.MNNModel.empty()) { + MNN_ERROR("RKNN sidecar requires both source ONNX path and output MNN path\n"); + return false; + } + if (!MNNDirExist(modelPath.rknnOutputDir.c_str()) && !MNNCreateDir(modelPath.rknnOutputDir.c_str())) { + MNN_ERROR("Create RKNN output dir failed: %s\n", modelPath.rknnOutputDir.c_str()); + return false; + } + + const auto baseName = basenameWithoutExtension(modelPath.MNNModel); + const auto rknnPath = 
MNNFilePathConcat(modelPath.rknnOutputDir, baseName + "_" + modelPath.rknnTarget + ".rknn"); + const auto manifestPath = MNNFilePathConcat(modelPath.rknnOutputDir, baseName + ".rknn.bundle.json"); + + /* Every argument goes through shellEscape because the command line is executed by the shell via std::system. */ + std::ostringstream command; + command << shellEscape(modelPath.rknnPython) << " " + << shellEscape(modelPath.rknnScript) + << " --onnx " << shellEscape(modelPath.modelFile) + << " --output " << shellEscape(rknnPath) + << " --target " << shellEscape(modelPath.rknnTarget); + + MNN_PRINT("Generate RKNN sidecar with command: %s\n", command.str().c_str()); + auto ret = std::system(command.str().c_str()); + if (ret != 0) { + MNN_ERROR("RKNN sidecar generation failed, exit code: %d\n", ret); + return false; + } + /* A zero status is not trusted on its own: the script must actually have produced the sidecar file. */ + if (!MNNFileExist(rknnPath.c_str())) { + MNN_ERROR("RKNN sidecar is not generated: %s\n", rknnPath.c_str()); + return false; + } + + std::ofstream manifest(manifestPath.c_str(), std::ios::out | std::ios::trunc); + if (!manifest.good()) { + MNN_ERROR("Open RKNN manifest failed: %s\n", manifestPath.c_str()); + return false; + } + /* Paths and the target name end up inside JSON string literals, so quotes, backslashes (e.g. Windows paths) and control characters must be escaped to keep the manifest parseable. */ + auto jsonEscape = [](const std::string& text) { + static const char* kHexDigits = "0123456789abcdef"; + std::string escaped; + escaped.reserve(text.size()); + for (const char c : text) { + const auto code = static_cast<unsigned char>(c); + if ('"' == c || '\\' == c) { + escaped.push_back('\\'); + escaped.push_back(c); + } else if ('\n' == c) { + escaped += "\\n"; + } else if ('\r' == c) { + escaped += "\\r"; + } else if ('\t' == c) { + escaped += "\\t"; + } else if (code < 0x20) { + escaped += "\\u00"; + escaped.push_back(kHexDigits[code >> 4]); + escaped.push_back(kHexDigits[code & 0x0F]); + } else { + escaped.push_back(c); + } + } + return escaped; + }; + manifest << "{\n"; + manifest << " \"onnx_model\": \"" << jsonEscape(modelPath.modelFile) << "\",\n"; + manifest << " \"mnn_model\": \"" << jsonEscape(modelPath.MNNModel) << "\",\n"; + manifest << " \"rknn_model\": \"" << jsonEscape(rknnPath) << "\",\n"; + manifest << " \"target\": \"" << jsonEscape(modelPath.rknnTarget) << "\""; + /* The external weight entry is only listed when the companion .weight file exists next to the MNN model. */ + const auto weightPath = modelPath.MNNModel + ".weight"; + if (MNNFileExist(weightPath.c_str())) { + manifest << ",\n \"mnn_external_weight\": \"" << jsonEscape(weightPath) << "\"\n"; + } else { + manifest << "\n"; + } + manifest << "}\n"; + manifest.close(); + + if (!manifest.good()) { + MNN_ERROR("Write RKNN manifest failed: %s\n", manifestPath.c_str()); + return false; + } + + MNN_PRINT("RKNN sidecar generated: %s\n", rknnPath.c_str()); + MNN_PRINT("RKNN manifest generated: %s\n", manifestPath.c_str()); + if (nullptr != bundlePaths) { + bundlePaths->rknnPath = rknnPath; + bundlePaths->manifestPath = manifestPath; + } + return true; +} + +std::unique_ptr
BuildRKNNWrapperNet(const NetT& sourceNet, const modelConfig& modelPath, + const RKNNBundlePaths& bundlePaths) { + auto inputs = collectInputInfos(sourceNet); + if (inputs.empty()) { + MNN_ERROR("RKNN wrapper: failed to collect input tensors from source net\n"); + return nullptr; + } + auto outputs = collectOutputNames(sourceNet); + if (outputs.empty()) { + MNN_ERROR("RKNN wrapper: failed to collect output tensors from source net\n"); + return nullptr; + } + + std::unique_ptr wrapper(new NetT); + wrapper->bizCode = modelPath.bizCode; + wrapper->sourceType = NetSource_ONNX; + wrapper->usage = Usage_INFERENCE; + wrapper->preferForwardType = ForwardType_CPU; + + std::map tensorMap; + std::vector inputIndexes; + std::vector outputIndexes; + + for (const auto& input : inputs) { + const int tensorIndex = ensureTensorIndex(input.name, &tensorMap, &wrapper->tensorName); + inputIndexes.emplace_back(tensorIndex); + + std::unique_ptr inputOp(new OpT); + inputOp->name = input.name; + inputOp->type = OpType_Input; + inputOp->main.type = OpParameter_Input; + inputOp->main.value = new InputT; + inputOp->main.AsInput()->dims.assign(input.dims.begin(), input.dims.end()); + inputOp->main.AsInput()->dtype = input.dtype; + inputOp->main.AsInput()->dformat = input.dformat; + inputOp->outputIndexes = {tensorIndex}; + inputOp->defaultDimentionFormat = input.dformat; + wrapper->oplists.emplace_back(std::move(inputOp)); + } + + for (const auto& output : outputs) { + outputIndexes.emplace_back(ensureTensorIndex(output, &tensorMap, &wrapper->tensorName)); + } + + std::unique_ptr rknnOp(new OpT); + rknnOp->name = "RKNNSubgraph"; + rknnOp->type = OpType_Extra; + rknnOp->main.type = OpParameter_Extra; + rknnOp->main.value = new ExtraT; + rknnOp->main.AsExtra()->type = "RKNN"; + rknnOp->main.AsExtra()->engine = "MNN"; + rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("model_path", bundlePaths.rknnPath)); + rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("bundle_manifest", 
bundlePaths.manifestPath)); + rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("target", modelPath.rknnTarget)); + rknnOp->inputIndexes = inputIndexes; + rknnOp->outputIndexes = outputIndexes; + wrapper->oplists.emplace_back(std::move(rknnOp)); + + wrapper->outputName = outputs; + wrapper->tensorNumber = static_cast(wrapper->tensorName.size()); + return wrapper; +} +} diff --git a/tools/converter/source/common/RKNNBundle.hpp b/tools/converter/source/common/RKNNBundle.hpp new file mode 100644 index 0000000000..faf65ff735 --- /dev/null +++ b/tools/converter/source/common/RKNNBundle.hpp @@ -0,0 +1,23 @@ +#ifndef RKNN_BUNDLE_HPP +#define RKNN_BUNDLE_HPP + +#include +#include + +#include "config.hpp" + +namespace MNN { +struct NetT; + +struct RKNNBundlePaths { + std::string rknnPath; + std::string manifestPath; +}; + +bool PopulateRKNNConfigFromEnv(modelConfig& modelPath); +bool GenerateRKNNBundle(const modelConfig& modelPath, RKNNBundlePaths* bundlePaths); +std::unique_ptr BuildRKNNWrapperNet(const NetT& sourceNet, const modelConfig& modelPath, + const RKNNBundlePaths& bundlePaths); +} + +#endif diff --git a/tools/converter/source/common/cli.cpp b/tools/converter/source/common/cli.cpp index d2dd9ab4d4..802b6e8c6c 100644 --- a/tools/converter/source/common/cli.cpp +++ b/tools/converter/source/common/cli.cpp @@ -35,6 +35,9 @@ #include "CommonUtils.hpp" #include "PostConverter.hpp" #include "Json2Flatbuffer.hpp" +#ifdef ENABLE_RKNN_CONVERT_MODE +#include "RKNNBundle.hpp" +#endif #include #include #include @@ -225,15 +228,20 @@ bool Cli::initializeMNNConvertArgs(modelConfig &modelPath, int argc, char **argv "dumpPass", "Enable verbose output for each optimization pass, showing what changes each pass made (like LLVM's " "-debug-pass)"); +#ifdef ENABLE_RKNN_CONVERT_MODE + options.add_options()("rknn", "generate RKNN sidecar from source ONNX and environment variables"); +#endif auto result = options.parse(argc, argv); if (result.count("help")) { + 
modelPath.cliExitCode = 0; std::cout << options.help({""}) << std::endl; return false; } if (result.count("version")) { + modelPath.cliExitCode = 0; std::cout << MNN_VERSION << std::endl; return false; } @@ -269,6 +277,7 @@ bool Cli::initializeMNNConvertArgs(modelConfig &modelPath, int argc, char **argv return false; } if (result.count("OP")) { + modelPath.cliExitCode = 0; MNN_PRINT("Dump %s support Ops\n", frameWork.c_str()); const auto& res = OpCount::get()->getMap().find(frameWork); if (res == OpCount::get()->getMap().end()) { @@ -448,6 +457,14 @@ bool Cli::initializeMNNConvertArgs(modelConfig &modelPath, int argc, char **argv if (result.count("dumpPass")) { modelPath.dumpPass = true; } +#ifdef ENABLE_RKNN_CONVERT_MODE + if (result.count("rknn")) { + modelPath.rknnSidecar = true; + if (!PopulateRKNNConfigFromEnv(modelPath)) { + return false; + } + } +#endif return true; } @@ -651,19 +668,37 @@ bool Cli::convertModel(modelConfig& modelPath) { expectedPass.emplace_back("SplitBlockQuantConvolution"); } CommonKit::loadCompress(modelPath); + std::unique_ptr finalNet; if (needOptimize) { std::cout << "Start to Optimize the MNN Net..." 
<< std::endl; - std::unique_ptr newNet = optimizeNet(netT, modelPath.forTraining, modelPath, expectedPass); - if (newNet->extraTensorDescribe.size()>0 && expectedPass.empty()) { + finalNet = optimizeNet(netT, modelPath.forTraining, modelPath, expectedPass); + if (finalNet->extraTensorDescribe.size()>0 && expectedPass.empty()) { MNN_PRINT("MNN net has tensor quant info\n"); - computeUnaryBuffer(newNet.get()); + computeUnaryBuffer(finalNet.get()); } - _reorderInputs(inputNames, newNet.get()); - error = writeFb(newNet, modelPath, std::move(metaOp)); + _reorderInputs(inputNames, finalNet.get()); } else { _reorderInputs(inputNames, netT.get()); - error = writeFb(netT, modelPath, std::move(metaOp)); + finalNet = std::move(netT); } + +#ifdef ENABLE_RKNN_CONVERT_MODE + if (modelPath.rknnSidecar) { + RKNNBundlePaths bundlePaths; + if (!GenerateRKNNBundle(modelPath, &bundlePaths)) { + return false; + } + auto wrapperNet = BuildRKNNWrapperNet(*finalNet, modelPath, bundlePaths); + if (nullptr == wrapperNet) { + return false; + } + error = writeFb(wrapperNet, modelPath, std::move(metaOp)); + } else { + error = writeFb(finalNet, modelPath, std::move(metaOp)); + } +#else + error = writeFb(finalNet, modelPath, std::move(metaOp)); +#endif if (0 == error) { std::cout << "Converted Success!" 
<< std::endl; } else { From 5704365d4acc4e6f4a4c656e0c38b99a6a76c39d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 9 Jun 2026 11:19:10 +0800 Subject: [PATCH 2/3] [NPU:rknn] migrate rknn pipeline to CPU Plugin --- docs/inference/npu.md | 8 +- source/backend/rknn/CMakeLists.txt | 4 + source/backend/rknn/backend/RKNNBackend.cpp | 276 +++++++------------ source/backend/rknn/backend/RKNNBackend.hpp | 53 +--- source/core/Backend.cpp | 6 - tools/converter/source/common/RKNNBundle.cpp | 154 ++++++++++- 6 files changed, 244 insertions(+), 257 deletions(-) diff --git a/docs/inference/npu.md b/docs/inference/npu.md index a239d58ad8..dd5702cfdc 100644 --- a/docs/inference/npu.md +++ b/docs/inference/npu.md @@ -185,12 +185,12 @@ cp -r ${DDK}/include ${MNN}/source/backend/hiai/3rdParty/include - 包装后的 `.mnn` - sidecar `.rknn` -其中 `.mnn` 内部只保留 `Input + Extra(type="RKNN")` 包装图,运行时由 MNN 的 RKNN backend 调用 RKNN C API 执行 `.rknn`。 +其中 `.mnn` 内部保留 `Input + Plugin(type="RKNN")` 包装图,运行时由 MNN 的 CPU Plugin 框架调用 RKNN C API 执行 `.rknn`。 ### RKNN 后端整体介绍 - Host 侧通过 `MNNConvert --rknn` 完成双产物生成,不走 `compilefornpu` 的 `MNN -> NPU` 逐算子编译链路。 -- Device 侧通过 RKNN C API 加载 `.rknn` 并执行,当前 backend 注册为 `MNN_FORWARD_USER_2`。 +- Device 侧通过 MNN 的 CPU Plugin 框架调用 RKNN C API 加载 `.rknn` 并执行;应用侧 Session backend 仍使用 `MNN_FORWARD_CPU`。 - RKNN backend 读取 runtime 库路径、转换脚本路径、目标平台等信息时,不做硬编码,全部从环境变量读取;缺失时直接报 `MNN_ERROR`。 ### 编译 @@ -268,13 +268,13 @@ ${BUILD_DIR}/MNNConvert \ - 指向目标板上的 `librknnrt.so` 并在创建 Session 时选择: -- backend type = `MNN_FORWARD_USER_2` +- backend type = `MNN_FORWARD_CPU` 如果 `.rknn` 路径在 wrapper `.mnn` 中是相对路径,则需要确保模型外部路径设置正确,使 MNN 能解析 sidecar 所在目录。 ### 当前限制 -- 当前 RKNN backend 只执行 `Extra(type="RKNN")` 节点,不支持逐算子 RKNN backend。 +- 当前 RKNN 路径执行 `Plugin(type="RKNN")` 节点,不支持逐算子 RKNN backend。 - 当前实现走 host buffer copy 路径,尚未做 zero-copy。 - 当前输出路径按 `float32` 处理。 - 当前主目标是板端运行;PC 侧如果没有可用的 x86 `librknnrt.so`,则不能直接用 MNN runtime 在 Host 上模拟执行 RKNN backend。 diff --git a/source/backend/rknn/CMakeLists.txt 
b/source/backend/rknn/CMakeLists.txt index bb95975eb1..c5c528ead5 100644 --- a/source/backend/rknn/CMakeLists.txt +++ b/source/backend/rknn/CMakeLists.txt @@ -10,6 +10,10 @@ if ("${_RKNN_API_INCLUDE}" STREQUAL "") message(FATAL_ERROR "MNN_RKNN=ON requires RKNN_API_INCLUDE_DIR (or env RKNN_API_INCLUDE_DIR) to point to the directory containing rknn_api.h") endif() +if (NOT MNN_WITH_PLUGIN) + message(FATAL_ERROR "MNN_RKNN=ON requires MNN_WITH_PLUGIN=ON because RKNN is implemented as Plugin(RKNN) + CPU Plugin kernels") +endif() + add_library(MNN_RKNN OBJECT ${MNN_RKNN_SRCS}) target_include_directories(MNN_RKNN PRIVATE ${CMAKE_CURRENT_LIST_DIR}/backend/) target_include_directories(MNN_RKNN PRIVATE ${_RKNN_API_INCLUDE}) diff --git a/source/backend/rknn/backend/RKNNBackend.cpp b/source/backend/rknn/backend/RKNNBackend.cpp index a9999ff726..cbe5e37dc4 100644 --- a/source/backend/rknn/backend/RKNNBackend.cpp +++ b/source/backend/rknn/backend/RKNNBackend.cpp @@ -1,5 +1,3 @@ -#include "RKNNBackend.hpp" - #include #include #include @@ -8,37 +6,25 @@ #include #include -#include "MNN_generated.h" +#include "MNN/plugin/PluginContext.hpp" +#include "MNN/plugin/PluginKernel.hpp" +#include "MNN/plugin/PluginShapeInference.hpp" +#include "core/Backend.hpp" #include "core/MNNFileUtils.h" #include "core/Macro.h" #include "core/TensorUtils.hpp" #include "rknn_api.h" +#include "shape/SizeComputer.hpp" +#ifdef MNN_WITH_PLUGIN namespace MNN { namespace RKNN { namespace { static const char* kRuntimeLibEnv = "MNN_RKNN_RUNTIME_LIB"; -static const char* kExtraTypeName = "RKNN"; +static const char* kPluginTypeName = "RKNN"; static const char* kModelPathAttr = "model_path"; -class HostMemObj : public Backend::MemObj { -public: - explicit HostMemObj(size_t size) : mPtr(std::malloc(size)) { - } - ~HostMemObj() override { - std::free(mPtr); - } - MemChunk chunk() override { - return MemChunk(mPtr, 0); - } - bool valid() const { - return nullptr != mPtr; - } -private: - void* mPtr = nullptr; -}; - 
struct RKNNApi { using Init = int (*)(rknn_context*, void*, uint32_t, uint32_t, rknn_init_extend*); using Destroy = int (*)(rknn_context); @@ -64,7 +50,7 @@ static const RKNNApi* loadApi() { static RKNNApi api; std::call_once(once, []() { auto libPath = std::getenv(kRuntimeLibEnv); - if (nullptr == libPath || libPath[0] == '\0') { + if (nullptr == libPath || libPath[0] == 0) { MNN_ERROR("MNN_RKNN: missing environment variable %s\n", kRuntimeLibEnv); return; } @@ -76,7 +62,7 @@ static const RKNNApi* loadApi() { #define MNN_RKNN_LOAD_SYMBOL(typeName, field, symbol) \ api.field = reinterpret_cast(dlsym(api.handle, symbol)); \ if (nullptr == api.field) { \ - MNN_ERROR("MNN_RKNN: dlsym failed for %s\n", symbol); \ + MNN_ERROR("MNN_RKNN: dlsym failed for %s\n", symbol); \ return; \ } MNN_RKNN_LOAD_SYMBOL(Init, init, "rknn_init"); @@ -92,30 +78,22 @@ static const RKNNApi* loadApi() { return api.loaded ? &api : nullptr; } -static std::string getStringAttr(const Extra* extra, const char* key) { - if (nullptr == extra || nullptr == extra->attr()) { +static std::string getStringAttr(const plugin::PluginContext* ctx, const char* key) { + auto attr = ctx->getAttr(key); + if (nullptr == attr || nullptr == attr->s()) { return ""; } - for (int i = 0; i < extra->attr()->size(); ++i) { - auto attr = extra->attr()->GetAs(i); - if (nullptr == attr || nullptr == attr->key()) { - continue; - } - if (attr->key()->str() == key && nullptr != attr->s()) { - return attr->s()->str(); - } - } - return ""; + return attr->s()->str(); } -static std::string resolveModelPath(const Backend* backend, const std::string& path) { +static std::string resolveModelPath(const std::string& dirPath, const std::string& path) { if (path.empty()) { return ""; } - if (!path.empty() && path[0] == '/') { + if (path[0] == '/') { return path; } - return MNNFilePathConcat(backend->pNPUModelDirPath, path); + return MNNFilePathConcat(dirPath, path); } static rknn_tensor_type mapTensorType(const Tensor* tensor) { @@ 
-147,40 +125,62 @@ static Tensor::DimensionType getHostTensorDimType(const Tensor* tensor) { return tensor->getDimensionType(); } -class RKNNExecution : public Execution { +class RKNNPluginShape : public plugin::InferShapeKernel { public: - RKNNExecution(Backend* backend, const Op* op, const RKNNApi* api) : Execution(backend), mApi(api) { - if (nullptr == op || op->type() != OpType_Extra || nullptr == op->main_as_Extra()) { - MNN_ERROR("MNN_RKNN: invalid op for RKNN execution\n"); - mValid = false; - return; + bool compute(plugin::InferShapeContext* ctx) override { + for (int i = 0; i < ctx->outputs().size(); ++i) { + auto key = std::string("o_") + std::to_string(i); + auto attr = ctx->getAttr(key); + if (nullptr == attr || nullptr == attr->tensor()) { + MNN_ERROR("MNN_RKNN: missing output shape attr %s\n", key.c_str()); + return false; + } + auto blob = attr->tensor(); + auto dst = ctx->output(i); + dst->setType(blob->dataType()); + if (nullptr != blob->dims()) { + dst->buffer().dimensions = blob->dims()->size(); + for (int j = 0; j < blob->dims()->size(); ++j) { + dst->setLength(j, blob->dims()->data()[j]); + } + } else { + dst->buffer().dimensions = 0; + } + TensorUtils::getDescribe(dst)->dimensionFormat = blob->dataFormat(); } - auto extra = op->main_as_Extra(); - if (extra->type()->str() != kExtraTypeName) { - MNN_ERROR("MNN_RKNN: unsupported Extra type\n"); - mValid = false; - return; + return true; + } +}; + +class RKNNPluginExecute : public plugin::CPUComputeKernel { +public: + ~RKNNPluginExecute() override { + if (mContext != 0 && nullptr != mApi) { + mApi->destroy(mContext); + } + } + + bool init(plugin::CPUKernelContext* ctx) override { + mApi = loadApi(); + if (nullptr == mApi) { + return false; } - mModelPath = resolveModelPath(backend, getStringAttr(extra, kModelPathAttr)); + mModelPath = resolveModelPath(ctx->dir_path(), getStringAttr(ctx, kModelPathAttr)); if (mModelPath.empty()) { - MNN_ERROR("MNN_RKNN: Extra(%s) requires attr '%s'\n", 
kExtraTypeName, kModelPathAttr); - mValid = false; - return; + MNN_ERROR("MNN_RKNN: Plugin(%s) requires attr %s\n", kPluginTypeName, kModelPathAttr); + return false; } if (!MNNFileExist(mModelPath.c_str())) { MNN_ERROR("MNN_RKNN: model file does not exist: %s\n", mModelPath.c_str()); - mValid = false; - return; + return false; } if (mApi->init(&mContext, (void*)mModelPath.c_str(), 0, 0, nullptr) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: rknn_init failed for %s\n", mModelPath.c_str()); - mValid = false; - return; + return false; } if (mApi->query(mContext, RKNN_QUERY_IN_OUT_NUM, &mIoNum, sizeof(mIoNum)) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: query in/out num failed\n"); - mValid = false; - return; + return false; } mInputAttrs.resize(mIoNum.n_input); mOutputAttrs.resize(mIoNum.n_output); @@ -189,8 +189,7 @@ class RKNNExecution : public Execution { mInputAttrs[i].index = i; if (mApi->query(mContext, RKNN_QUERY_INPUT_ATTR, &mInputAttrs[i], sizeof(rknn_tensor_attr)) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: query input attr failed: %u\n", i); - mValid = false; - return; + return false; } } for (uint32_t i = 0; i < mIoNum.n_output; ++i) { @@ -198,35 +197,30 @@ class RKNNExecution : public Execution { mOutputAttrs[i].index = i; if (mApi->query(mContext, RKNN_QUERY_OUTPUT_ATTR, &mOutputAttrs[i], sizeof(rknn_tensor_attr)) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: query output attr failed: %u\n", i); - mValid = false; - return; + return false; } } + return true; } - ~RKNNExecution() override { - if (mContext != 0 && nullptr != mApi) { - mApi->destroy(mContext); - } - } - - ErrorCode onResize(const std::vector& inputs, const std::vector& outputs) override { - if ((uint32_t)inputs.size() != mIoNum.n_input || (uint32_t)outputs.size() != mIoNum.n_output) { + bool resize(plugin::CPUKernelContext* ctx) override { + if ((uint32_t)ctx->inputs().size() != mIoNum.n_input || (uint32_t)ctx->outputs().size() != mIoNum.n_output) { MNN_ERROR("MNN_RKNN: input/output count mismatch, expect %u/%u, got 
%zu/%zu\n", - mIoNum.n_input, mIoNum.n_output, inputs.size(), outputs.size()); - return INVALID_VALUE; + mIoNum.n_input, mIoNum.n_output, ctx->inputs().size(), ctx->outputs().size()); + return false; } - return NO_ERROR; + return true; } - ErrorCode onExecute(const std::vector& inputs, const std::vector& outputs) override { + bool compute(plugin::CPUKernelContext* ctx) override { std::vector> hostInputs; - std::vector rknnInputs(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) { - hostInputs.emplace_back(new Tensor(inputs[i], getHostTensorDimType(inputs[i]))); - if (!MNNCPUCopyBuffer(inputs[i], hostInputs.back().get())) { + std::vector rknnInputs(ctx->inputs().size()); + for (size_t i = 0; i < ctx->inputs().size(); ++i) { + auto src = ctx->input((int)i); + hostInputs.emplace_back(new Tensor(src, getHostTensorDimType(src))); + if (!MNNCPUCopyBuffer(src, hostInputs.back().get())) { MNN_ERROR("MNN_RKNN: failed to copy input tensor %zu to host\n", i); - return INVALID_VALUE; + return false; } std::memset(&rknnInputs[i], 0, sizeof(rknn_input)); rknnInputs[i].index = (uint32_t)i; @@ -238,15 +232,15 @@ class RKNNExecution : public Execution { } if (mApi->inputsSet(mContext, (uint32_t)rknnInputs.size(), rknnInputs.data()) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: rknn_inputs_set failed\n"); - return INVALID_VALUE; + return false; } if (mApi->run(mContext, nullptr) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: rknn_run failed\n"); - return INVALID_VALUE; + return false; } - std::vector rknnOutputs(outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) { + std::vector rknnOutputs(ctx->outputs().size()); + for (size_t i = 0; i < ctx->outputs().size(); ++i) { std::memset(&rknnOutputs[i], 0, sizeof(rknn_output)); rknnOutputs[i].index = (uint32_t)i; rknnOutputs[i].want_float = 1; @@ -254,26 +248,27 @@ class RKNNExecution : public Execution { } if (mApi->outputsGet(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data(), nullptr) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: 
rknn_outputs_get failed\n"); - return INVALID_VALUE; + return false; } - for (size_t i = 0; i < outputs.size(); ++i) { - if (outputs[i]->getType().code != halide_type_float || outputs[i]->getType().bits != 32) { - MNN_ERROR("MNN_RKNN: only float32 outputs are supported in the first runtime version\n"); + for (size_t i = 0; i < ctx->outputs().size(); ++i) { + auto dst = ctx->output((int)i); + if (dst->getType().code != halide_type_float || dst->getType().bits != 32) { + MNN_ERROR("MNN_RKNN: only float32 outputs are supported in the first plugin version\n"); mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); - return NOT_SUPPORT; + return false; } - Tensor hostOutput(outputs[i], getHostTensorDimType(outputs[i])); + Tensor hostOutput(dst, getHostTensorDimType(dst)); auto copySize = ALIMIN((int)hostOutput.size(), (int)rknnOutputs[i].size); std::memcpy(hostOutput.buffer().host, rknnOutputs[i].buf, copySize); - if (!MNNCPUCopyBuffer(&hostOutput, outputs[i])) { + if (!MNNCPUCopyBuffer(&hostOutput, dst)) { MNN_ERROR("MNN_RKNN: failed to copy output tensor %zu from host\n", i); mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); - return INVALID_VALUE; + return false; } } mApi->outputsRelease(mContext, (uint32_t)rknnOutputs.size(), rknnOutputs.data()); - return NO_ERROR; + return true; } private: @@ -285,95 +280,12 @@ class RKNNExecution : public Execution { std::vector mOutputAttrs; }; -} // namespace - -RKNNBackend::RKNNBackend(const RKNNRuntime* runtime) : Backend(MNN_FORWARD_USER_2), mRuntime(runtime) { -} - -Execution* RKNNBackend::onCreate(const std::vector& inputs, const std::vector& outputs, const MNN::Op* op) { - auto api = loadApi(); - if (nullptr == api) { - return nullptr; - } - if (nullptr == op || op->type() != OpType_Extra || nullptr == op->main_as_Extra()) { - return nullptr; - } - auto extra = op->main_as_Extra(); - if (extra->type()->str() != kExtraTypeName) { - return nullptr; - } - auto exe = 
new RKNNExecution(this, op, api); - if (!exe->valid()) { - delete exe; - return nullptr; - } - return exe; -} - -void RKNNBackend::onResizeBegin() { -} - -ErrorCode RKNNBackend::onResizeEnd() { - return NO_ERROR; -} - -void RKNNBackend::onExecuteBegin() const { -} - -void RKNNBackend::onExecuteEnd() const { -} - -Backend::MemObj* RKNNBackend::onAcquire(const Tensor* tensor, StorageType storageType) { - auto mem = new HostMemObj(tensor->size()); - if (!mem->valid()) { - delete mem; - return nullptr; - } - return mem; -} - -bool RKNNBackend::onClearBuffer() { - return true; -} - -void RKNNBackend::onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const { - MNNCPUCopyBuffer(srcTensor, dstTensor); -} - -const Runtime* RKNNBackend::getRuntime() { - return mRuntime; -} - -RKNNRuntime::RKNNRuntime(const Backend::Info& info) : mInfo(info) { -} - -Backend* RKNNRuntime::onCreate(const BackendConfig* config, Backend* origin) const { - return new RKNNBackend(this); -} - -void RKNNRuntime::onGabageCollect(int level) { -} - -Runtime::CompilerType RKNNRuntime::onGetCompilerType() const { - return Runtime::Compiler_Origin; -} - -Runtime* RKNNRuntimeCreator::onCreate(const Backend::Info& info) const { - if (nullptr == loadApi()) { - return nullptr; - } - return new RKNNRuntime(info); -} - -bool RKNNRuntimeCreator::onValid(Backend::Info& info) const { - info.mode = Backend::Info::DIRECT; - return true; -} +static auto _rknn_plugin_shape_registrar __attribute__((unused)) = + MNN::plugin::InferShapeKernelRegistrar("RKNN"); +static auto _rknn_plugin_compute_registrar __attribute__((unused)) = + MNN::plugin::ComputeKernelRegistrar("RKNN"); +} // namespace } // namespace RKNN - -void registerRKNNRuntimeCreator() { - MNNInsertExtraRuntimeCreator(MNN_FORWARD_USER_2, new RKNN::RKNNRuntimeCreator, false); -} - } // namespace MNN +#endif diff --git a/source/backend/rknn/backend/RKNNBackend.hpp b/source/backend/rknn/backend/RKNNBackend.hpp index fcc5f02fa0..4cec68afb1 100644 --- 
a/source/backend/rknn/backend/RKNNBackend.hpp +++ b/source/backend/rknn/backend/RKNNBackend.hpp @@ -1,56 +1,7 @@ #ifndef MNN_RKNNBACKEND_HPP #define MNN_RKNNBACKEND_HPP -#include "core/Backend.hpp" -#include "core/Execution.hpp" - -namespace MNN { -namespace RKNN { - -class RKNNRuntime; - -class RKNNBackend : public Backend { -public: - explicit RKNNBackend(const RKNNRuntime* runtime); - ~RKNNBackend() override = default; - - Execution* onCreate(const std::vector& inputs, const std::vector& outputs, - const MNN::Op* op) override; - void onResizeBegin() override; - ErrorCode onResizeEnd() override; - void onExecuteBegin() const override; - void onExecuteEnd() const override; - MemObj* onAcquire(const Tensor* tensor, StorageType storageType) override; - bool onClearBuffer() override; - void onCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor) const override; - const Runtime* getRuntime() override; - -private: - const RKNNRuntime* mRuntime; -}; - -class RKNNRuntime : public Runtime { -public: - explicit RKNNRuntime(const Backend::Info& info); - ~RKNNRuntime() override = default; - - Backend* onCreate(const BackendConfig* config = nullptr, Backend* origin = nullptr) const override; - void onGabageCollect(int level) override; - CompilerType onGetCompilerType() const override; - -private: - Backend::Info mInfo; -}; - -class RKNNRuntimeCreator : public RuntimeCreator { -public: - Runtime* onCreate(const Backend::Info& info) const override; - bool onValid(Backend::Info& info) const override; -}; - -} // namespace RKNN - -void registerRKNNRuntimeCreator(); -} // namespace MNN +// RKNN is implemented as Plugin("RKNN") + CPU Plugin kernels. +// This header is kept only as a placeholder for the source/backend/rknn tree. 
#endif diff --git a/source/core/Backend.cpp b/source/core/Backend.cpp index 4b89e321f9..f5140b35ab 100644 --- a/source/core/Backend.cpp +++ b/source/core/Backend.cpp @@ -48,9 +48,6 @@ extern void registerNNAPIRuntimeCreator(); #if MNN_QNN_ENABLED extern void registerQNNRuntimeCreator(); #endif -#if MNN_RKNN_ENABLED -extern void registerRKNNRuntimeCreator(); -#endif #ifdef MNN_NEUROPILOT extern void registerNeuroPilot(); #endif @@ -74,9 +71,6 @@ void registerBackend() { #if MNN_QNN_ENABLED registerQNNRuntimeCreator(); #endif -#if MNN_RKNN_ENABLED - registerRKNNRuntimeCreator(); -#endif #if MNN_OPENCL_ENABLED OpenCL::registerOpenCLRuntimeCreator(); #endif diff --git a/tools/converter/source/common/RKNNBundle.cpp b/tools/converter/source/common/RKNNBundle.cpp index 4ff59cb4d8..ca7b5192aa 100644 --- a/tools/converter/source/common/RKNNBundle.cpp +++ b/tools/converter/source/common/RKNNBundle.cpp @@ -12,6 +12,7 @@ #include "CommonUtils.hpp" #include "MNN/ErrorCode.hpp" #include "MNN_generated.h" +#include "../optimizer/Program.hpp" #include "core/MNNFileUtils.h" #include "logkit.h" @@ -41,7 +42,7 @@ static bool loadRequiredEnv(std::string& dst, const char* name) { static std::string shellEscape(const std::string& input) { std::string escaped = "'"; for (char c : input) { - if ('\'' == c) { + if (c == '\'') { escaped += "'\\''"; } else { escaped.push_back(c); @@ -60,7 +61,6 @@ static std::string basenameWithoutExtension(const std::string& path) { } return name.substr(0, dot); } - struct InputInfo { std::string name; std::vector dims; @@ -68,6 +68,13 @@ struct InputInfo { MNN::MNN_DATA_FORMAT dformat = MNN::MNN_DATA_FORMAT_NC4HW4; }; +struct OutputInfo { + std::string name; + std::vector dims; + MNN::DataType dtype = MNN::DataType_DT_FLOAT; + MNN::MNN_DATA_FORMAT dformat = MNN::MNN_DATA_FORMAT_NC4HW4; +}; + static std::vector collectInputInfos(const MNN::NetT& net) { std::vector inputs; for (const auto& op : net.oplists) { @@ -123,6 +130,99 @@ static std::vector 
collectOutputNames(const MNN::NetT& net) { return outputNames; } +static MNN::DataType mapExprDataType(const halide_type_t& type) { + if (type.code == halide_type_float) { + if (type.bits == 16) { + return MNN::DataType_DT_HALF; + } + if (type.bits == 64) { + return MNN::DataType_DT_DOUBLE; + } + return MNN::DataType_DT_FLOAT; + } + if (type.code == halide_type_uint) { + if (type.bits == 8) { + return MNN::DataType_DT_UINT8; + } + if (type.bits == 16) { + return MNN::DataType_DT_UINT16; + } + if (type.bits == 32) { + return MNN::DataType_DT_INT32; + } + return MNN::DataType_DT_INT32; + } + if (type.code == halide_type_int) { + if (type.bits == 8) { + return MNN::DataType_DT_INT8; + } + if (type.bits == 16) { + return MNN::DataType_DT_INT16; + } + if (type.bits == 64) { + return MNN::DataType_DT_INT64; + } + return MNN::DataType_DT_INT32; + } + if (type.code == halide_type_handle) { + return MNN::DataType_DT_STRING; + } + return MNN::DataType_DT_FLOAT; +} + +static MNN::MNN_DATA_FORMAT mapExprFormat(MNN::Express::Dimensionformat format) { + switch (format) { + case MNN::Express::NHWC: + return MNN::MNN_DATA_FORMAT_NHWC; + case MNN::Express::NC4HW4: + return MNN::MNN_DATA_FORMAT_NC4HW4; + case MNN::Express::NCHW: + default: + return MNN::MNN_DATA_FORMAT_NCHW; + } +} + +static std::vector collectOutputInfos(const MNN::NetT& net) { + auto outputNames = collectOutputNames(net); + if (outputNames.empty()) { + return {}; + } + auto program = MNN::Express::Program::create(&net, true, true); + if (nullptr == program) { + MNN_ERROR("RKNN wrapper: failed to build Program for output shape inference\n"); + return {}; + } + + std::map infoMap; + for (const auto& output : program->outputs()) { + if (output == nullptr) { + continue; + } + auto info = output->getInfo(); + if (nullptr == info) { + continue; + } + infoMap.insert(std::make_pair(output->name(), info)); + } + + std::vector outputs; + outputs.reserve(outputNames.size()); + for (const auto& name : outputNames) { + auto 
infoIter = infoMap.find(name); + if (infoIter == infoMap.end() || nullptr == infoIter->second) { + MNN_ERROR("RKNN wrapper: failed to infer output info for tensor %s\n", name.c_str()); + return {}; + } + OutputInfo info; + info.name = name; + info.dims.assign(infoIter->second->dim.begin(), infoIter->second->dim.end()); + info.dtype = mapExprDataType(infoIter->second->type); + info.dformat = mapExprFormat(infoIter->second->order); + outputs.emplace_back(std::move(info)); + } + return outputs; +} + static std::unique_ptr makeStringAttr(const std::string& key, const std::string& value) { std::unique_ptr attr(new MNN::AttributeT); attr->key = key; @@ -131,6 +231,24 @@ static std::unique_ptr makeStringAttr(const std::string& key, c return attr; } +static std::unique_ptr makeStringListAttr(const std::string& key, const std::vector& values) { + std::unique_ptr attr(new MNN::AttributeT); + attr->key = key; + attr->list.reset(new MNN::ListValueT); + attr->list->s = values; + return attr; +} + +static std::unique_ptr makeBlobAttr(const std::string& key, const OutputInfo& info) { + std::unique_ptr attr(new MNN::AttributeT); + attr->key = key; + attr->tensor.reset(new MNN::BlobT); + attr->tensor->dataType = info.dtype; + attr->tensor->dims = info.dims; + attr->tensor->dataFormat = info.dformat; + return attr; +} + static int ensureTensorIndex(const std::string& name, std::map* tensorMap, std::vector* tensorNames) { auto iter = tensorMap->find(name); @@ -242,7 +360,7 @@ std::unique_ptr BuildRKNNWrapperNet(const NetT& sourceNet, const modelConf MNN_ERROR("RKNN wrapper: failed to collect input tensors from source net\n"); return nullptr; } - auto outputs = collectOutputNames(sourceNet); + auto outputs = collectOutputInfos(sourceNet); if (outputs.empty()) { MNN_ERROR("RKNN wrapper: failed to collect output tensors from source net\n"); return nullptr; @@ -257,10 +375,13 @@ std::unique_ptr BuildRKNNWrapperNet(const NetT& sourceNet, const modelConf std::map tensorMap; std::vector 
inputIndexes; std::vector outputIndexes; + std::vector inputNames; + std::vector outputNames; for (const auto& input : inputs) { const int tensorIndex = ensureTensorIndex(input.name, &tensorMap, &wrapper->tensorName); inputIndexes.emplace_back(tensorIndex); + inputNames.emplace_back(input.name); std::unique_ptr inputOp(new OpT); inputOp->name = input.name; @@ -276,25 +397,30 @@ std::unique_ptr BuildRKNNWrapperNet(const NetT& sourceNet, const modelConf } for (const auto& output : outputs) { - outputIndexes.emplace_back(ensureTensorIndex(output, &tensorMap, &wrapper->tensorName)); + outputIndexes.emplace_back(ensureTensorIndex(output.name, &tensorMap, &wrapper->tensorName)); + outputNames.emplace_back(output.name); } std::unique_ptr rknnOp(new OpT); rknnOp->name = "RKNNSubgraph"; - rknnOp->type = OpType_Extra; - rknnOp->main.type = OpParameter_Extra; - rknnOp->main.value = new ExtraT; - rknnOp->main.AsExtra()->type = "RKNN"; - rknnOp->main.AsExtra()->engine = "MNN"; - rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("model_path", bundlePaths.rknnPath)); - rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("bundle_manifest", bundlePaths.manifestPath)); - rknnOp->main.AsExtra()->attr.emplace_back(makeStringAttr("target", modelPath.rknnTarget)); + rknnOp->type = OpType_Plugin; + rknnOp->main.type = OpParameter_Plugin; + rknnOp->main.value = new PluginT; + rknnOp->main.AsPlugin()->type = "RKNN"; + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("model_path", bundlePaths.rknnPath)); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("bundle_manifest", bundlePaths.manifestPath)); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("target", modelPath.rknnTarget)); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringListAttr("inputs", inputNames)); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringListAttr("outputs", outputNames)); + for (int i = 0; i < outputs.size(); ++i) { + 
rknnOp->main.AsPlugin()->attr.emplace_back(makeBlobAttr("o_" + std::to_string(i), outputs[i])); + } rknnOp->inputIndexes = inputIndexes; rknnOp->outputIndexes = outputIndexes; wrapper->oplists.emplace_back(std::move(rknnOp)); - wrapper->outputName = outputs; + wrapper->outputName = outputNames; wrapper->tensorNumber = static_cast(wrapper->tensorName.size()); return wrapper; } -} +} // namespace MNN From a4132d7161b6fb8341dafa58b82cb994e02f4c89 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 10 Jun 2026 12:27:38 +0800 Subject: [PATCH 3/3] [NPU:rknn] fix a layout bug and update profiling interface. --- docs/inference/npu.md | 23 ++ express/Executor.cpp | 11 +- include/MNN/Interpreter.hpp | 8 +- source/backend/rknn/README.md | 298 +++++++++++++++++++ source/backend/rknn/backend/RKNNBackend.cpp | 109 ++++++- source/core/Backend.hpp | 30 +- source/core/Pipeline.hpp | 3 + source/core/Session.cpp | 17 ++ tools/converter/source/common/RKNNBundle.cpp | 20 +- 9 files changed, 506 insertions(+), 13 deletions(-) create mode 100644 source/backend/rknn/README.md diff --git a/docs/inference/npu.md b/docs/inference/npu.md index dd5702cfdc..1c8f6ef366 100644 --- a/docs/inference/npu.md +++ b/docs/inference/npu.md @@ -193,6 +193,10 @@ cp -r ${DDK}/include ${MNN}/source/backend/hiai/3rdParty/include - Device 侧通过 MNN 的 CPU Plugin 框架调用 RKNN C API 加载 `.rknn` 并执行;应用侧 Session backend 仍使用 `MNN_FORWARD_CPU`。 - RKNN backend 读取 runtime 库路径、转换脚本路径、目标平台等信息时,不做硬编码,全部从环境变量读取;缺失时直接报 `MNN_ERROR`。 +更完整的 RKNN 说明、包内容、示例代码与板端运行方式,请参考: +- `source/backend/rknn/README.md` + - 包含 Host 转换、aarch64 交叉编译、Plugin 运行机制、独立示例代码、板端包内容与运行方式。 + ### 编译 #### Host,编译带 RKNN 转换能力的 MNNConvert @@ -270,8 +274,27 @@ ${BUILD_DIR}/MNNConvert \ 并在创建 Session 时选择: - backend type = `MNN_FORWARD_CPU` +**注意:在 RK 板上执行任何真正调用 NPU 的命令时,必须使用 `sudo`。** + 如果 `.rknn` 路径在 wrapper `.mnn` 中是相对路径,则需要确保模型外部路径设置正确,使 MNN 能解析 sidecar 所在目录。 +### Device,Profiling + +RKNN internal profiling 通过 MNN 的公开 hint / info 接口暴露: + +- 开启 profiling: + - 
`Interpreter::setSessionHint(Interpreter::RKNN_PROFILE, 1)` + - 或 `Executor::RuntimeManager::setHint(Interpreter::RKNN_PROFILE, 1)` +- 读取 profiling 文本: + - `Interpreter::getSessionInfo(session, Interpreter::BACKEND_PROFILE, &ptr)` + - 或 `Executor::RuntimeManager::getInfo(Interpreter::BACKEND_PROFILE, &ptr)` + +其中: +- `RKNN_PROFILE` 会在 RKNN plugin 内部打开 `RKNN_FLAG_COLLECT_PERF_MASK` +- `BACKEND_PROFILE` 返回的是 `const char*`,内容包含 RKNN 导出的 `npu_run` 和 `perf_detail` 文本 +- 因为它是普通文本,所以应用层可以直接打印,也可以原样写入文件做持久化 +- 如果当前 backend 不支持 profiling,或者尚未产生 profile,返回值可能为空 + ### 当前限制 - 当前 RKNN 路径执行 `Plugin(type="RKNN")` 节点,不支持逐算子 RKNN backend。 diff --git a/express/Executor.cpp b/express/Executor.cpp index bcfc01becf..772027c688 100644 --- a/express/Executor.cpp +++ b/express/Executor.cpp @@ -283,11 +283,20 @@ bool Executor::RuntimeManager::getInfo(Interpreter::SessionInfoCode code, void* auto dst = (int*)ptr; if (!mInside->mRuntime.first.empty()) { *dst = mInside->mRuntime.first.begin()->first; + return true; } } break; case Interpreter::RESIZE_STATUS: { auto dst = (int*)ptr; *dst = mInside->mResizeStatus; + return true; + } break; + case Interpreter::BACKEND_PROFILE: { + for (auto& r : mInside->mRuntime.first) { + if (r.second != nullptr && r.second->onGetRuntimeInfo((int)code, ptr)) { + return true; + } + } } break; default: { // Do nothing @@ -694,4 +703,4 @@ void Executor::setLazyComputeMode(uint32_t mode) { } } // namespace Express -} // namespace MNN \ No newline at end of file +} // namespace MNN diff --git a/include/MNN/Interpreter.hpp b/include/MNN/Interpreter.hpp index dfc3f11d78..53620644f8 100644 --- a/include/MNN/Interpreter.hpp +++ b/include/MNN/Interpreter.hpp @@ -264,7 +264,10 @@ class MNN_PUBLIC Interpreter { CPU_SME2_NEON_DIVISION_RATIO = 17, // Set SME cores, default is 2, if supports sme - CPU_SME_CORES = 18 + CPU_SME_CORES = 18, + + // Enable backend-side profiling export for runtimes that support it. 
+ RKNN_PROFILE = 19 }; enum ExternalPathType { @@ -463,6 +466,9 @@ class MNN_PUBLIC Interpreter { /** Mode / NumberThread, int* */ THREAD_NUMBER = 4, + /** Backend-specific profile text, const char** */ + BACKEND_PROFILE = 5, + ALL }; diff --git a/source/backend/rknn/README.md b/source/backend/rknn/README.md new file mode 100644 index 0000000000..7e3f70845b --- /dev/null +++ b/source/backend/rknn/README.md @@ -0,0 +1,298 @@ +# RKNN Backend + +This directory contains the RKNN integration for MNN. + +This file intentionally keeps the instructions generic. +For one machine-specific, real-path compilation and deployment example, see the external project README used in this integration workflow. + +Current design: +- Converter side generates two artifacts from the same ONNX model: + - a wrapper `.mnn` model containing `Plugin(type="RKNN")` + - a sidecar `.rknn` model plus bundle manifest +- Runtime side executes `Plugin("RKNN")` through the MNN CPU Plugin framework. +- There is no `MNN_FORWARD_USER_2` RKNN runtime path anymore. +- Application-side session backend remains `MNN_FORWARD_CPU`. + +## 1. Host build for `MNNConvert --rknn` + +Build a host `MNNConvert` with plugin support and RKNN converter support enabled: + +```bash +cmake -S /path/to/MNN-Agent -B /path/to/MNN-Agent/build-linux \ + -DMNN_BUILD_CONVERTER=ON \ + -DMNN_WITH_PLUGIN=ON \ + -DMNN_RKNN=ON \ + -DMNN_RKNN_CONVERT_MODE=ON \ + -DRKNN_API_INCLUDE_DIR=/path/to/rknn-toolkit2/rknpu2/runtime/Linux/librknn_api/include + +cmake --build /path/to/MNN-Agent/build-linux --target MNN MNNConvert -j8 +``` + +## 2. 
Generate wrapper `.mnn` + sidecar `.rknn` + +Before running `MNNConvert --rknn`, export these environment variables: + +```bash +export MNN_RKNN_TARGET=rv1126b +export MNN_RKNN_PYTHON=/path/to/python +export MNN_RKNN_SCRIPT=/path/to/to_rknn.py +export MNN_RKNN_OUTPUT_DIR=/path/to/output/sidecar +``` + +Example: + +```bash +/path/to/MNN-Agent/build-linux/MNNConvert \ + -f ONNX \ + --modelFile /path/to/model.onnx \ + --MNNModel /path/to/model.mnn \ + --rknn +``` + +Expected outputs: +- `/path/to/model.mnn` +- `${MNN_RKNN_OUTPUT_DIR}/model_.rknn` +- `${MNN_RKNN_OUTPUT_DIR}/model.rknn.bundle.json` + +The generated wrapper `.mnn` contains: +- `Input` ops for original inputs +- one `Plugin(type="RKNN")` op +- plugin attrs including: + - `model_path` + - `bundle_manifest` + - `target` + - `inputs` + - `outputs` + - `o_0`, `o_1`, ... for output shape metadata + +Important: +- `model_path` and `bundle_manifest` are emitted as relative file names. +- The validated deployment layout is: wrapper `.mnn`, sidecar `.rknn`, and bundle `.json` in the same target directory. + +## 3. Cross compile runtime for Linux aarch64 / ARMv8 + +Example cross build using the system `aarch64-linux-gnu` toolchain. +This builds the target-side runtime libraries; `MNNConvert` itself is usually only needed on the host. 
+ +```bash +cmake -S /path/to/MNN-Agent -B /path/to/MNN-Agent/build-linux-aarch64-gnu \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + -DCMAKE_C_COMPILER=/usr/bin/aarch64-linux-gnu-gcc \ + -DCMAKE_CXX_COMPILER=/usr/bin/aarch64-linux-gnu-g++ \ + -DCMAKE_C_FLAGS='-march=armv8-a' \ + -DCMAKE_CXX_FLAGS='-march=armv8-a' \ + -DMNN_WITH_PLUGIN=ON \ + -DMNN_RKNN=ON \ + -DMNN_BUILD_CONVERTER=OFF \ + -DMNN_BUILD_DEMO=OFF \ + -DMNN_BUILD_TOOLS=ON \ + -DRKNN_API_INCLUDE_DIR=/path/to/rknn-toolkit2/rknpu2/runtime/Linux/librknn_api/include + +cmake --build /path/to/MNN-Agent/build-linux-aarch64-gnu --target MNN MNN_Express -j8 +``` + +Notes: +- `MNN_WITH_PLUGIN=ON` is required because RKNN is implemented as a Plugin op. +- `MNN_RKNN=ON` pulls in the RKNN Plugin kernels. +- `RKNN_API_INCLUDE_DIR` must point to the directory containing `rknn_api.h`. +- The RKNN runtime library is loaded at runtime via `dlopen`, not linked as a hard dependency. + +## 4. Target runtime usage + +On the target board, export the RKNN runtime library path: + +```bash +export MNN_RKNN_RUNTIME_LIB=/path/to/librknnrt.so +``` + +The wrapper `.mnn` should be deployed together with its sidecar `.rknn` and bundle manifest in the same directory on target. + +Important: +- On RK boards, commands that actually execute NPU code should be run with `sudo`. 
+ +Runtime behavior: +- MNN loads the wrapper `.mnn` +- `Plugin(type="RKNN")` is created by the CPU Plugin framework +- the plugin loads the `.rknn` sidecar using RKNN C API +- application-side MNN backend is still `MNN_FORWARD_CPU` +- if the RKNN model expects `NHWC` but the incoming MNN tensor is `NCHW`, the plugin converts layout automatically +- if the incoming tensor is already `NHWC`, no extra layout conversion is done +- backend-side RKNN profiling can be enabled through the public hint path: + - `Interpreter::setSessionHint(Interpreter::RKNN_PROFILE, 1)` or `RuntimeManager::setHint(Interpreter::RKNN_PROFILE, 1)` + - retrieve the exported profile text through `getSessionInfo(..., Interpreter::BACKEND_PROFILE, &ptr)` or `RuntimeManager::getInfo(Interpreter::BACKEND_PROFILE, &ptr)` + - because the profile is exposed as plain text, applications can print it or write it directly to a file + +## 5. Current limitations + +- This is a sidecar-subgraph path, not a per-op RKNN backend. +- Current implementation uses host buffer copies; zero-copy is not implemented. +- Current output copy path assumes float32 outputs from RKNN runtime. +- Input layout auto-conversion currently handles the common `NCHW -> NHWC` case for 4D tensors only, and only when the RKNN model explicitly expects `NHWC`. +- Host-side PC simulation through MNN runtime requires an x86 RKNN runtime library; usually this path is meant for target boards. + +## 6. Code examples + +### 6.1 Minimal C++ example with `Interpreter` + +This example loads the wrapper `.mnn` generated by `MNNConvert --rknn` and runs it through the normal CPU backend. Internally, the `Plugin("RKNN")` node will call the RKNN C API. 
+ +```cpp +#include +#include +#include + +#include +#include +#include +#include + +int main() { + const char* model_path = "/data/local/tmp/rejshand_epoch200_b1_nogridsample.mnn"; + + std::shared_ptr net(MNN::Interpreter::createFromFile(model_path)); + if (!net) { + std::fprintf(stderr, "createFromFile failed\n"); + return 1; + } + + MNN::ScheduleConfig config; + config.type = MNN_FORWARD_CPU; + config.numThread = 1; + + MNN::BackendConfig backendConfig; + config.backendConfig = &backendConfig; + + auto session = net->createSession(config); + if (!session) { + std::fprintf(stderr, "createSession failed\n"); + return 1; + } + + auto input = net->getSessionInput(session, "image"); + if (!input) { + std::fprintf(stderr, "getSessionInput failed\n"); + return 1; + } + + net->resizeTensor(input, {1, 3, 224, 224}); + net->resizeSession(session); + + MNN::Tensor hostInput(input, MNN::Tensor::CAFFE); + std::memset(hostInput.host(), 0, hostInput.size()); + input->copyFromHostTensor(&hostInput); + + if (net->runSession(session) != 0) { + std::fprintf(stderr, "runSession failed\n"); + return 1; + } + + auto uv = net->getSessionOutput(session, "uv"); + auto vertices = net->getSessionOutput(session, "vertices"); + if (!uv || !vertices) { + std::fprintf(stderr, "getSessionOutput failed\n"); + return 1; + } + + MNN::Tensor uvHost(uv, MNN::Tensor::CAFFE); + MNN::Tensor verticesHost(vertices, MNN::Tensor::CAFFE); + uv->copyToHostTensor(&uvHost); + vertices->copyToHostTensor(&verticesHost); + + auto uvPtr = uvHost.host(); + auto vPtr = verticesHost.host(); + std::printf("uv[0] = %f, %f\n", uvPtr[0], uvPtr[1]); + std::printf("vertices[0] = %f, %f, %f\n", vPtr[0], vPtr[1], vPtr[2]); + return 0; +} +``` + +Typical build command on target: + +```bash +aarch64-linux-gnu-g++ -O2 -std=c++11 demo_rknn_mnn.cpp \ + -I/path/to/MNN-Agent/include \ + -L/path/to/mnn/libs -lMNN -o demo_rknn_mnn +``` + +At runtime on board: + +```bash +export LD_LIBRARY_PATH=/path/to/mnn/libs:$LD_LIBRARY_PATH 
+export MNN_RKNN_RUNTIME_LIB=/path/to/librknnrt.so +./demo_rknn_mnn +``` + +### 6.2 Minimal `Module` example + +If you prefer the Express / Module API, load the same wrapper `.mnn` with `MNN_FORWARD_CPU`. + +```cpp +#include +#include +#include + +#include +#include +#include + +using namespace MNN::Express; + +int main() { + MNN::ScheduleConfig config; + config.type = MNN_FORWARD_CPU; + config.numThread = 1; + + std::shared_ptr rtmgr(MNN::Executor::RuntimeManager::createRuntimeManager(config)); + if (!rtmgr) { + std::fprintf(stderr, "createRuntimeManager failed\n"); + return 1; + } + + std::vector inputs = {"image"}; + std::vector outputs = {"uv", "vertices"}; + auto module = Module::load(inputs, outputs, "/data/local/tmp/rejshand_epoch200_b1_nogridsample.mnn", rtmgr); + if (!module) { + std::fprintf(stderr, "Module::load failed\n"); + return 1; + } + + auto image = _Input({1, 3, 224, 224}, NCHW, halide_type_of()); + auto imagePtr = image->writeMap(); + for (int i = 0; i < 1 * 3 * 224 * 224; ++i) { + imagePtr[i] = 0.0f; + } + + auto outputsVar = module->onForward({image}); + if (outputsVar.size() != 2) { + std::fprintf(stderr, "unexpected output size: %zu\n", outputsVar.size()); + return 1; + } + + auto uvInfo = outputsVar[0]->getInfo(); + auto verticesInfo = outputsVar[1]->getInfo(); + if (!uvInfo || !verticesInfo) { + std::fprintf(stderr, "output info is null\n"); + return 1; + } + + auto uv = outputsVar[0]->readMap(); + auto vertices = outputsVar[1]->readMap(); + std::printf("uv[0] = %f, %f\n", uv[0], uv[1]); + std::printf("vertices[0] = %f, %f, %f\n", vertices[0], vertices[1], vertices[2]); + return 0; +} +``` + +Runtime requirements are the same: + +```bash +export LD_LIBRARY_PATH=/path/to/mnn/libs:$LD_LIBRARY_PATH +export MNN_RKNN_RUNTIME_LIB=/path/to/librknnrt.so +./demo_rknn_module +``` + +## 7. Notes + +- Keep this README generic. 
Put machine-specific paths, standalone example source files, and one-off deployment commands in the external example project README instead. +- The standalone example program is intentionally kept outside the MNN source tree. diff --git a/source/backend/rknn/backend/RKNNBackend.cpp b/source/backend/rknn/backend/RKNNBackend.cpp index cbe5e37dc4..9d3b3661c2 100644 --- a/source/backend/rknn/backend/RKNNBackend.cpp +++ b/source/backend/rknn/backend/RKNNBackend.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -93,6 +94,9 @@ static std::string resolveModelPath(const std::string& dirPath, const std::strin if (path[0] == '/') { return path; } + if (dirPath.empty() || dirPath == ".") { + return path; + } return MNNFilePathConcat(dirPath, path); } @@ -124,6 +128,80 @@ static rknn_tensor_format mapTensorFormat(const Tensor* tensor) { static Tensor::DimensionType getHostTensorDimType(const Tensor* tensor) { return tensor->getDimensionType(); } +static bool convertLayoutIfNeeded(const Tensor* tensor, rknn_tensor_format expectFormat, + std::vector* converted, void** buf, uint32_t* size, + rknn_tensor_format* actualFormat) { + auto currentFormat = mapTensorFormat(tensor); + *actualFormat = currentFormat; + *buf = tensor->buffer().host; + *size = (uint32_t)tensor->size(); + + if (expectFormat == currentFormat) { + return true; + } + if (expectFormat != RKNN_TENSOR_NHWC || currentFormat != RKNN_TENSOR_NCHW) { + return true; + } + if (tensor->dimensions() != 4) { + MNN_ERROR("MNN_RKNN: unsupported layout conversion for %dD tensor\n", tensor->dimensions()); + return false; + } + + const int batch = tensor->batch(); + const int channel = tensor->channel(); + const int height = tensor->height(); + const int width = tensor->width(); + const int elementBytes = tensor->getType().bytes(); + if (batch <= 0 || channel <= 0 || height <= 0 || width <= 0 || elementBytes <= 0) { + MNN_ERROR("MNN_RKNN: invalid tensor shape for layout conversion\n"); + return false; + 
} + + converted->resize((size_t)tensor->size()); + auto src = reinterpret_cast(tensor->buffer().host); + auto dst = converted->data(); + for (int n = 0; n < batch; ++n) { + for (int h = 0; h < height; ++h) { + for (int w = 0; w < width; ++w) { + for (int c = 0; c < channel; ++c) { + const size_t srcIndex = ((((size_t)n * (size_t)channel + (size_t)c) * (size_t)height + (size_t)h) * (size_t)width + (size_t)w) * (size_t)elementBytes; + const size_t dstIndex = ((((size_t)n * (size_t)height + (size_t)h) * (size_t)width + (size_t)w) * (size_t)channel + (size_t)c) * (size_t)elementBytes; + ::memcpy(dst + dstIndex, src + srcIndex, (size_t)elementBytes); + } + } + } + } + *buf = converted->data(); + *size = (uint32_t)converted->size(); + *actualFormat = expectFormat; + return true; +} + +static std::string buildProfileString(const RKNNApi* api, rknn_context context) { + std::ostringstream oss; + rknn_perf_run perfRun; + std::memset(&perfRun, 0, sizeof(perfRun)); + auto ret = api->query(context, RKNN_QUERY_PERF_RUN, &perfRun, sizeof(perfRun)); + if (ret == RKNN_SUCC) { + oss << "npu_run : " << (double)perfRun.run_duration / 1000.0 << " ms\n"; + } else { + oss << "npu_run : unavailable\n"; + } + + rknn_perf_detail perfDetail; + std::memset(&perfDetail, 0, sizeof(perfDetail)); + ret = api->query(context, RKNN_QUERY_PERF_DETAIL, &perfDetail, sizeof(perfDetail)); + if (ret == RKNN_SUCC && perfDetail.perf_data != nullptr && perfDetail.data_len > 0) { + oss << "perf_detail:\n"; + oss.write(perfDetail.perf_data, perfDetail.data_len); + if (perfDetail.perf_data[perfDetail.data_len - 1] != '\n') { + oss << '\n'; + } + } else { + oss << "perf_detail: unavailable\n"; + } + return oss.str(); +} class RKNNPluginShape : public plugin::InferShapeKernel { public: @@ -165,6 +243,8 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { if (nullptr == mApi) { return false; } + auto runtime = ctx->backend() == nullptr ? 
nullptr : ctx->backend()->getRuntime(); + mEnableProfile = runtime != nullptr && runtime->hint().enableBackendProfile; mModelPath = resolveModelPath(ctx->dir_path(), getStringAttr(ctx, kModelPathAttr)); if (mModelPath.empty()) { MNN_ERROR("MNN_RKNN: Plugin(%s) requires attr %s\n", kPluginTypeName, kModelPathAttr); @@ -174,7 +254,11 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { MNN_ERROR("MNN_RKNN: model file does not exist: %s\n", mModelPath.c_str()); return false; } - if (mApi->init(&mContext, (void*)mModelPath.c_str(), 0, 0, nullptr) != RKNN_SUCC) { + uint32_t initFlags = 0; + if (mEnableProfile) { + initFlags |= RKNN_FLAG_COLLECT_PERF_MASK; + } + if (mApi->init(&mContext, (void*)mModelPath.c_str(), 0, initFlags, nullptr) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: rknn_init failed for %s\n", mModelPath.c_str()); return false; } @@ -213,7 +297,9 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { } bool compute(plugin::CPUKernelContext* ctx) override { + auto runtime = ctx->backend() == nullptr ? 
nullptr : ctx->backend()->getRuntime(); std::vector> hostInputs; + std::vector> convertedInputs(ctx->inputs().size()); std::vector rknnInputs(ctx->inputs().size()); for (size_t i = 0; i < ctx->inputs().size(); ++i) { auto src = ctx->input((int)i); @@ -222,13 +308,20 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { MNN_ERROR("MNN_RKNN: failed to copy input tensor %zu to host\n", i); return false; } + void* inputBuf = hostInputs.back()->buffer().host; + uint32_t inputSize = (uint32_t)hostInputs.back()->size(); + auto inputFormat = mapTensorFormat(hostInputs.back().get()); + if (!convertLayoutIfNeeded(hostInputs.back().get(), mInputAttrs[i].fmt, &convertedInputs[i], &inputBuf, &inputSize, &inputFormat)) { + MNN_ERROR("MNN_RKNN: failed to convert input tensor %zu layout\n", i); + return false; + } std::memset(&rknnInputs[i], 0, sizeof(rknn_input)); rknnInputs[i].index = (uint32_t)i; - rknnInputs[i].buf = hostInputs.back()->buffer().host; - rknnInputs[i].size = hostInputs.back()->size(); + rknnInputs[i].buf = inputBuf; + rknnInputs[i].size = inputSize; rknnInputs[i].pass_through = 0; rknnInputs[i].type = mapTensorType(hostInputs.back().get()); - rknnInputs[i].fmt = mapTensorFormat(hostInputs.back().get()); + rknnInputs[i].fmt = inputFormat; } if (mApi->inputsSet(mContext, (uint32_t)rknnInputs.size(), rknnInputs.data()) != RKNN_SUCC) { MNN_ERROR("MNN_RKNN: rknn_inputs_set failed\n"); @@ -250,6 +343,13 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { MNN_ERROR("MNN_RKNN: rknn_outputs_get failed\n"); return false; } + if (nullptr != runtime) { + if (mEnableProfile) { + runtime->setLastBackendProfile(buildProfileString(mApi, mContext)); + } else { + runtime->setLastBackendProfile(""); + } + } for (size_t i = 0; i < ctx->outputs().size(); ++i) { auto dst = ctx->output((int)i); @@ -278,6 +378,7 @@ class RKNNPluginExecute : public plugin::CPUComputeKernel { rknn_input_output_num mIoNum{}; std::vector mInputAttrs; std::vector mOutputAttrs; + bool 
mEnableProfile = false; }; static auto _rknn_plugin_shape_registrar __attribute__((unused)) = diff --git a/source/core/Backend.hpp b/source/core/Backend.hpp index eeebb6f645..40d93f221c 100644 --- a/source/core/Backend.hpp +++ b/source/core/Backend.hpp @@ -76,6 +76,9 @@ struct RuntimeHint { // Use CPU Ids std::vector cpuIds; + // Enable backend-side profiling export for runtimes that support it. + bool enableBackendProfile = false; + // Division ration between SME and NEON when runtime threads>=4 // Default: 41, which means that in LLM inference, // during the Prefill stage the workload @@ -228,6 +231,7 @@ class Backend : public NonCopyable { virtual bool onGetTensorInfo(const Tensor* tensor, void* dstInfo) { return false; } + virtual bool onGetSessionInfo(int code, void* ptr) const; /** * @brief clear all dynamic buffers. @@ -285,6 +289,7 @@ class Backend : public NonCopyable { /** Each backend belong to a runtime*/ class Runtime : public NonCopyable { public: + static constexpr int kSessionInfoBackendProfile = 5; /** Origin Op -> (Compiler) -> New Op -> Backend Default use Compiler_Geometry, Origin Op -> Compiler_Geometry -> Little Op @@ -306,6 +311,20 @@ class Runtime : public NonCopyable { const RuntimeHint& hint() const { return mHint; } + void setLastBackendProfile(std::string profile) const { + mLastBackendProfile = std::move(profile); + } + bool onGetRuntimeInfo(int code, void* ptr) const { + if (code == kSessionInfoBackendProfile) { + auto dst = reinterpret_cast(ptr); + if (nullptr == dst) { + return false; + } + *dst = mLastBackendProfile.empty() ? 
nullptr : mLastBackendProfile.c_str(); + return true; + } + return false; + } virtual CompilerType onGetCompilerType() const { return Compiler_Loop; @@ -406,6 +425,7 @@ class Runtime : public NonCopyable { private: std::future mFuture; RuntimeHint mHint; + mutable std::string mLastBackendProfile; }; /** abstract Runtime register */ @@ -440,6 +460,14 @@ class RuntimeCreator { RuntimeCreator() = default; }; +inline bool Backend::onGetSessionInfo(int code, void* ptr) const { + auto rt = const_cast(this)->getRuntime(); + if (nullptr == rt) { + return false; + } + return rt->onGetRuntimeInfo(code, ptr); +} + /** * @brief get registered backend creator for given forward type. * @param type given forward type. @@ -459,4 +487,4 @@ MNN_PUBLIC bool MNNInsertExtraRuntimeCreator(MNNForwardType type, const RuntimeC MNN_PUBLIC bool MNNCPUCopyBuffer(const Tensor* srcTensor, const Tensor* dstTensor); } // namespace MNN -#endif /* Backend_hpp */ \ No newline at end of file +#endif /* Backend_hpp */ diff --git a/source/core/Pipeline.hpp b/source/core/Pipeline.hpp index 87cd1686b6..d2a96b1fb6 100644 --- a/source/core/Pipeline.hpp +++ b/source/core/Pipeline.hpp @@ -62,6 +62,9 @@ class Pipeline : public NonCopyable { MNNForwardType getMainForwardType() const { return mInfo.first.cache.first->type(); } + Backend* getMainBackend() const { + return mInfo.first.cache.second.get(); + } typedef std::map, std::pair, std::shared_ptr>> WrapTensorCache; private: ErrorCode _allocForTensor(int index, bool allocInput); diff --git a/source/core/Session.cpp b/source/core/Session.cpp index bffbf08d54..18646475da 100644 --- a/source/core/Session.cpp +++ b/source/core/Session.cpp @@ -121,6 +121,9 @@ void Session::ModeGroup::setHint(Interpreter::HintMode hint, int value) { case Interpreter::CPU_SME_CORES: runtimeHint.smeCores = value; break; + case Interpreter::HintMode::RKNN_PROFILE: + runtimeHint.enableBackendProfile = value > 0; + break; default: break; } @@ -396,6 +399,20 @@ bool 
Session::getInfo(Interpreter::SessionInfoCode code, void* ptr) const { *dst = mPipelines[0]->getPipelineInfo().first.info.numThread; return true; } + case Interpreter::BACKEND_PROFILE: { + for (auto& pipeline : mPipelines) { + auto backend = pipeline->getMainBackend(); + if (nullptr != backend && backend->onGetSessionInfo((int)code, ptr)) { + return true; + } + } + for (auto& r : mRuntime.first) { + if (r.second != nullptr && r.second->onGetRuntimeInfo((int)code, ptr)) { + return true; + } + } + return false; + } // TODO: Support other debug info default: break; diff --git a/tools/converter/source/common/RKNNBundle.cpp b/tools/converter/source/common/RKNNBundle.cpp index ca7b5192aa..140cffa7c9 100644 --- a/tools/converter/source/common/RKNNBundle.cpp +++ b/tools/converter/source/common/RKNNBundle.cpp @@ -325,14 +325,22 @@ bool GenerateRKNNBundle(const modelConfig& modelPath, RKNNBundlePaths* bundlePat MNN_ERROR("Open RKNN manifest failed: %s\n", manifestPath.c_str()); return false; } + auto onnxSlash = modelPath.modelFile.find_last_of("/\\"); + auto mnnSlash = modelPath.MNNModel.find_last_of("/\\"); + auto rknnSlash = rknnPath.find_last_of("/\\"); + const auto onnxModelName = (onnxSlash == std::string::npos) ? modelPath.modelFile : modelPath.modelFile.substr(onnxSlash + 1); + const auto mnnModelName = (mnnSlash == std::string::npos) ? modelPath.MNNModel : modelPath.MNNModel.substr(mnnSlash + 1); + const auto rknnModelName = (rknnSlash == std::string::npos) ? 
rknnPath : rknnPath.substr(rknnSlash + 1); manifest << "{\n"; - manifest << " \"onnx_model\": \"" << modelPath.modelFile << "\",\n"; - manifest << " \"mnn_model\": \"" << modelPath.MNNModel << "\",\n"; - manifest << " \"rknn_model\": \"" << rknnPath << "\",\n"; + manifest << " \"onnx_model\": \"" << onnxModelName << "\",\n"; + manifest << " \"mnn_model\": \"" << mnnModelName << "\",\n"; + manifest << " \"rknn_model\": \"" << rknnModelName << "\",\n"; manifest << " \"target\": \"" << modelPath.rknnTarget << "\""; const auto weightPath = modelPath.MNNModel + ".weight"; if (MNNFileExist(weightPath.c_str())) { - manifest << ",\n \"mnn_external_weight\": \"" << weightPath << "\"\n"; + auto weightSlash = weightPath.find_last_of("/\\"); + const auto weightName = (weightSlash == std::string::npos) ? weightPath : weightPath.substr(weightSlash + 1); + manifest << ",\n \"mnn_external_weight\": \"" << weightName << "\"\n"; } else { manifest << "\n"; } @@ -407,8 +415,8 @@ std::unique_ptr BuildRKNNWrapperNet(const NetT& sourceNet, const modelConf rknnOp->main.type = OpParameter_Plugin; rknnOp->main.value = new PluginT; rknnOp->main.AsPlugin()->type = "RKNN"; - rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("model_path", bundlePaths.rknnPath)); - rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("bundle_manifest", bundlePaths.manifestPath)); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("model_path", basenameWithoutExtension(bundlePaths.rknnPath) + ".rknn")); + rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("bundle_manifest", basenameWithoutExtension(bundlePaths.manifestPath) + ".json")); rknnOp->main.AsPlugin()->attr.emplace_back(makeStringAttr("target", modelPath.rknnTarget)); rknnOp->main.AsPlugin()->attr.emplace_back(makeStringListAttr("inputs", inputNames)); rknnOp->main.AsPlugin()->attr.emplace_back(makeStringListAttr("outputs", outputNames));