Mirror of https://github.com/lltcggie/waifu2x-caffe.git
Synced 2025-11-24 10:34:54 +00:00

Commit 68a6c59ea9 (parent a63b2510c6)
Commit message: 途中 (work in progress)
@@ -196,7 +196,7 @@ layer {
}
layer {
  name: "/res1/axpy"
  type: "Axpy"
  type: "AxpyFast"
  bottom: "/res1/fc2_sigmoid"
  bottom: "/res1/conv2_relu"
  bottom: "/res1/crop"
@@ -353,7 +353,7 @@ layer {
}
layer {
  name: "/res2/axpy"
  type: "Axpy"
  type: "AxpyFast"
  bottom: "/res2/fc2_sigmoid"
  bottom: "/res2/conv2_relu"
  bottom: "/res2/crop"
@@ -510,7 +510,7 @@ layer {
}
layer {
  name: "/res3/axpy"
  type: "Axpy"
  type: "AxpyFast"
  bottom: "/res3/fc2_sigmoid"
  bottom: "/res3/conv2_relu"
  bottom: "/res3/crop"
@@ -667,7 +667,7 @@ layer {
}
layer {
  name: "/res4/axpy"
  type: "Axpy"
  type: "AxpyFast"
  bottom: "/res4/fc2_sigmoid"
  bottom: "/res4/conv2_relu"
  bottom: "/res4/crop"
@@ -824,7 +824,7 @@ layer {
}
layer {
  name: "/res5/axpy"
  type: "Axpy"
  type: "AxpyFast"
  bottom: "/res5/fc2_sigmoid"
  bottom: "/res5/conv2_relu"
  bottom: "/res5/crop"

@@ -1,5 +1,6 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
@@ -280,18 +281,98 @@ public:
    }
};

static double sumAllElements(const cv::Mat& mat)
{
    CV_Assert(!mat.empty());

    const cv::Scalar s = cv::sum(mat); // per-channel sums
    double total = 0.0;
    for (int c = 0; c < mat.channels(); ++c) {
        total += s[c];
    }
    return total;
}

// ---- internal implementation ----
template<typename T>
static void printRec(const cv::Mat& m, std::vector<int>& idx, int d) {
    if (d == m.dims - 1) {
        // last axis: print the elements as a one-dimensional sequence
        const int cn = m.channels();
        std::cout << "[";
        for (int i = 0; i < m.size[d]; ++i) {
            idx[d] = i;
            const T* p = m.ptr<T>(idx.data()); // pointer to the first channel (ch=0) of the element at idx
            if (cn == 1) {
                std::cout << p[0];
            }
            else {
                std::cout << "(";
                for (int c = 0; c < cn; ++c) {
                    std::cout << p[c];
                    if (c + 1 < cn) std::cout << ", ";
                }
                std::cout << ")";
            }
            if (i + 1 < m.size[d]) std::cout << ", ";
        }
        std::cout << "]";
    }
    else {
        // intermediate axis: recurse into the inner dimensions
        std::cout << "[";
        for (int i = 0; i < m.size[d]; ++i) {
            idx[d] = i;
            printRec<T>(m, idx, d + 1);
            if (i + 1 < m.size[d]) std::cout << ",\n";
        }
        std::cout << "]";
    }
}

template<typename T>
static void printMatND_T(const cv::Mat& m) {
    // keep the number of decimal places modest for floating-point types
    if (std::is_floating_point<T>::value) {
        std::cout << std::fixed << std::setprecision(6);
    }
    std::vector<int> idx(m.dims, 0);
    printRec<T>(m, idx, 0);
    std::cout << std::endl;
}

// entry point (dispatches on the cv::Mat depth)
static void printMatND(const cv::Mat& m) {
    switch (m.depth()) {
    case CV_8U:  printMatND_T<uchar>(m); break;
    case CV_8S:  printMatND_T<schar>(m); break;
    case CV_16U: printMatND_T<uint16_t>(m); break;
    case CV_16S: printMatND_T<int16_t>(m); break;
    case CV_32S: printMatND_T<int32_t>(m); break;
    case CV_32F: printMatND_T<float>(m); break;
    case CV_64F: printMatND_T<double>(m); break;
    default:
        throw std::runtime_error("Unsupported Mat depth.");
    }
}

void reg();
void reg2();

int main(int argc, char** argv) {
    //CV_DNN_REGISTER_LAYER_CLASS(CropCenter, CropCenterLayer);
    reg();
    reg2();

    // ImageNet Caffe reference model
    string protoFile = "models/upresnet10/noise0_scale2.0x_model.prototxt";
    string modelFile = "models/upresnet10/noise0_scale2.0x_model.json.caffemodel";

    // image file
    string imageFile = (argc > 1) ? argv[1] : "images/cat.jpg";
    //string imageFile = (argc > 1) ? argv[1] : "images/cat.jpg";
    string imageFile = "red.png";

    // load the Caffe model
    cv::dnn::Net net;
@@ -321,25 +402,37 @@ int main(int argc, char** argv) {
    cv::resize(img, img, cv::Size(cropSize, cropSize));
    // convert to the blob format used by Caffe (in practice a wrapper around cv::Mat)
    const auto inputBlob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(), cv::Scalar(), true, false, CV_32F);

    //printMatND(inputBlob);

    std::vector<int> indim(inputBlob.size.p, inputBlob.size.p + inputBlob.size.dims());
    // feed the image to the input layer
    net.setInput(inputBlob);
    net.setInput(inputBlob, "input");
    // compute the forward pass and fetch the output of the output layer (Softmax); the prediction results are stored here
    // a 1x1000 matrix (vector) holding the probability (32-bit float) for each of the 1000 ImageNet classes
    const auto probMat = net.forward();
    //const auto probMat = net.forward("/conv_post");
    const auto probMat = net.forward("/res1/axpy");

    std::vector<int> probMatDim(probMat.size.p, probMat.size.p + probMat.size.dims());
    auto sss = sumAllElements(probMat);
    //printMatND(probMat);

    std::vector<cv::Mat> outImgs;
    cv::dnn::imagesFromBlob(probMat, outImgs);
    //cv::dnn::imagesFromBlob(inputBlob, outImgs);
    auto outImg = outImgs[0];

    std::vector<int> outdim(outImg.size.p, outImg.size.p + outImg.size.dims());
    printMatND(outImg);

    //std::cout << cv::format(outImg, cv::Formatter::FMT_DEFAULT) << std::endl;

    // clip the values to the 0-1 range
    cv::threshold(outImg, outImg, 1.0, 1.0, cv::THRESH_TRUNC);
    cv::threshold(outImg, outImg, 0.0, 0.0, cv::THRESH_TOZERO);

    const double clip_eps8 = (1.0 / 255.0) * 0.5 - (1.0e-7 * (1.0 / 255.0) * 0.5);
    outImg.convertTo(outImg, CV_8U, 255.0, clip_eps8);
    std::vector<int> outdim(outImg.size.p, outImg.size.p + outImg.size.dims());

    cv::cvtColor(outImg, outImg, cv::COLOR_RGB2BGR);

waifu2x-caffe/axpy.hpp (new file, 106 lines)
@@ -0,0 +1,106 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP

//#include "../../op_cuda.hpp"
//
//#include "../csl/stream.hpp"
//#include "../csl/tensor.hpp"
//
//#include "../kernels/scale_shift.hpp"

#include <op_cuda.hpp>

#include <opencv2/core.hpp>

#include <cuda4dnn/kernels/scale_shift.hpp>

#include <cstddef>
#include <utility>

namespace cv {
namespace dnn {
namespace cuda4dnn {

    template <class T>
    class AxpyOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        AxpyOp(csl::Stream stream_)
            : stream(std::move(stream_)), axis(0)
        {
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            CV_Assert(inputs.size() == 3);
            CV_Assert(outputs.size() == 1);

            auto input_wrapper = inputs[1].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            /* number of batches in the weights/bias
             * trainable mode: same for all batches
             * untrainable mode: could be different for different batch samples
             */
            std::size_t parameter_batch_size = 1;

            csl::TensorView<T> weights = inputs[0].dynamicCast<wrapper_type>()->getView();
            parameter_batch_size = weights.get_axis_size(0);
            CV_Assert(parameter_batch_size == input.get_axis_size(0));

            csl::TensorView<T> bias = inputs[2].dynamicCast<wrapper_type>()->getView();
            parameter_batch_size = bias.get_axis_size(0);
            CV_Assert(parameter_batch_size == input.get_axis_size(0));

            CV_Assert(!weights.empty() || !bias.empty());
            if (!weights.empty() && !bias.empty())
            {
                CV_CheckEQ(weights.size(), bias.size(), "different broadcasting options for weights and bias is not supported");
            }

            const auto num_parameters = !weights.empty() ? weights.size() : bias.size();
            const auto mid_size = num_parameters / parameter_batch_size;

            /* the scale shift operation might require broadcasting */
            const int end_axis = [&] {
                if (num_parameters == 1) {
                    return static_cast<int>(axis + 1);
                }
                for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++) {
                    if (input.size_range(axis, endAxis) == mid_size)
                        return endAxis;
                }
                CV_Assert(0 /* failed to find a broadcast config */);
            }();

            std::size_t inner_size = input.size_range(end_axis, input.rank());

            if (!weights.empty() && !bias.empty())
                kernels::scaleN_with_biasN<T>(stream, output, input, inner_size, weights, bias);
            else if (!weights.empty())
                kernels::scaleN<T>(stream, output, input, inner_size, weights);
            else
                kernels::biasN<T>(stream, output, input, inner_size, bias);
        }

    private:
        csl::Stream stream;
        std::size_t axis;
    };

}
}
} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP */

waifu2x-caffe/axpy_fast_layer.cpp (new file, 371 lines)
@@ -0,0 +1,371 @@
#include <opencv2/dnn/all_layers.hpp>
//#include <opencv2/core/opencl/ocl_defs.hpp>
#include <cvconfig.h>

//#include <layers/layers_common.hpp>
#include <op_cuda.hpp>
#include <op_halide.hpp>
#include <op_inf_engine.hpp>
#include <ie_ngraph.hpp>
#include <op_webnn.hpp>

#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/shape_utils.hpp>

#include <opencv2/core/utils/logger.hpp>
#include <opencv2/core/utils/trace.hpp>

#ifdef HAVE_CUDA
//#include <cuda4dnn/primitives/scale_shift.hpp>
#include "axpy.hpp"
using namespace cv::dnn::cuda4dnn;
#endif

namespace cv
{
namespace dnn
{

class AxpyFastLayerImpl CV_FINAL : public Layer
{
public:
#ifdef HAVE_WEBNN
    mutable int dims;
    mutable int numChannels;
#endif
    AxpyFastLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
    }

    bool getMemoryShapes(const std::vector<MatShape>& inputs,
                         const int requiredOutputs,
                         std::vector<MatShape>& outputs,
                         std::vector<MatShape>& internals) const CV_OVERRIDE
    {
        outputs.assign(1, inputs[1]);
#ifdef HAVE_WEBNN
        dims = inputs[0].size();
        numChannels = 1;
        if (inputs.size() > 1)
        {
            for (const size_t& dim : inputs[1])
                numChannels *= dim;
        }
#endif
        return true;
    }

    virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
    {
        std::vector<Mat> inputs;
        inputs_arr.getMatVector(inputs);
        CV_Assert(inputs.size() == 3);
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            return true;
#endif
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_HALIDE ||
               backendId == DNN_BACKEND_WEBNN;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        if (inputs_arr.depth() == CV_16F)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == 1, inputs.size() == 3);

        Mat& inpBlob = inputs[1];
        Mat& outBlob = outputs[0];
        // There is a mode when we multiply a first blob by a second one
        // instead of trainable weights.
        Mat weights = inputs[0].reshape(1, 1);
        Mat bias = inputs[2].reshape(1, 1);

        MatShape inpShape0 = shape(inputs[0]);
        MatShape inpShape1 = shape(inputs[1]);
        MatShape inpShape2 = shape(inputs[2]);

        // TODO: the other implementation seems to assume a bias shape different from what this code expects, so rewrite the computation
        //   what this code expects:           weights.shape == bias.shape
        //   what the other side expects:      inpBlob.shape == bias.shape

        MatShape inpShape = shape(inpBlob);
        const int numWeights = weights.total();
        CV_Assert(numWeights != 0);
        CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs");

        if (weights.total() == 1)
        {
            // The total() of bias should be same as weights.
            inpBlob.convertTo(outBlob, CV_32F, weights.at<float>(0), bias.at<float>(0));
            return;
        }

        int endAxis;
        for (endAxis = 1; endAxis <= inpBlob.dims; ++endAxis)
        {
            if (total(inpShape, 0, endAxis) == numWeights)
                break;
        }
        CV_Assert(total(inpShape, 0, endAxis) == numWeights);
        CV_Assert(numWeights == bias.total());
        CV_CheckTypeEQ(inpBlob.type(), CV_32FC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_32FC1, "");

        int numSlices = total(inpShape, 0, 0);
        float* inpData = (float*)inpBlob.data;
        float* outData = (float*)outBlob.data;

        if (endAxis != inpBlob.dims)
        {
            float* weightsData = (float*)weights.data;
            float* biasesData = (float*)bias.data;
            int spatialSize = total(inpShape, endAxis); // spatialSize != 1
            for (int i = 0; i < numSlices; ++i)
            {
                for (int j = 0; j < numWeights; ++j)
                {
                    float w = weightsData ? weightsData[j] : 1;
                    float b = biasesData ? biasesData[j] : 0;
                    Mat inpSlice(1, spatialSize, CV_32F, inpData);
                    Mat outSlice(1, spatialSize, CV_32F, outData);

                    inpSlice.convertTo(outSlice, CV_32F, w, b);

                    inpData += spatialSize;
                    outData += spatialSize;
                }
            }
        }
        else
        {
            for (int i = 0; i < numSlices; ++i)
            {
                Mat inpSlice(1, numWeights, CV_32F, inpData);
                Mat outSlice(1, numWeights, CV_32F, outData);

                multiply(inpSlice, weights, outSlice);
                add(outSlice, bias, outSlice);

                inpData += numWeights;
                outData += numWeights;
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void* context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        CV_Assert(inputs.size() == 3);

        return make_cuda_node<cuda4dnn::AxpyOp>(preferableTarget, std::move(context->stream));
    }
#endif

    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node) CV_OVERRIDE
    {
        switch (node->backendId)
        {
        case DNN_BACKEND_HALIDE:
        {
#ifdef HAVE_HALIDE
            auto base = node.dynamicCast<HalideBackendNode>();
            Halide::Func& input = base->funcs.back();
            Halide::Var x("x"), y("y"), c("c"), n("n");
            Halide::Func top = attachHalide(input(x, y, c, n));
            return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif // HAVE_HALIDE
            break;
        }
        }
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> >& inputs) CV_OVERRIDE
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> input = halideBuffer(inputs[0]);
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = attachHalide(input(x, y, c, n));
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_HALIDE
    // attachHalide can work both with Halide::Buffer and Halide::Func. In the
    // second case it will be a fusion.
    Halide::Func attachHalide(const Halide::Expr& input)
    {
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Var x("x"), y("y"), c("c"), n("n");

        const int numChannels = blobs[0].total();

        Halide::Expr topExpr = input;
        if (hasWeights)
        {
            auto weights = wrapToHalideBuffer(blobs[0], { numChannels });
            topExpr *= weights(c);
        }
        if (hasBias)
        {
            auto bias = wrapToHalideBuffer(blobs.back(), { numChannels });
            topExpr += bias(c);
        }
        top(x, y, c, n) = topExpr;
        return top;
    }
#endif // HAVE_HALIDE

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        auto ieInpNode0 = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        ov::Output<ov::Node> ieInpNode1;
        if (nodes.size() > 1)
            ieInpNode1 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;

        size_t numChannels = 1;
        if (blobs.empty())
            for (const size_t& dim : ieInpNode1.get_shape())
                numChannels *= dim;
        else
            numChannels = blobs[0].total();

        std::vector<size_t> shape(ieInpNode0.get_shape().size(), 1);
        int cAxis = normalize_axis(axis, shape.size());
        shape[cAxis] = numChannels;

        std::shared_ptr<ov::Node> node;
        if (hasWeights)
        {
            ov::Output<ov::Node> weight = blobs.empty() ? ieInpNode1 :
                std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape(shape), blobs[0].data);
            node = std::make_shared<ov::op::v1::Multiply>(ieInpNode0, weight, ov::op::AutoBroadcastType::NUMPY);
        }
        if (hasBias || !hasWeights)
        {
            ov::Output<ov::Node> bias;
            if (hasBias)
            {
                bias = blobs.empty() ? ieInpNode1 :
                    std::make_shared<ov::op::v0::Constant>(ov::element::f32,
                                                           ov::Shape(shape), blobs.back().data);
            }
            else
                bias = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
                                                              ov::Shape(shape), std::vector<float>(numChannels, 0).data());
            node = std::make_shared<ov::op::v1::Add>(node, bias, ov::op::AutoBroadcastType::NUMPY);
        }
        return Ptr<BackendNode>(new InfEngineNgraphNode(node));
    }
#endif // HAVE_DNN_NGRAPH

#ifdef HAVE_WEBNN
    virtual Ptr<BackendNode> initWebnn(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        Ptr<WebnnBackendNode> node = nodes[0].dynamicCast<WebnnBackendNode>();
        auto& webnnInpOperand0 = node->operand;
        auto& webnnGraphBuilder = node->net->builder;
        auto webnnInpOperand1 = nodes.size() > 1 ? nodes[1].dynamicCast<WebnnBackendNode>()->operand : nullptr;
        auto webnnInpOperand2 = nodes.size() > 2 ? nodes[1].dynamicCast<WebnnBackendNode>()->operand : nullptr;
        std::vector<int32_t> shape(dims, 1);

        size_t channels = 1;
        if (blobs.empty())
            channels = numChannels;
        else
            channels = blobs[0].total();

        int cAxis = normalize_axis(axis, shape.size());
        shape[cAxis] = channels;

        ml::Operand operand = webnnInpOperand0;
        if (hasWeights)
        {
            ml::Operand webnnWeights = blobs.empty() ? webnnInpOperand1 : webnn::BuildConstant(webnnGraphBuilder, webnn::getShape(blobs[0]), blobs[0].data, blobs[0].total() * blobs[0].elemSize(), ml::OperandType::Float32);
            webnnWeights = webnnGraphBuilder.Reshape(webnnWeights, shape.data(), shape.size());
            operand = webnnGraphBuilder.Mul(operand, webnnWeights);
        }
        if (hasBias)
        {
            ml::Operand webnnBias;
            if (!hasWeights)
                webnnBias = blobs.empty() ? webnnInpOperand1 : webnn::BuildConstant(webnnGraphBuilder, webnn::getShape(blobs.back()), blobs.back().data, blobs.back().total() * blobs.back().elemSize(), ml::OperandType::Float32);
            else
                webnnBias = blobs.empty() ? webnnInpOperand2 : webnn::BuildConstant(webnnGraphBuilder, webnn::getShape(blobs.back()), blobs.back().data, blobs.back().total() * blobs.back().elemSize(), ml::OperandType::Float32);
            webnnBias = webnnGraphBuilder.Reshape(webnnBias, shape.data(), shape.size());
            operand = webnnGraphBuilder.Add(operand, webnnBias);
        }

        return Ptr<BackendNode>(new WebnnBackendNode(operand));
    }
#endif

    void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE
    {
        scale = Mat();
        shift = Mat();
    }

    //bool tryQuantize(const std::vector<std::vector<float> >& scales,
    //                 const std::vector<std::vector<int> >& zeropoints, LayerParams& params) CV_OVERRIDE
    //{
    //    params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
    //    params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
    //    return true;
    //}

    virtual int64 getFLOPS(const std::vector<MatShape>& inputs,
                           const std::vector<MatShape>& outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning
        long flops = 0;
        for (int i = 0; i < inputs.size(); i++)
        {
            flops += 3 * total(inputs[i]);
        }
        return flops;
    }

    static Ptr<AxpyFastLayerImpl> create(const LayerParams& params)
    {
        return Ptr<AxpyFastLayerImpl>(new AxpyFastLayerImpl(params));
    }
};

} // namespace dnn
} // namespace cv

#include <opencv2/dnn/layer.details.hpp>

void reg2()
{
    CV_DNN_REGISTER_LAYER_CLASS(AxpyFast, cv::dnn::AxpyFastLayerImpl);
}

@@ -104,6 +104,7 @@
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="axpy_fast_layer.cpp" />
    <ClCompile Include="slice_layer.cpp" />
    <ClCompile Include="Source.cpp">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
@@ -115,6 +116,7 @@
    <ClInclude Include="..\common\cNet.h" />
    <ClInclude Include="..\common\stImage.h" />
    <ClInclude Include="..\common\waifu2x.h" />
    <ClInclude Include="axpy.hpp" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">

@@ -36,6 +36,9 @@
    <ClCompile Include="slice_layer.cpp">
      <Filter>ソース ファイル</Filter>
    </ClCompile>
    <ClCompile Include="axpy_fast_layer.cpp">
      <Filter>ソース ファイル</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\common\waifu2x.h">
@@ -47,5 +50,8 @@
    <ClInclude Include="..\common\stImage.h">
      <Filter>common</Filter>
    </ClInclude>
    <ClInclude Include="axpy.hpp">
      <Filter>ソース ファイル</Filter>
    </ClInclude>
  </ItemGroup>
</Project>