73 lines
3.5 KiB
Diff
73 lines
3.5 KiB
Diff
From 82616eec41f6a6989a3b507822c17fc81a10e296 Mon Sep 17 00:00:00 2001
|
|
From: zihaomu <zihaomu@outlook.com>
|
|
Date: Mon, 9 Jan 2023 13:40:04 +0800
|
|
Subject: [PATCH] fix possible segmentation fault error in winograd on x86
|
|
|
|
---
|
|
.../src/layers/fast_convolution/fast_convolution.avx2.cpp | 2 +-
|
|
.../dnn/src/layers/fast_convolution/fast_convolution.cpp | 8 +++++++-
|
|
.../src/layers/fast_convolution/winograd_3x3s1_f63.cpp | 4 ++--
|
|
3 files changed, 10 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp
|
|
index 0d3c1447626a..c98fbe72bda8 100644
|
|
--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp
|
|
+++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp
|
|
@@ -119,7 +119,7 @@ void convBlock_AVX2(int np, const float* a, const float* b, float* c, int ldc, b
|
|
void _fx_winograd_accum_f32(const float* inwptr, const float* wptr,
|
|
float* outbuf, int Cg, int iblock)
|
|
{
|
|
- CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4);// && _FX_WINO_ATOM_F32 == 8);
|
|
+ CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 8);
|
|
if (iblock > 3)
|
|
{
|
|
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++,
|
|
diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
|
|
index 1cde7b324f6f..946980bebe49 100644
|
|
--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
|
|
+++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
|
|
@@ -105,6 +105,12 @@ Ptr<FastConv> initFastConv(
|
|
conv->conv_type = _FX_CONV_TYPE_GENERIC;
|
|
#endif
|
|
|
|
+#if CV_TRY_AVX2
|
|
+ // Disable Winograd when CV_TRY_AVX2 is true, but conv->useAVX2 is false.
|
|
+ if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && !conv->useAVX2)
|
|
+ conv->conv_type = _FX_CONV_TYPE_GENERIC;
|
|
+#endif
|
|
+
|
|
Mat weightsMat = _weightsMat.getMat();
|
|
auto wShape = shape(weightsMat);
|
|
const size_t wstep = weightsMat.step1();
|
|
@@ -257,7 +263,7 @@ Ptr<FastConv> initFastConv(
|
|
// we can always read MR elements starting from any valid index
|
|
{
|
|
int k = 0, nbias = K + VEC_ALIGN;
|
|
- conv->biasBuf.reserve(nbias);
|
|
+ conv->biasBuf.resize(nbias);
|
|
float* biasBufPtr = conv->biasBuf.data();
|
|
for(; k < K; k++)
|
|
biasBufPtr[k] = srcBias ? srcBias[k] : 0.f;
|
|
diff --git a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
|
|
index e3b80884102a..b0ccfd0cd24a 100644
|
|
--- a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
|
|
+++ b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
|
|
@@ -22,7 +22,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr,
|
|
float* outbuf, int Cg, int iblock)
|
|
{
|
|
#if CV_NEON && CV_NEON_AARCH64
|
|
- CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4);
|
|
+ CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4);
|
|
if (iblock > 3)
|
|
{
|
|
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++,
|
|
@@ -144,7 +144,7 @@ _fx_winograd_accum_f32(const float* inwptr, const float* wptr,
|
|
}
|
|
}
|
|
#elif CV_SIMD128
|
|
- CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4);
|
|
+ CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4);
|
|
for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++,
|
|
outbuf += _FX_WINO_ATOM_F32)
|
|
{
|