Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/layer/arm/convolution_3x3_winograd.h
Original file line number Diff line number Diff line change
Expand Up @@ -4675,7 +4675,7 @@ static inline void conv3x3s1_winograd23_transform_input_tile(const Mat& bottom_b
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w - 1) / 2;

Expand Down Expand Up @@ -5174,7 +5174,7 @@ static inline void conv3x3s1_winograd23_transform_output_tile(const Mat& top_til
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 1) / 2;

Expand Down Expand Up @@ -5819,7 +5819,7 @@ static inline void conv3x3s1_winograd43_transform_input_tile(const Mat& bottom_b
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w + 1) / 4;

Expand Down Expand Up @@ -6620,7 +6620,7 @@ static inline void conv3x3s1_winograd43_transform_output_tile(const Mat& top_til
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 3) / 4;

Expand Down Expand Up @@ -7506,7 +7506,7 @@ static inline void conv3x3s1_winograd63_transform_input_tile(const Mat& bottom_b
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w + 3) / 6;

Expand Down Expand Up @@ -8472,7 +8472,7 @@ static inline void conv3x3s1_winograd63_transform_output_tile(const Mat& top_til
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 5) / 6;

Expand Down
12 changes: 6 additions & 6 deletions src/layer/arm/convolution_3x3_winograd_bf16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ static inline void conv3x3s1_winograd23_transform_input_tile_bf16s(const Mat& bo
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w - 1) / 2;

Expand Down Expand Up @@ -514,7 +514,7 @@ static inline void conv3x3s1_winograd23_transform_output_tile_bf16s(const Mat& t
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 1) / 2;

Expand Down Expand Up @@ -1055,7 +1055,7 @@ static inline void conv3x3s1_winograd43_transform_input_tile_bf16s(const Mat& bo
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w + 1) / 4;

Expand Down Expand Up @@ -1858,7 +1858,7 @@ static inline void conv3x3s1_winograd43_transform_output_tile_bf16s(const Mat& t
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 3) / 4;

Expand Down Expand Up @@ -2634,7 +2634,7 @@ static inline void conv3x3s1_winograd63_transform_input_tile_bf16s(const Mat& bo
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w + 3) / 6;

Expand Down Expand Up @@ -3604,7 +3604,7 @@ static inline void conv3x3s1_winograd63_transform_output_tile_bf16s(const Mat& t
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 5) / 6;

Expand Down
8 changes: 4 additions & 4 deletions src/layer/arm/convolution_3x3_winograd_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -3626,7 +3626,7 @@ static inline void conv3x3s1_winograd23_transform_input_tile_int8(const Mat& bot
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w - 1) / 2;

Expand Down Expand Up @@ -3919,7 +3919,7 @@ static inline void conv3x3s1_winograd23_transform_output_tile_int8(const Mat& to
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 1) / 2;

Expand Down Expand Up @@ -4484,7 +4484,7 @@ static inline void conv3x3s1_winograd43_transform_input_tile_int8(const Mat& bot
const int w = bottom_blob.w;
const int h = bottom_blob.h;
const int elempack = bottom_blob.elempack;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int w_tiles = (w + 1) / 4;

Expand Down Expand Up @@ -4896,7 +4896,7 @@ static inline void conv3x3s1_winograd43_transform_output_tile_int8(const Mat& to
const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int N = top_blob.cstep * out_elempack;
const size_t N = top_blob.cstep * out_elempack;

const int w_tiles = (outw + 3) / 4;

Expand Down
2 changes: 1 addition & 1 deletion src/layer/arm/convolution_im2col_gemm.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ static void convolution_gemm_transB_packed_tile(const Mat& AT_tile, const Mat& B
// NCNN_LOGE("convolution_gemm_transB_packed_tile %d %d %d %d %d %d", i, max_ii, j, max_jj, k, max_kk);

const int out_elempack = top_blob.elempack;
const int out_hstep = (int)top_blob.cstep;
const size_t out_hstep = top_blob.cstep;

const float* pAT = AT_tile;
const float* pBT = BT_tile;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/arm/convolution_im2col_gemm_bf16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ static void convolution_gemm_transB_packed_tile_bf16s(const Mat& AT_tile, const
// NCNN_LOGE("convolution_gemm_transB_packed_tile_bf16s %d %d %d %d %d %d", i, max_ii, j, max_jj, k, max_kk);

const int out_elempack = top_blob.elempack;
const int out_hstep = (int)top_blob.cstep;
const size_t out_hstep = top_blob.cstep;

const unsigned short* pAT = AT_tile;
const unsigned short* pBT = BT_tile;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/arm/convolution_im2col_gemm_fp16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ static void convolution_gemm_transB_packed_tile_fp16sa(const Mat& AT_tile, const
// NCNN_LOGE("convolution_gemm_transB_packed_tile_fp16sa %d %d %d %d %d %d", i, max_ii, j, max_jj, k, max_kk);

const int out_elempack = top_blob.elempack;
const int out_hstep = (int)top_blob.cstep;
const size_t out_hstep = top_blob.cstep;

const __fp16* pAT = AT_tile;
const __fp16* pBT = BT_tile;
Expand Down
4 changes: 2 additions & 2 deletions src/layer/arm/convolution_packed.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,14 +499,14 @@ static void convolution_packed(const Mat& bottom_blob, Mat& top_blob, const Mat&
const int elempack = bottom_blob.elempack;
const int inch = bottom_blob.c * elempack;

const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int outch = top_blob.c * out_elempack;

const int M = top_blob.cstep * out_elempack;
const size_t M = top_blob.cstep * out_elempack;

const int maxk = kernel_w * kernel_h;

Expand Down
4 changes: 2 additions & 2 deletions src/layer/arm/convolution_packed_bf16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -499,14 +499,14 @@ static void convolution_packed_bf16s(const Mat& bottom_blob, Mat& top_blob, cons
const int elempack = bottom_blob.elempack;
const int inch = bottom_blob.c * elempack;

const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int outch = top_blob.c * out_elempack;

const int M = top_blob.cstep * out_elempack;
const size_t M = top_blob.cstep * out_elempack;

const int maxk = kernel_w * kernel_h;

Expand Down
8 changes: 4 additions & 4 deletions src/layer/arm/convolution_packed_fp16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -448,14 +448,14 @@ static void convolution_packed_fp16s(const Mat& bottom_blob, Mat& top_blob, cons
const int elempack = bottom_blob.elempack;
const int inch = bottom_blob.c * elempack;

const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int outch = top_blob.c * out_elempack;

const int M = top_blob.cstep * out_elempack;
const size_t M = top_blob.cstep * out_elempack;

const int maxk = kernel_w * kernel_h;

Expand Down Expand Up @@ -1223,14 +1223,14 @@ static void convolution_packed_fp16sa(const Mat& bottom_blob, Mat& top_blob, con
const int elempack = bottom_blob.elempack;
const int inch = bottom_blob.c * elempack;

const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int outw = top_blob.w;
const int outh = top_blob.h;
const int out_elempack = top_blob.elempack;
const int outch = top_blob.c * out_elempack;

const int M = top_blob.cstep * out_elempack;
const size_t M = top_blob.cstep * out_elempack;

const int maxk = kernel_w * kernel_h;

Expand Down
12 changes: 6 additions & 6 deletions src/layer/arm/convolution_packed_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
const int elempack = bottom_blob.elempack;
const int inch = bottom_blob.c * elempack;

const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

const int outw = top_blob.w;
const int outh = top_blob.h;
Expand Down Expand Up @@ -576,8 +576,8 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
// shadowed variable for less openmp task args
const int outw = top_blob.w;
const int outh = top_blob.h;
const int N = bottom_blob.cstep * elempack;
const int M = top_blob.cstep * out_elempack;
const size_t N = bottom_blob.cstep * elempack;
const size_t M = top_blob.cstep * out_elempack;

int* outptr = top_blob.channel(p / out_elempack);

Expand Down Expand Up @@ -908,8 +908,8 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
// shadowed variable for less openmp task args
const int outw = top_blob.w;
const int outh = top_blob.h;
const int N = bottom_blob.cstep * elempack;
const int M = top_blob.cstep * out_elempack;
const size_t N = bottom_blob.cstep * elempack;
const size_t M = top_blob.cstep * out_elempack;

int* outptr = top_blob.channel(p / out_elempack);

Expand Down Expand Up @@ -1167,7 +1167,7 @@ static void convolution_packed_int8(const Mat& bottom_blob, Mat& top_blob, const
// shadowed variable for less openmp task args
const int outw = top_blob.w;
const int outh = top_blob.h;
const int N = bottom_blob.cstep * elempack;
const size_t N = bottom_blob.cstep * elempack;

int* outptr0 = top_blob.channel(p);
int* outptr1 = top_blob.channel(p + 1);
Expand Down
12 changes: 6 additions & 6 deletions src/layer/arm/gemm_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Gemm_arm::Gemm_arm()
void pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
{
const int elempack = A.elempack;
const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w;
const size_t A_hstep = A.dims == 3 ? A.cstep : (size_t)A.w;

float* pp = AT;

Expand Down Expand Up @@ -256,7 +256,7 @@ void pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
static void transpose_pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
{
const int elempack = A.elempack;
const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w;
const size_t A_hstep = A.dims == 3 ? A.cstep : (size_t)A.w;

float* pp = AT;

Expand Down Expand Up @@ -400,7 +400,7 @@ static void transpose_pack_A_tile(const Mat& A, Mat& AT, int i, int max_ii, int
static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max_kk)
{
const int elempack = B.elempack;
const int B_hstep = B.dims == 3 ? (int)B.cstep : B.w;
const size_t B_hstep = B.dims == 3 ? B.cstep : (size_t)B.w;

float* pp = BT;

Expand Down Expand Up @@ -708,7 +708,7 @@ static void pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max
static void transpose_pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int k, int max_kk)
{
const int elempack = B.elempack;
const int B_hstep = B.dims == 3 ? (int)B.cstep : B.w;
const size_t B_hstep = B.dims == 3 ? B.cstep : (size_t)B.w;

float* pp = BT;

Expand Down Expand Up @@ -895,7 +895,7 @@ static void transpose_pack_B_tile(const Mat& B, Mat& BT, int j, int max_jj, int
static void transpose_unpack_output_tile(const Mat& topT, Mat& top_blob, int i, int max_ii, int j, int max_jj)
{
const int out_elempack = top_blob.elempack;
const int out_hstep = top_blob.dims == 3 ? (int)top_blob.cstep : top_blob.w;
const size_t out_hstep = top_blob.dims == 3 ? top_blob.cstep : (size_t)top_blob.w;

const float* pp = topT;

Expand Down Expand Up @@ -1042,7 +1042,7 @@ static void transpose_unpack_output_tile(const Mat& topT, Mat& top_blob, int i,
static void gemm_transB_packed_tile(const Mat& AT_tile, const Mat& BT_tile, const Mat& CT_tile, Mat& topT_tile, Mat& top_blob, int broadcast_type_C, int i, int max_ii, int j, int max_jj, int k, int max_kk, bool k_end)
{
const int out_elempack = top_blob.elempack;
const int out_hstep = top_blob.dims == 3 ? (int)top_blob.cstep : top_blob.w;
const size_t out_hstep = top_blob.dims == 3 ? top_blob.cstep : (size_t)top_blob.w;

const float* pAT = AT_tile;
const float* pBT = BT_tile;
Expand Down
2 changes: 1 addition & 1 deletion src/layer/arm/gemm_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace ncnn {
static void gemm_transB_packed_tile_fp16sa(const Mat& AT_tile, const Mat& BT_tile, const Mat& CT_tile, Mat& topT_tile, Mat& top_blob, int broadcast_type_C, int i, int max_ii, int j, int max_jj, int k, int max_kk, bool k_end)
{
const int out_elempack = top_blob.elempack;
const int out_hstep = top_blob.dims == 3 ? (int)top_blob.cstep : top_blob.w;
const size_t out_hstep = top_blob.dims == 3 ? top_blob.cstep : (size_t)top_blob.w;

const __fp16* pAT = AT_tile;
const __fp16* pBT = BT_tile;
Expand Down
12 changes: 6 additions & 6 deletions src/layer/arm/gemm_bf16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

static void pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
{
const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w;
const size_t A_hstep = A.dims == 3 ? A.cstep : (size_t)A.w;

unsigned short* pp = AT;

Expand Down Expand Up @@ -192,7 +192,7 @@ static void pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, int i, int max_ii, i

static void transpose_pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, int i, int max_ii, int k, int max_kk)
{
const int A_hstep = A.dims == 3 ? (int)A.cstep : A.w;
const size_t A_hstep = A.dims == 3 ? A.cstep : (size_t)A.w;

unsigned short* pp = AT;

Expand Down Expand Up @@ -256,7 +256,7 @@ static void transpose_pack_A_tile_fp32_to_bf16(const Mat& A, Mat& AT, int i, int

static void pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, int j, int max_jj, int k, int max_kk)
{
const int B_hstep = B.dims == 3 ? (int)B.cstep : B.w;
const size_t B_hstep = B.dims == 3 ? B.cstep : (size_t)B.w;

unsigned short* pp = BT;

Expand Down Expand Up @@ -567,7 +567,7 @@ static void pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, int j, int max_jj, i

static void transpose_pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, int j, int max_jj, int k, int max_kk)
{
const int B_hstep = B.dims == 3 ? (int)B.cstep : B.w;
const size_t B_hstep = B.dims == 3 ? B.cstep : (size_t)B.w;

unsigned short* pp = BT;

Expand Down Expand Up @@ -646,7 +646,7 @@ static void transpose_pack_B_tile_fp32_to_bf16(const Mat& B, Mat& BT, int j, int
static void transpose_unpack_output_tile_fp32_to_bf16(const Mat& topT, Mat& top_blob, int i, int max_ii, int j, int max_jj)
{
const int out_elempack = top_blob.elempack;
const int out_hstep = top_blob.dims == 3 ? (int)top_blob.cstep : top_blob.w;
const size_t out_hstep = top_blob.dims == 3 ? top_blob.cstep : (size_t)top_blob.w;

const float* pp = topT;

Expand Down Expand Up @@ -785,7 +785,7 @@ static void transpose_unpack_output_tile_fp32_to_bf16(const Mat& topT, Mat& top_
static void gemm_transB_packed_tile_bf16s(const Mat& AT_tile, const Mat& BT_tile, const Mat& CT_tile, Mat& topT_tile, Mat& top_blob, int broadcast_type_C, float alpha, int i, int max_ii, int j, int max_jj, int k, int max_kk, bool k_end)
{
const int out_elempack = top_blob.elempack;
const int out_hstep = top_blob.dims == 3 ? (int)top_blob.cstep : top_blob.w;
const size_t out_hstep = top_blob.dims == 3 ? top_blob.cstep : (size_t)top_blob.w;

const unsigned short* pAT = AT_tile;
const unsigned short* pBT = BT_tile;
Expand Down
Loading
Loading