Gemv fp16 fix (#9331)

lxwlaq · web-flow · commit 7a610b39a098 · 2022-09-09T10:01:46.000+08:00
* Fixed fp16 gemv computation error when m = 1
diff --git a/lite/backends/arm/math/fp16/conv_impl_fp16.cc b/lite/backends/arm/math/fp16/conv_impl_fp16.cc
@@ -339,8 +339,8 @@ void conv1x1s1_gemm_fp16(CONV_PARAM(float16_t)) {
           }
         }
 
-        gemv_fp16(weights_group,
-                  din_group,
+        gemv_fp16(din_group,
+                  weights_group,
                   dout_group,
                   true,
                   n,
@@ -454,8 +454,8 @@ void conv_im2col_gemm_fp16(CONV_PARAM(float16_t)) {
           }
         }
 
-        gemv_fp16(weights_group,
-                  dB,
+        gemv_fp16(dB,
+                  weights_group,
                   dout_group,
                   true,
                   n,
diff --git a/lite/backends/arm/math/fp16/gemv_fp16.cc b/lite/backends/arm/math/fp16/gemv_fp16.cc
@@ -277,7 +277,7 @@ void gemv_fp16_trans(const float16_t *A,
                      ARMContext *ctx) {
   int Nup = (N + 7) / 8 * 8;
   int Mup = (M + 7) / 8 * 8;
-  auto size = (Mup * 3 + Nup);
+  auto size = (Mup * 2 + Nup);
   ctx->ExtendWorkspace(size * sizeof(float16_t));
   auto ptr_zero = ctx->workspace_data<float16_t>();
   memset(ptr_zero, 0, Mup * sizeof(float16_t));
@@ -289,12 +289,8 @@ void gemv_fp16_trans(const float16_t *A,
     memset(bias_ptr, 0, Mup * sizeof(float16_t));
   }
   float16_t *ptr_w = bias_ptr + Mup;
-  lite::TargetWrapperHost::MemcpySync(ptr_w, A, N * sizeof(float16_t));
+  lite::TargetWrapperHost::MemcpySync(ptr_w, x, N * sizeof(float16_t));
   memset(ptr_w + N, 0, (Nup - N) * sizeof(float16_t));
-  float16_t *data_in = ptr_w + Nup;
-  lite::TargetWrapperHost::MemcpySync(
-      data_in, x + (N - 1) * M, M * sizeof(float16_t));
-  memset(data_in + M, 0, (Mup - M) * sizeof(float16_t));
   memset(y, 0, M * sizeof(float16_t));
   float16_t local_alpha = 0.f;
   float16_t offset = 0.f;
@@ -317,7 +313,7 @@ void gemv_fp16_trans(const float16_t *A,
     int y_index = j * 8;
     const float16_t *ptr_in = ptr_w + y_index;
     const float16_t *inptr_row[8];
-    inptr_row[0] = x + y_index * M;
+    inptr_row[0] = A + y_index * M;
     for (int i = 1; i < 8; i++) {
       inptr_row[i] = inptr_row[i - 1] + M;
     }
diff --git a/lite/tests/math/gemv_fp16_compute_test.cc b/lite/tests/math/gemv_fp16_compute_test.cc
@@ -136,7 +136,7 @@ bool test_sgemv_fp16(bool tra,
                dc_basic,
                static_cast<float16_t>(1.f),
                static_cast<float16_t>(0.f),
-               false,
+               tra,
                has_bias,
                flag_act,
                alpha);
@@ -211,7 +211,7 @@ TEST(TestLiteGemvFP16, gemv_fp16) {
     LOG(INFO) << "run basic sgemm test";
     for (auto& m : {3, 8, 32, 397}) {
       for (auto& n : {3, 13, 141, 512, 789}) {
-        for (auto& tra : {false}) {
+        for (auto& tra : {false, true}) {
           for (auto& has_bias : {false, true}) {
             for (auto& flag_act : {0, 1}) {
               for (auto& th : {1, 2, 4}) {

Original file line number	Diff line number	Diff line change
`@@ -339,8 +339,8 @@ void conv1x1s1_gemm_fp16(CONV_PARAM(float16_t)) {`
`339`	`339`	`}`
`340`	`340`	`}`
`341`	`341`
`342`		`- gemv_fp16(weights_group,`
`343`		`- din_group,`
	`342`	`+ gemv_fp16(din_group,`
	`343`	`+ weights_group,`
`344`	`344`	`dout_group,`
`345`	`345`	`true,`
`346`	`346`	`n,`
`@@ -454,8 +454,8 @@ void conv_im2col_gemm_fp16(CONV_PARAM(float16_t)) {`
`454`	`454`	`}`
`455`	`455`	`}`
`456`	`456`
`457`		`- gemv_fp16(weights_group,`
`458`		`- dB,`
	`457`	`+ gemv_fp16(dB,`
	`458`	`+ weights_group,`
`459`	`459`	`dout_group,`
`460`	`460`	`true,`
`461`	`461`	`n,`