Commit 0125235

Reorganise /core directory.
1 parent 438f4ae commit 0125235

38 files changed: +458 −474 lines changed

larq_compute_engine/core/BUILD

Lines changed: 0 additions & 106 deletions
@@ -9,112 +9,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "bitpack",
-    hdrs = ["bitpack.h"] + select({
-        "//larq_compute_engine:aarch64_build": [
-            "bitpack_aarch64.h",
-        ],
-        "@org_tensorflow//tensorflow:android_arm64": [
-            "bitpack_aarch64.h",
-        ],
-        "//conditions:default": [],
-    }),
-    deps = [
-        ":types",
-        "@flatbuffers",
-        "@org_tensorflow//tensorflow/lite/kernels/internal:types",
-        "@ruy//ruy/profiler:instrumentation",
-    ],
-)
-
-cc_library(
-    name = "bitpack_utils",
-    hdrs = ["bitpack_utils.h"],
-    deps = [
-        ":bitpack",
-    ],
-)
-
-cc_library(
-    name = "padding_functor",
-    hdrs = ["padding_functor.h"],
-)
-
-cc_library(
-    name = "bconv2d_output_transform",
-    hdrs = [
-        "bconv2d_output_transform.h",
-    ],
-    deps = [
-        ":types",
-        "@org_tensorflow//tensorflow/lite/kernels/internal:common",
-        "@org_tensorflow//tensorflow/lite/kernels/internal:cppmath",
-    ],
-)
-
-cc_library(
-    name = "bgemm_kernels_common",
-    hdrs = [
-        "bgemm_kernels_common.h",
-    ],
-    deps = [
-        ":bconv2d_output_transform",
-    ],
-)
-
-cc_library(
-    name = "bgemm_kernels_arm",
-    hdrs = [
-        "bgemm_kernels_arm.h",
-        "bgemm_kernels_arm32.h",
-        "bgemm_kernels_arm64.h",
-    ],
-    deps = [
-        ":bgemm_kernels_common",
-        "@ruy//ruy/profiler:instrumentation",
-    ],
-)
-
-cc_library(
-    name = "bgemm_kernels_ruy",
-    hdrs = [
-        "bgemm_kernels_arm.h",
-        "bgemm_kernels_ruy.h",
-    ],
-    deps = [
-        ":bgemm_kernels_arm",
-        ":bitpack",
-        "@ruy//ruy/profiler:instrumentation",
-    ],
-)
-
-cc_library(
-    name = "bgemm_impl",
-    hdrs = [
-        "bgemm_impl.h",
-        "bgemm_trmul_params.h",
-        "ruy_pack.h",
-    ],
-    deps = [
-        ":bgemm_kernels_ruy",
-        "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_context",
-        "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_gemm",
-        "@ruy//ruy/profiler:instrumentation",
-    ],
-)
-
-cc_library(
-    name = "bconv2d_impl_ref",
-    hdrs = [
-        "bconv2d_impl_ref.h",
-    ],
-    deps = [
-        ":bconv2d_output_transform",
-        "@org_tensorflow//tensorflow/lite/kernels/internal:types",
-    ],
-)
-
 cc_library(
     name = "bmaxpool",
     hdrs = [

larq_compute_engine/core/bconv2d/BUILD (new file)

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "output_transform",
+    hdrs = [
+        "output_transform.h",
+    ],
+    deps = [
+        "//larq_compute_engine/core:types",
+        "@org_tensorflow//tensorflow/lite/kernels/internal:common",
+        "@org_tensorflow//tensorflow/lite/kernels/internal:cppmath",
+    ],
+)
+
+cc_library(
+    name = "padding_functor",
+    hdrs = ["padding_functor.h"],
+)
+
+cc_library(
+    name = "reference",
+    hdrs = [
+        "reference.h",
+    ],
+    deps = [
+        ":output_transform",
+        "@org_tensorflow//tensorflow/lite/kernels/internal:types",
+    ],
+)
+
+cc_library(
+    name = "optimized",
+    hdrs = [
+        "optimized.h",
+    ],
+    deps = [
+        ":padding_functor",
+        "//larq_compute_engine/core/bgemm",
+        "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_context",
+        "@org_tensorflow//tensorflow/lite/kernels:cpu_backend_gemm",
+        "@org_tensorflow//tensorflow/lite/kernels:padding",
+        "@org_tensorflow//tensorflow/lite/kernels/internal:optimized_base",
+        "@ruy//ruy/profiler:instrumentation",
+    ],
+)

larq_compute_engine/tflite/kernels/bconv2d_impl.h renamed to larq_compute_engine/core/bconv2d/optimized.h

Lines changed: 25 additions & 27 deletions
@@ -1,24 +1,20 @@
-#ifndef COMPUTE_ENGINE_TFLITE_KERNELS_BCONV2D_IMPL_H_
-#define COMPUTE_ENGINE_TFLITE_KERNELS_BCONV2D_IMPL_H_
+#ifndef COMPUTE_ENGINE_CORE_BCONV2D_OPTIMIZED_H_
+#define COMPUTE_ENGINE_CORE_BCONV2D_OPTIMIZED_H_
 
-#include "larq_compute_engine/core/bgemm_impl.h"
-#include "larq_compute_engine/core/padding_functor.h"
+#include "larq_compute_engine/core/bconv2d/padding_functor.h"
+#include "larq_compute_engine/core/bgemm/bgemm.h"
 #include "ruy/profiler/instrumentation.h"
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
 #include "tensorflow/lite/kernels/internal/optimized/im2col_utils.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 #include "tensorflow/lite/kernels/padding.h"
 
-using namespace tflite;
-
 namespace compute_engine {
+namespace core {
+namespace bconv2d {
 
-namespace ce = compute_engine;
-
-namespace tflite {
-
-using ce::core::TBitpacked;
+using namespace tflite;
 
 inline void im2col(const ConvParams& params, const RuntimeShape& input_shape,
                    const TBitpacked* input_data,
@@ -77,15 +73,15 @@ const float* GetPostActivationMultiplier(
 }
 
 template <typename AccumScalar, typename DstScalar>
-inline void BConv2D(const ConvParams& params, const RuntimeShape& input_shape,
-                    const TBitpacked* input_data,
-                    const RuntimeShape& filter_shape,
-                    const TBitpacked* packed_filter_data,
-                    const OutputTransform<DstScalar>& output_transform,
-                    const RuntimeShape& output_shape, DstScalar* output_data,
-                    const RuntimeShape& im2col_shape, TBitpacked* im2col_data,
-                    const float* padding_buffer, const int pad_value,
-                    CpuBackendContext* cpu_backend_context) {
+inline void BConv2DOptimized(
+    const ConvParams& params, const RuntimeShape& input_shape,
+    const TBitpacked* input_data, const RuntimeShape& filter_shape,
+    const TBitpacked* packed_filter_data,
+    const OutputTransform<DstScalar>& output_transform,
+    const RuntimeShape& output_shape, DstScalar* output_data,
+    const RuntimeShape& im2col_shape, TBitpacked* im2col_data,
+    const float* padding_buffer, const int pad_value,
+    CpuBackendContext* cpu_backend_context) {
   TF_LITE_ASSERT_EQ(input_shape.DimensionsCount(), 4);
   TF_LITE_ASSERT_EQ(filter_shape.DimensionsCount(), 4);
   TF_LITE_ASSERT_EQ(output_shape.DimensionsCount(), 4);
@@ -132,7 +128,7 @@ inline void BConv2D(const ConvParams& params, const RuntimeShape& input_shape,
     std::fill(
         output_data,
         output_data + FlatSizeSkipDim(output_shape, 3) *
-                          ce::core::GetBitpackedSize(output_shape.Dims(3)),
+                          bitpacking::GetBitpackedSize(output_shape.Dims(3)),
         TBitpacked(0));
   }
 
@@ -162,8 +158,9 @@ inline void BConv2D(const ConvParams& params, const RuntimeShape& input_shape,
   dst_params.rows = n;
   dst_params.cols = m;
 
-  BGemm<AccumScalar>(lhs_params, lhs_data, rhs_params, rhs_data, dst_params,
-                     output_data, output_transform, cpu_backend_context);
+  bgemm::BGemm<AccumScalar>(lhs_params, lhs_data, rhs_params, rhs_data,
+                            dst_params, output_data, output_transform,
+                            cpu_backend_context);
 
   if (params.padding_type == PaddingType::kSame && pad_value == 0) {
     const int stride_width = params.stride_width;
@@ -180,9 +177,9 @@ inline void BConv2D(const ConvParams& params, const RuntimeShape& input_shape,
     const int output_width = output_shape.Dims(2);
     const int output_height = output_shape.Dims(1);
 
-    ce::core::PaddingFunctor padding_functor;
+    PaddingFunctor padding_functor;
     {
-      ruy::profiler::ScopeLabel label3("ZeroPaddingCorrection");
+      ruy::profiler::ScopeLabel label("ZeroPaddingCorrection");
       padding_functor(
           batches, input_height, input_width, input_depth, nullptr,
           filter_height, filter_width, output_depth, stride_height,
@@ -193,7 +190,8 @@ inline void BConv2D(const ConvParams& params, const RuntimeShape& input_shape,
   }
 }
 
-}  // namespace tflite
+}  // namespace bconv2d
+}  // namespace core
 }  // namespace compute_engine
 
-#endif  // COMPUTE_ENGINE_TFLITE_KERNELS_BCONV2D_IMPL_H_
+#endif  // COMPUTE_ENGINE_CORE_BCONV2D_OPTIMIZED_H_
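
For downstream code (e.g. the TF Lite kernel wrappers), the practical effect of this rename is a new include path, an extra core::bconv2d namespace level, and the BConv2D → BConv2DOptimized rename. A minimal sketch of an updated call site follows; the wrapper function and variable names are assumptions for illustration and are not part of this commit.

// Hypothetical call site, for illustration only: RunBConv2D and its parameter
// names are assumptions, not code introduced by this commit.
#include "larq_compute_engine/core/bconv2d/optimized.h"

namespace ce = compute_engine;

template <typename AccumScalar, typename DstScalar>
void RunBConv2D(const tflite::ConvParams& params,
                const tflite::RuntimeShape& input_shape,
                const ce::core::TBitpacked* input_data,
                const tflite::RuntimeShape& filter_shape,
                const ce::core::TBitpacked* packed_filter_data,
                const ce::core::bconv2d::OutputTransform<DstScalar>& output_transform,
                const tflite::RuntimeShape& output_shape, DstScalar* output_data,
                const tflite::RuntimeShape& im2col_shape,
                ce::core::TBitpacked* im2col_data, const float* padding_buffer,
                int pad_value, tflite::CpuBackendContext* cpu_backend_context) {
  // Before this commit the same code lived in
  // larq_compute_engine/tflite/kernels/bconv2d_impl.h and was called as
  // compute_engine::tflite::BConv2D<AccumScalar, DstScalar>(...).
  ce::core::bconv2d::BConv2DOptimized<AccumScalar, DstScalar>(
      params, input_shape, input_data, filter_shape, packed_filter_data,
      output_transform, output_shape, output_data, im2col_shape, im2col_data,
      padding_buffer, pad_value, cpu_backend_context);
}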

larq_compute_engine/core/bconv2d_output_transform.h renamed to larq_compute_engine/core/bconv2d/output_transform.h

Lines changed: 5 additions & 6 deletions
@@ -1,5 +1,5 @@
-#ifndef COMPUTE_ENGINE_CORE_OUTPUT_TRANSFORM_H_
-#define COMPUTE_ENGINE_CORE_OUTPUT_TRANSFORM_H_
+#ifndef COMPUTE_ENGINE_CORE_BCONV2D_OUTPUT_TRANSFORM_H_
+#define COMPUTE_ENGINE_CORE_BCONV2D_OUTPUT_TRANSFORM_H_
 
 #include <algorithm>
 #include <cstdint>
@@ -10,10 +10,8 @@
 #include "tensorflow/lite/kernels/internal/cppmath.h"
 
 namespace compute_engine {
-
 namespace core {
-
-using compute_engine::core::TBitpacked;
+namespace bconv2d {
 
 // Clamp an int32 value to int8 range
 inline std::int8_t saturate(std::int32_t x) {
@@ -169,7 +167,8 @@ struct OutputTransform<TBitpacked, OutputTransformDetails::Default> {
   }
 };
 
+}  // namespace bconv2d
 }  // namespace core
 }  // namespace compute_engine
 
-#endif  // COMPUTE_ENGINE_CORE_OUTPUT_TRANSFORM_H_
+#endif  // COMPUTE_ENGINE_CORE_BCONV2D_OUTPUT_TRANSFORM_H_
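
The saturate helper declared in this hunk clamps a 32-bit accumulator into the int8 output range before narrowing. A self-contained sketch of that behaviour is shown below; it is an illustration of the clamp described by the comment above, not necessarily the library's exact implementation.

#include <algorithm>
#include <cstdint>

// Clamp a 32-bit value into [-128, 127] and narrow to int8 (illustrative only).
inline std::int8_t SaturateToInt8(std::int32_t x) {
  x = std::max<std::int32_t>(x, -128);
  x = std::min<std::int32_t>(x, 127);
  return static_cast<std::int8_t>(x);
}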

larq_compute_engine/core/padding_functor.h renamed to larq_compute_engine/core/bconv2d/padding_functor.h

Lines changed: 8 additions & 9 deletions
@@ -1,19 +1,16 @@
-#ifndef COMPUTE_ENGINE_KERNELS_PADDING_H_
-#define COMPUTE_ENGINE_KERNELS_PADDING_H_
+#ifndef COMPUTE_ENGINE_CORE_BCONV2D_PADDING_FUNCTOR_H_
+#define COMPUTE_ENGINE_CORE_BCONV2D_PADDING_FUNCTOR_H_
 
-#include "larq_compute_engine/core/bitpack.h"
+#include "larq_compute_engine/core/bitpacking/bitpack.h"
 #include "larq_compute_engine/core/types.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 
 namespace compute_engine {
 namespace core {
+namespace bconv2d {
 
-namespace ce = compute_engine;
-
-//
 // Applies (in-place) corrections for zero-padding
 // Assumes that padding type is 'SAME'.
-//
 class PaddingFunctor {
  public:
   static std::size_t get_cache_size(const int filter_height,
@@ -109,7 +106,8 @@ class PaddingFunctor {
       for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
         // Sum over input channels
         int popcount = 0;
-        int packed_channels = GetBitpackedSize(input_channels);
+        int packed_channels =
+            bitpacking::GetBitpackedSize(input_channels);
         for (int in_c = 0; in_c < packed_channels; ++in_c) {
           int filter_idx;
           // filter_data has shape
@@ -291,7 +289,8 @@
   }
 };
 
+}  // namespace bconv2d
 }  // namespace core
 }  // namespace compute_engine
 
-#endif  // COMPUTE_ENGINE_KERNELS_PADDING_H_
+#endif  // COMPUTE_ENGINE_CORE_BCONV2D_PADDING_FUNCTOR_H_
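
Both renamed headers now reach GetBitpackedSize through the bitpacking namespace (see the hunks above). As a reminder of what that helper computes, here is a self-contained sketch of its semantics, assuming the 32-bit TBitpacked word the library uses; this is an illustration, not the library's exact implementation.

#include <cstdint>

using TBitpacked = std::int32_t;                            // assumption: 32-bit bitpacking words
constexpr int kBitpackedBitwidth = 8 * sizeof(TBitpacked);  // 32

// Number of TBitpacked words needed to hold `channels` one-bit values,
// i.e. ceil(channels / 32).
constexpr int BitpackedSizeSketch(int channels) {
  return (channels + kBitpackedBitwidth - 1) / kBitpackedBitwidth;
}

static_assert(BitpackedSizeSketch(1) == 1, "one channel still needs a full word");
static_assert(BitpackedSizeSketch(64) == 2, "64 channels pack into two words");
static_assert(BitpackedSizeSketch(65) == 3, "65 channels spill into a third word");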

0 commit comments
