larq · lgeiger · Sep 8, 2021 · Mar 4, 2021 · Mar 4, 2021 · Apr 12, 2021
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -95,6 +95,8 @@ jobs:
         run: bazelisk test larq_compute_engine/mlir/tests:all --test_output=all
       - name: Run End2End tests
         run: bazelisk test larq_compute_engine/tests:end2end_test --test_output=all
+      - name: Run Strip dequantize op tests
+        run: bazelisk test larq_compute_engine/tests:strip_lcedequantize_test --test_output=all
 
   ConverterPython:
     runs-on: ubuntu-latest

diff --git a/larq_compute_engine/mlir/python/util.py b/larq_compute_engine/mlir/python/util.py
@@ -225,3 +225,79 @@ def modify_integer_quantized_model_io_type(
 
     # Convert the model to a bytearray
     return _convert_model_from_object_to_bytearray(model)
+
+
+def strip_lcedequantize_ops(model):
+    """Strip the LceDequantize ops to directly output bitpacked tf.int32 tensors.
+
+    Every `LceDequantize` operator whose first output is a model output is
+    removed, and that model output is rewired to the operator's first input.
+    `LceDequantize` operators that do not feed a model output are left alone.
+
+    Args:
+        model: The serialized model, in the form accepted by
+            `_convert_model_from_bytearray_to_object`.
+
+    Returns:
+        The modified model, serialized again by
+        `_convert_model_from_object_to_bytearray`.
+
+    Raises:
+        ValueError: If the model has more than one subgraph, if it contains no
+            `LceDequantize` operator code, or if an output `LceDequantize`
+            operator does not map a tf.int32 tensor to a tf.float32 tensor.
+    """
+    # Convert the model to an object
+    model = _convert_model_from_bytearray_to_object(model)
+
+    if len(model.subgraphs) > 1:
+        raise ValueError(
+            "Model must only have one subgraph. Instead, it has "
+            "{} subgraphs.".format(len(model.subgraphs))
+        )
+
+    subgraph = model.subgraphs[0]
+    tensors = subgraph.tensors
+    operators = subgraph.operators
+
+    # Ensure model has at least one LceDequantize operator
+    lce_dequant_opcode_idx = next(
+        (
+            idx
+            for idx, opcode in enumerate(model.operatorCodes)
+            if opcode.customCode == b"LceDequantize"
+        ),
+        None,
+    )
+    if lce_dequant_opcode_idx is None:
+        raise ValueError("Model does not contain any LceDequantize operators.")
+
+    # Collect the LceDequantize operators that produce a model output and
+    # validate their input/output tensor types.
+    lce_output_dequant_ops = []
+    for op in operators:
+        if (
+            op.opcodeIndex != lce_dequant_opcode_idx
+            or op.outputs[0] not in subgraph.outputs
+        ):
+            continue
+        float_tensor = tensors[op.outputs[0]]
+        int_tensor = tensors[op.inputs[0]]
+        if float_tensor.type != tflite_schema.TensorType.FLOAT32:
+            raise ValueError(
+                "Model {} type must be tf.float32. Expected type for tensor with "
+                "name '{}' is tf.float32, instead type is tf.{}".format(
+                    "output",
+                    float_tensor.name,
+                    _convert_tflite_enum_type_to_tf_type(float_tensor.type).name,
+                )
+            )
+        if int_tensor.type != tflite_schema.TensorType.INT32:
+            raise ValueError(
+                "Expected type for tensor with "
+                "name '{}' is tf.int32, instead type is tf.{}".format(
+                    int_tensor.name,
+                    _convert_tflite_enum_type_to_tf_type(int_tensor.type).name,
+                )
+            )
+        lce_output_dequant_ops.append(op)
+
+    # Remove the LceDequantize operators
+    remove_tensors_idxs = set()
+    for op in lce_output_dequant_ops:
+        # Rewire element by element rather than with a boolean mask: masking
+        # only works when `subgraph.outputs` is a NumPy array, whereas on a
+        # plain list `outputs == idx` is False and would overwrite output 0.
+        for pos, tensor_idx in enumerate(subgraph.outputs):
+            if tensor_idx == op.outputs[0]:
+                subgraph.outputs[pos] = op.inputs[0]
+        remove_tensors_idxs.add(op.outputs[0])
+        operators.remove(op)
+
+    # Remove tensors marked for deletion.
+    _remove_tensors_from_model(model, remove_tensors_idxs)
+
+    # Convert the model to a bytearray
+    return _convert_model_from_object_to_bytearray(model)
diff --git a/larq_compute_engine/tests/BUILD b/larq_compute_engine/tests/BUILD
@@ -20,6 +20,14 @@ py_test(
     ],
 )
 
+# Python test target for strip_lcedequantize_test.py; it depends on the
+# converter target under //larq_compute_engine/mlir.
+py_test(
+    name = "strip_lcedequantize_test",
+    srcs = ["strip_lcedequantize_test.py"],
+    deps = [
+        "//larq_compute_engine/mlir:converter",
+    ],
+)
+
 py_test(
     name = "convert_model",
     srcs = ["convert_model.py"],

diff --git a/larq_compute_engine/tests/strip_lcedequantize_test.py b/larq_compute_engine/tests/strip_lcedequantize_test.py
@@ -0,0 +1,76 @@
+import sys
+
+import larq as lq
+import pytest
+import tensorflow as tf
+
+from larq_compute_engine.mlir.python.converter import convert_keras_model
+from larq_compute_engine.mlir.python.util import strip_lcedequantize_ops
+
+
+def toy_model_sign(**kwargs):
+    """Build a toy Keras model: one binarized convolution followed by a sign op.
+
+    Any keyword arguments are accepted and ignored.
+
+    Returns:
+        A `tf.keras.Model` mapping a (224, 224, 3) input to the `SteSign`
+        output of a 256-filter `QuantConv2D` layer.
+    """
+    inputs = tf.keras.layers.Input(shape=(224, 224, 3))
+    conv_options = dict(
+        kernel_size=3,
+        strides=1,
+        padding="same",
+        pad_values=1,
+        input_quantizer="ste_sign",
+        kernel_quantizer="ste_sign",
+        kernel_constraint="weight_clip",
+    )
+    features = lq.layers.QuantConv2D(256, **conv_options)(inputs)
+    outputs = lq.quantizers.SteSign()(features)
+    return tf.keras.Model(inputs=inputs, outputs=outputs)
+
+
+def quant(x):
+    """Fake-quantize `x` with a fixed [-3.0, 3.0] min/max range."""
+    range_min, range_max = -3.0, 3.0
+    return tf.quantization.fake_quant_with_min_max_vars(x, range_min, range_max)
+
+
+def toy_model_int8_sign(**kwargs):
+    """Build a toy Keras model with fake-quantized input and output.
+
+    The input is fake-quantized, passed through one binarized convolution and
+    a sign op, and the result is fake-quantized again. Any keyword arguments
+    are accepted and ignored.
+
+    Returns:
+        A `tf.keras.Model` mapping a (224, 224, 3) input to the fake-quantized
+        sign output.
+    """
+    img = tf.keras.layers.Input(shape=(224, 224, 3))
+    x = quant(img)
+    # Feed the fake-quantized tensor (not the raw input) into the convolution;
+    # otherwise the input quantization above is dead code.
+    x = lq.layers.QuantConv2D(
+        256,
+        kernel_size=3,
+        strides=1,
+        padding="same",
+        pad_values=1,
+        input_quantizer="ste_sign",
+        kernel_quantizer="ste_sign",
+        kernel_constraint="weight_clip",
+    )(x)
+    x = lq.quantizers.SteSign()(x)
+    x = quant(x)
+    return tf.keras.Model(inputs=img, outputs=x)
+
+
+@pytest.mark.parametrize("model_cls", [toy_model_sign, toy_model_int8_sign])
+@pytest.mark.parametrize("inference_input_type", [tf.int8, tf.float32])
+@pytest.mark.parametrize("inference_output_type", [tf.int8, tf.float32])
+def test_strip_lcedequantize_ops(
+    model_cls, inference_input_type, inference_output_type
+):
+    """Check the input/output dtypes of a converted model after stripping.
+
+    The single input must keep the requested inference input type. The single
+    output must be tf.int32 when tf.float32 was requested, and must keep the
+    requested type otherwise.
+    """
+    converted = convert_keras_model(
+        model_cls(),
+        inference_input_type=inference_input_type,
+        inference_output_type=inference_output_type,
+        experimental_default_int8_range=None,
+        experimental_enable_bitpacked_activations=True,
+    )
+    stripped = strip_lcedequantize_ops(converted)
+    interpreter = tf.lite.Interpreter(model_content=stripped)
+
+    inputs = interpreter.get_input_details()
+    assert len(inputs) == 1
+    assert inputs[0]["dtype"] == inference_input_type.as_numpy_dtype
+
+    outputs = interpreter.get_output_details()
+    assert len(outputs) == 1
+    # A float32 output is expected to turn into int32 once the op is stripped.
+    expected_output_type = (
+        tf.int32 if inference_output_type == tf.float32 else inference_output_type
+    )
+    assert outputs[0]["dtype"] == expected_output_type.as_numpy_dtype
+
+
+# Allow running this test file directly as a script: pytest is invoked on this
+# file with `-s` (output capturing disabled) and its return value is used as
+# the process exit code.
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-s"]))