|
 // CHECK-LABEL: func.func @AtenMmQint8(
 // CHECK-SAME: %[[LHS:.*]]: !torch.vtensor<[3,4],si8>,
 // CHECK-SAME: %[[RHS:.*]]: !torch.vtensor<[4,3],si8>) -> !torch.vtensor<[3,3],f32> {
-// CHECK: %[[SHIFT:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
-// CHECK: %[[OUT_SCALE:.*]] = "tosa.const"() <{values = dense<3.784000e-04> : tensor<3x3xf32>}> : () -> tensor<3x3xf32>
+// CHECK-DAG: %[[SHIFT:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK-DAG: %[[OUT_SCALE:.*]] = "tosa.const"() <{values = dense<3.784000e-04> : tensor<1x1xf32>}> : () -> tensor<1x1xf32>
 // CHECK-DAG: %[[MUL_OUT_SHAPE:.*]] = tosa.const_shape {values = dense<3> : tensor<2xindex>} : () -> !tosa.shape<2>
 // CHECK-DAG: %[[RHS_SHAPE:.*]] = tosa.const_shape {values = dense<[1, 4, 3]> : tensor<3xindex>} : () -> !tosa.shape<3>
 // CHECK-DAG: %[[LHS_SHAPE:.*]] = tosa.const_shape {values = dense<[1, 3, 4]> : tensor<3xindex>} : () -> !tosa.shape<3>
|
 // CHECK: %[[MATMUL:.*]] = tosa.matmul %[[LHS_RESHAPED]], %[[RHS_RESHAPED]], %[[LHS_ZP]], %[[RHS_ZP]] : (tensor<1x3x4xi8>, tensor<1x4x3xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x3x3xi32>
 // CHECK: %[[MATMUL_RESHAPE:.*]] = tosa.reshape %[[MATMUL]], %[[MUL_OUT_SHAPE]] : (tensor<1x3x3xi32>, !tosa.shape<2>) -> tensor<3x3xi32>
 // CHECK: %[[MATMUL_FP32:.*]] = tosa.cast %[[MATMUL_RESHAPE]] : (tensor<3x3xi32>) -> tensor<3x3xf32>
-// CHECK: %[[OUT_SCALED:.*]] = tosa.mul %[[MATMUL_FP32]], %[[OUT_SCALE]], %[[SHIFT]] : (tensor<3x3xf32>, tensor<3x3xf32>, tensor<1xi8>) -> tensor<3x3xf32>
+// CHECK: %[[OUT_SCALED:.*]] = tosa.mul %[[MATMUL_FP32]], %[[OUT_SCALE]], %[[SHIFT]] : (tensor<3x3xf32>, tensor<1x1xf32>, tensor<1xi8>) -> tensor<3x3xf32>
 // CHECK: %[[RES:.*]] = torch_c.from_builtin_tensor %[[OUT_SCALED]] : tensor<3x3xf32> -> !torch.vtensor<[3,3],f32>
 // CHECK: return %[[RES]]
 func.func @AtenMmQint8(%arg0: !torch.vtensor<[3,4],si8>, %arg1: !torch.vtensor<[4,3],si8>) -> !torch.vtensor<[3,3],f32>
@@ -76,3 +76,65 @@ func.func @quantization_per_tensor(%arg0: !torch.vtensor<[2,4,4],f32>) -> !torch |
   %0 = torch.aten.quantize_per_tensor %arg0, %scale, %zp, %dtype : !torch.vtensor<[2,4,4],f32>, !torch.float, !torch.int, !torch.int -> !torch.vtensor<[2,4,4],!torch.qint8>
   return %0 : !torch.vtensor<[2,4,4],!torch.qint8>
 }
+
+
+// -----
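+// Per-channel dequantization: the per-channel scale and zero point (axis 0)
+// are reshaped to [3, 1, 1, 1] so they broadcast over the activation in the
+// cast/sub/mul sequence checked below.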
+// CHECK-LABEL: func.func @dequantize.self(
+// CHECK-SAME: %[[IN:.*]]: !torch.vtensor<[3,4,3,2],si8>,
+// CHECK-SAME: %[[SCALE:.*]]: !torch.vtensor<[3],f32>,
+// CHECK-SAME: %[[ZP:.*]]: !torch.vtensor<[3],si8>) -> !torch.vtensor<[3,4,3,2],f32> {
+// CHECK: %[[MUL_SHIFT:.*]] = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK: %[[QUANT_PARAM_SHAPE:.*]] = tosa.const_shape {values = dense<[3, 1, 1, 1]> : tensor<4xindex>} : () -> !tosa.shape<4>
+// CHECK: %[[IN_TENSOR:.*]] = torch_c.to_builtin_tensor %[[IN]] : !torch.vtensor<[3,4,3,2],si8> -> tensor<3x4x3x2xi8>
+// CHECK: %[[IN_I32:.*]] = tosa.cast %[[IN_TENSOR]] : (tensor<3x4x3x2xi8>) -> tensor<3x4x3x2xi32>
+// CHECK: %[[ZP_TENSOR:.*]] = torch_c.to_builtin_tensor %[[ZP]] : !torch.vtensor<[3],si8> -> tensor<3xi8>
+// CHECK: %[[ZP_I32:.*]] = tosa.cast %[[ZP_TENSOR]] : (tensor<3xi8>) -> tensor<3xi32>
+// CHECK: %[[ZP_RESHAPED:.*]] = tosa.reshape %[[ZP_I32]], %[[QUANT_PARAM_SHAPE]] : (tensor<3xi32>, !tosa.shape<4>) -> tensor<3x1x1x1xi32>
+// CHECK: %[[SUB:.*]] = tosa.sub %[[IN_I32]], %[[ZP_RESHAPED]] : (tensor<3x4x3x2xi32>, tensor<3x1x1x1xi32>) -> tensor<3x4x3x2xi32>
+// CHECK: %[[SUB_CAST:.*]] = tosa.cast %[[SUB]] : (tensor<3x4x3x2xi32>) -> tensor<3x4x3x2xf32>
+// CHECK: %[[SCALE_TENSOR:.*]] = torch_c.to_builtin_tensor %[[SCALE]] : !torch.vtensor<[3],f32> -> tensor<3xf32>
+// CHECK: %[[SCALE_RESHAPED:.*]] = tosa.reshape %[[SCALE_TENSOR]], %[[QUANT_PARAM_SHAPE]] : (tensor<3xf32>, !tosa.shape<4>) -> tensor<3x1x1x1xf32>
+// CHECK: %[[MUL:.*]] = tosa.mul %[[SUB_CAST]], %[[SCALE_RESHAPED]], %[[MUL_SHIFT]] : (tensor<3x4x3x2xf32>, tensor<3x1x1x1xf32>, tensor<1xi8>) -> tensor<3x4x3x2xf32>
+// CHECK: %[[RES:.*]] = torch_c.from_builtin_tensor %[[MUL]]
+func.func @dequantize.self(%arg0: !torch.vtensor<[3,4,3,2],si8>, %arg1: !torch.vtensor<[3],f32>, %arg2: !torch.vtensor<[3],si8>) -> !torch.vtensor<[3,4,3,2],f32> {
+  %int0 = torch.constant.int 0
+  %0 = torch.aten._make_per_channel_quantized_tensor %arg0, %arg1, %arg2, %int0 : !torch.vtensor<[3,4,3,2],si8>, !torch.vtensor<[3],f32>, !torch.vtensor<[3],si8>, !torch.int -> !torch.vtensor<[3,4,3,2],!torch.qint8>
+  %1 = torch.aten.dequantize.self %0 : !torch.vtensor<[3,4,3,2],!torch.qint8> -> !torch.vtensor<[3,4,3,2],f32>
+  return %1 : !torch.vtensor<[3,4,3,2],f32>
+}
+
+
+// -----
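+// Quantized convolution: the qint8 activation/weight wrappers and the qint32
+// bias should lower to a single tosa.conv2d carrying explicit input and weight
+// zero points, with no quantize/dequantize ops left behind.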
+// CHECK-LABEL: func.func @quantized_conv(
+// CHECK: %[[WTS_ZP:.*]] = "tosa.const"() <{values = dense<3> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK: %[[IN_ZP:.*]] = "tosa.const"() <{values = dense<7> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK: %[[CONV:.*]] = tosa.conv2d
+// CHECK-SAME: %[[IN_ZP]], %[[WTS_ZP]] {acc_type = i32, dilation = array<i64: 1, 1>, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>} : (tensor<?x7x8x4xi8>, tensor<3x3x2x4xi8>, tensor<?xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<?x5x7x3xi32>
+// CHECK-NOT: torch.aten.quantize_per_tensor
+// CHECK-NOT: torch.aten.dequantize.self
+// CHECK-NOT: torch.aten._make_per_tensor_quantized_tensor
+// CHECK-NOT: torch.aten.dequantize.tensor
+
+func.func @quantized_conv(%arg0: !torch.vtensor<[?,4,7,8],si8>, %arg1: !torch.vtensor<[3,4,3,2],si8>, %arg2: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?,3,5,7],f32> {
+  %false = torch.constant.bool false
+  %int1 = torch.constant.int 1
+  %int0 = torch.constant.int 0
+  %float1.000000e-04 = torch.constant.float 1.000000e-04
+  %int3 = torch.constant.int 3
+  %int7 = torch.constant.int 7
+  %float1.000000e-02 = torch.constant.float 1.000000e-02
+  %int14 = torch.constant.int 14
+  %0 = torch.aten.quantize_per_tensor %arg2, %float1.000000e-04, %int0, %int14 : !torch.vtensor<[?],f32>, !torch.float, !torch.int, !torch.int -> !torch.vtensor<[?],!torch.qint32>
+  %1 = torch.aten.dequantize.self %0 : !torch.vtensor<[?],!torch.qint32> -> !torch.vtensor<[?],f32>
+  %2 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
+  %3 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
+  %4 = torch.prim.ListConstruct : () -> !torch.list<int>
+  %5 = torch.aten._make_per_tensor_quantized_tensor %arg0, %float1.000000e-02, %int7 : !torch.vtensor<[?,4,7,8],si8>, !torch.float, !torch.int -> !torch.vtensor<[?,4,7,8],!torch.qint8>
+  %6 = torch.aten._make_per_tensor_quantized_tensor %arg1, %float1.000000e-02, %int3 : !torch.vtensor<[3,4,3,2],si8>, !torch.float, !torch.int -> !torch.vtensor<[3,4,3,2],!torch.qint8>
+  %7 = torch.aten.quantize_per_tensor %1, %float1.000000e-04, %int0, %int14 : !torch.vtensor<[?],f32>, !torch.float, !torch.int, !torch.int -> !torch.vtensor<[?],!torch.qint32>
+  %8 = torch.aten.int_repr %7 : !torch.vtensor<[?],!torch.qint32> -> !torch.vtensor<[?],si32>
+  %9 = torch.aten.convolution %5, %6, %8, %2, %3, %2, %false, %4, %int1 : !torch.vtensor<[?,4,7,8],!torch.qint8>, !torch.vtensor<[3,4,3,2],!torch.qint8>, !torch.vtensor<[?],si32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[?,3,5,7],si32>
+  %10 = torch.aten._make_per_tensor_quantized_tensor %9, %float1.000000e-04, %int0 : !torch.vtensor<[?,3,5,7],si32>, !torch.float, !torch.int -> !torch.vtensor<[?,3,5,7],!torch.qint32>
+  %11 = torch.aten.dequantize.tensor %10 : !torch.vtensor<[?,3,5,7],!torch.qint32> -> !torch.vtensor<[?,3,5,7],f32>
+  return %11 : !torch.vtensor<[?,3,5,7],f32>
+}