mlir/html/UniformSupport_8h_source.html

 //===- UniformSupport.h - Support utilities for uniform quant ---*- C++ -*-===//
 //
 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #ifndef MLIR_DIALECT_QUANTOPS_UNIFORMSUPPORT_H_
 #define MLIR_DIALECT_QUANTOPS_UNIFORMSUPPORT_H_

 #include "mlir/Dialect/QuantOps/QuantTypes.h"
 #include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/Types.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include <cmath>

 namespace mlir {
 namespace quant {

 /// Pairs an input type with the expressed type derived from it, and offers a
 /// conversion to a type based on a given QuantizedType.
 ///
 /// Only declarations are visible in this header; the definitions of
 /// `forInputType` and `convert` live in the corresponding .cpp file.
 struct ExpressedToQuantizedConverter {
   /// Creates a converter for the given input type.
   static const ExpressedToQuantizedConverter forInputType(Type inputType);

   /// Produces a type from `elementalType`.
   /// NOTE(review): presumably rebuilds `inputType` around the quantized
   /// element type; the failure behavior is not visible here - confirm against
   /// the definition.
   Type convert(QuantizedType elementalType) const;

   /// True exactly when `expressedType` converts to true, i.e. it is non-null.
   explicit operator bool() const { return static_cast<bool>(expressedType); }

   /// The type this converter was created for.
   const Type inputType;

   /// The expressed type associated with `inputType`. Its truthiness is what
   /// `operator bool` reports.
   /// NOTE(review): presumably null when the input type is unsupported -
   /// confirm against `forInputType`.
   const Type expressedType;
 };

 /// Converts real (floating point) values to the integer storage values of a
 /// uniform quantization scheme described by a scale, a zero point and a
 /// clamping range:
 ///
 ///   storage = clamp(round(expressed / scale) + zeroPoint, clampMin, clampMax)
 ///
 /// Rounding always uses round-to-nearest with ties away from zero; every
 /// constructor fixes `roundMode` to `APFloat::rmNearestTiesToAway`.
 ///
 /// The quantization parameters are kept both as APFloat (generic path) and as
 /// double (specialized f32 -> 8-bit path).
 class UniformQuantizedValueConverter {
 public:
   /// Builds a converter from the parameters carried by `uniformType`: scale,
   /// zero point, storage min/max (used as the clamping range), storage bit
   /// width and signedness.
   ///
   /// Asserts that the expressed type is a FloatType and the storage type is
   /// an IntegerType.
   explicit UniformQuantizedValueConverter(UniformQuantizedType uniformType)
       : UniformQuantizedValueConverter(
             uniformType.getScale(),
             static_cast<double>(uniformType.getZeroPoint()),
             static_cast<double>(uniformType.getStorageTypeMin()),
             static_cast<double>(uniformType.getStorageTypeMax()),
             uniformType.getStorageTypeIntegralWidth(), uniformType.isSigned()) {
     assert(uniformType.getExpressedType().isa<FloatType>());
     assert(uniformType.getStorageType().isa<IntegerType>());
   }

   /// Builds a converter from explicit parameters, with the clamping range
   /// given as doubles.
   ///
   /// \param scale            Divisor applied to the expressed value.
   /// \param zeroPoint        Offset added after scaling.
   /// \param clampMin         Lower bound of the result.
   /// \param clampMax         Upper bound of the result.
   /// \param storageBitWidth  Bit width of the produced integer.
   /// \param isSigned         Whether the produced integer is signed.
   UniformQuantizedValueConverter(double scale, double zeroPoint,
                                  double clampMin, double clampMax,
                                  uint32_t storageBitWidth, bool isSigned)
       : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
         clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
         clampMinDouble(clampMin), clampMaxDouble(clampMax),
         storageBitWidth(storageBitWidth), isSigned(isSigned),
         roundMode(APFloat::rmNearestTiesToAway) {}

   /// Same as the all-double constructor, but takes the clamping range as
   /// APFloat values. The double copies of the bounds are obtained through
   /// `convertToDouble()`.
   /// NOTE(review): this presumably requires the bounds to use IEEE double
   /// semantics - confirm against the APFloat documentation.
   UniformQuantizedValueConverter(double scale, double zeroPoint,
                                  APFloat clampMin, APFloat clampMax,
                                  uint32_t storageBitWidth, bool isSigned)
       : scale(scale), zeroPoint(zeroPoint), clampMin(clampMin),
         clampMax(clampMax), scaleDouble(scale), zeroPointDouble(zeroPoint),
         clampMinDouble(clampMin.convertToDouble()),
         clampMaxDouble(clampMax.convertToDouble()),
         storageBitWidth(storageBitWidth), isSigned(isSigned),
         roundMode(APFloat::rmNearestTiesToAway) {}

   /// Quantizes `expressedValue` and returns it as an APInt of
   /// `storageBitWidth` bits.
   ///
   /// IEEE single inputs with an 8-bit storage width are dispatched to
   /// `quantizeF32ToInt8`; everything else goes through APFloat arithmetic.
   virtual APInt quantizeFloatToInt(APFloat expressedValue) const {
     // This function is a performance critical code path in quantization
     // since it runs for each single float parameter value.

     // Specialize f32->u8/i8 case to optimize performance.
     if (&expressedValue.getSemantics() == &APFloat::IEEEsingle() &&
         storageBitWidth == 8 &&
         roundMode == llvm::APFloatBase::rmNearestTiesToAway) {
       return quantizeF32ToInt8(expressedValue);
     }

     // Bring the input to the semantics of `scale` so the APFloat operations
     // below work on operands of the same kind. The lossy flag is ignored.
     bool lossy;
     expressedValue.convert(scale.getSemantics(), roundMode, &lossy);
     // fixedpoint = clamp(clampMin, clampMax, (
     //   roundTiesAwayFromZero(expressed / scale) + zeroPoint))
     APFloat scaled = (expressedValue / scale);
     scaled.roundToIntegral(roundMode);
     scaled.add(zeroPoint, roundMode);
     APFloat fixedpoint = llvm::minimum(scaled, clampMax);
     fixedpoint = llvm::maximum(fixedpoint, clampMin);

     llvm::APSInt result(storageBitWidth, !isSigned);
     fixedpoint.convertToInteger(result, roundMode, &lossy);

     // `result` is an APSInt while the return type is APInt; the explicit move
     // is intended to let that conversion move rather than copy the value.
     return std::move(result);
   }

   /// Quantizes `expressedValue` and widens the result to int64_t, sign
   /// extending when the converter is signed and zero extending otherwise.
   int64_t quantizeFloatToInt64(APFloat expressedValue) const {
     APInt qValue = quantizeFloatToInt(expressedValue);
     return isSigned ? qValue.getSExtValue() : qValue.getZExtValue();
   }

   virtual ~UniformQuantizedValueConverter() = default;

 private:
   /// An optimized implementation to quantize f32 to i8/u8 with C++ native
   /// arithmetic.
   ///
   /// Preconditions (asserted): `expressedValue` is IEEE single, the storage
   /// width is 8 and the rounding mode is ties-away-from-zero.
   ///
   /// Unlike the generic path, the zero point is added before rounding.
   virtual APInt quantizeF32ToInt8(APFloat expressedValue) const {
     assert(&expressedValue.getSemantics() == &APFloat::IEEEsingle());
     assert(storageBitWidth == 8);
     assert(roundMode == llvm::APFloatBase::rmNearestTiesToAway);

     const float realValue = expressedValue.convertToFloat();

     const double scaled = realValue / scaleDouble + zeroPointDouble;
     // Round to nearest integer with halfway cases rounded away from zero.
     const double scaledRounded = std::round(scaled);
     const double clamped =
         std::min(std::max(scaledRounded, clampMinDouble), clampMaxDouble);

     uint64_t signlessResult;
     if (std::isnan(clamped)) {
       // A NaN passes through the std::max/std::min clamp above unchanged
       // (every comparison against it is false), and converting NaN to an
       // integer type is undefined behavior. Produce a zero storage value.
       // NOTE(review): believed to match the generic path, where
       // APFloat::convertToInteger yields zero for NaN - confirm.
       signlessResult = 0;
     } else if (isSigned) {
       // Sign extend to 64 bits, then reinterpret as unsigned; the APInt
       // constructor below receives the same bit pattern either way.
       const int64_t clampedInt = static_cast<int8_t>(clamped);
       signlessResult = static_cast<uint64_t>(clampedInt);
     } else {
       signlessResult = static_cast<uint8_t>(clamped);
     }
     return APInt(storageBitWidth, signlessResult);
   }

   // Keep both APFloat and double versions of the quantization parameters
   // around since they will be used in generic and specialized arithmetic,
   // respectively.
   const APFloat scale;
   const APFloat zeroPoint;
   const APFloat clampMin;
   const APFloat clampMax;

   const double scaleDouble;
   const double zeroPointDouble;
   const double clampMinDouble;
   const double clampMaxDouble;

   // Width and signedness of the produced storage integer, and the rounding
   // mode shared by both code paths.
   const uint32_t storageBitWidth;
   const bool isSigned;
   const llvm::APFloat::roundingMode roundMode;
 };

 /// Value converter for per-axis uniform quantization: there is one
 /// (scale, zero point) pair per chunk, and all chunks share a single clamping
 /// range, storage bit width and signedness.
 class UniformQuantizedPerAxisValueConverter {
 public:
   /// Captures the quantization parameters of `uniformType`: its scales and
   /// zero points, the storage min/max as the clamping range, the storage bit
   /// width, the signedness and the quantized dimension.
   ///
   /// Asserts that the expressed type is a FloatType, the storage type is an
   /// IntegerType, and that there are as many scales as zero points.
   explicit UniformQuantizedPerAxisValueConverter(
       UniformQuantizedPerAxisType uniformType)
       : scales(uniformType.getScales()),
         zeroPoints(uniformType.getZeroPoints()),
         clampMin(static_cast<double>(uniformType.getStorageTypeMin())),
         clampMax(static_cast<double>(uniformType.getStorageTypeMax())),
         storageBitWidth(uniformType.getStorageTypeIntegralWidth()),
         isSigned(uniformType.isSigned()),
         quantizationDim(uniformType.getQuantizedDimension()) {
     assert(uniformType.getExpressedType().isa<FloatType>());
     assert(uniformType.getStorageType().isa<IntegerType>());
     assert(scales.size() == zeroPoints.size());
   }

   /// Converts the attribute `realValue` to an ElementsAttr.
   /// NOTE(review): only the declaration is visible here; presumably the
   /// result holds the quantized storage values and may be null on failure -
   /// confirm against the definition.
   ElementsAttr convert(Attribute realValue);

 private:
   /// Overload of `convert` for dense floating point element attributes.
   DenseElementsAttr convert(DenseFPElementsAttr attr);

   /// Returns a per-tensor converter using the scale and zero point stored at
   /// position `index`, together with the shared clamping range, storage bit
   /// width and signedness.
   UniformQuantizedValueConverter getPerChunkConverter(int index) const {
     return UniformQuantizedValueConverter(scales[index], zeroPoints[index],
                                           clampMin, clampMax, storageBitWidth,
                                           isSigned);
   }

   // Per-chunk parameters, taken as-is from the type's getScales() and
   // getZeroPoints().
   // NOTE(review): ArrayRef does not own its data; this assumes the type's
   // storage outlives the converter - confirm.
   const ArrayRef<double> scales;
   const ArrayRef<int64_t> zeroPoints;
   // Clamping range shared by all chunks.
   const APFloat clampMin;
   const APFloat clampMax;
   const uint32_t storageBitWidth;
   const bool isSigned;
   // The quantized dimension reported by the type.
   int32_t quantizationDim;
 };

 } // namespace quant
 } // namespace mlir

 #endif // MLIR_DIALECT_QUANTOPS_UNIFORMSUPPORT_H_
mlir
Definition: InferTypeOpInterface.cpp:20

mlir::DenseFPElementsAttr
Definition: Attributes.h:976

StandardTypes.h

Types.h

mlir::quant::UniformQuantizedValueConverter::~UniformQuantizedValueConverter
virtual ~UniformQuantizedValueConverter()
Definition: UniformSupport.h:123

mlir::IntegerType
Integer types can have arbitrary bitwidth up to a large fixed limit.
Definition: StandardTypes.h:82

mlir::quant::UniformQuantizedType
Definition: QuantTypes.h:270

mlir::quant::UniformQuantizedValueConverter
Definition: UniformSupport.h:59

mlir::FloatType
Definition: StandardTypes.h:113

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(double scale, double zeroPoint, APFloat clampMin, APFloat clampMax, uint32_t storageBitWidth, bool isSigned)
Definition: UniformSupport.h:81

mlir::quant::UniformQuantizedValueConverter::quantizeFloatToInt64
int64_t quantizeFloatToInt64(APFloat expressedValue) const
Definition: UniformSupport.h:118

QuantTypes.h

llvm::ArrayRef
Definition: LLVM.h:37

mlir::quant::ExpressedToQuantizedConverter::expressedType
const Type expressedType
Definition: UniformSupport.h:49

mlir::DenseElementsAttr
Definition: Attributes.h:660

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(UniformQuantizedType uniformType)
Definition: UniformSupport.h:61

mlir::quant::ExpressedToQuantizedConverter
Definition: UniformSupport.h:32

mlir::Attribute
Definition: Attributes.h:53

mlir::quant::UniformQuantizedPerAxisType
Definition: QuantTypes.h:331

mlir::quant::UniformQuantizedPerAxisValueConverter::UniformQuantizedPerAxisValueConverter
UniformQuantizedPerAxisValueConverter(UniformQuantizedPerAxisType uniformType)
Definition: UniformSupport.h:175

mlir::Type
Definition: Types.h:84

mlir::quant::UniformQuantizedValueConverter::UniformQuantizedValueConverter
UniformQuantizedValueConverter(double scale, double zeroPoint, double clampMin, double clampMax, uint32_t storageBitWidth, bool isSigned)
Definition: UniformSupport.h:72

mlir::quant::ExpressedToQuantizedConverter::forInputType
static const ExpressedToQuantizedConverter forInputType(Type inputType)
Creates a converter for the given input type.
Definition: UniformSupport.cpp:21

mlir::quant::QuantizedType
Definition: QuantTypes.h:60

mlir::quant::ExpressedToQuantizedConverter::convert
Type convert(QuantizedType elementalType) const
Definition: UniformSupport.cpp:44

mlir::quant::UniformQuantizedPerAxisValueConverter
Definition: UniformSupport.h:173

mlir::quant::UniformQuantizedValueConverter::quantizeFloatToInt
virtual APInt quantizeFloatToInt(APFloat expressedValue) const
Definition: UniformSupport.h:91

mlir::quant::ExpressedToQuantizedConverter::inputType
const Type inputType
Definition: UniformSupport.h:45

mlir::ElementsAttr
Definition: Attributes.h:559