My Project
|
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
Macros | |
#define | DEBUG_TYPE "loops-to-gpu" |
Functions | |
template<typename OpTy > | |
LogicalResult | checkLoopNestMappableImpl (OpTy forOp, unsigned numDims) |
template<typename OpTy > | |
LogicalResult | checkLoopNestMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims) |
template<typename OpTy > | |
LogicalResult | checkLoopOpMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims) |
template<typename OpTy > | |
OpTy | createGPULaunchLoops (OpTy rootForOp, ArrayRef< Value > ids, ArrayRef< Value > nids) |
void | packIdAndNumId (gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, unsigned nDims, SmallVectorImpl< Value > &ids, SmallVectorImpl< Value > &nids) |
template<typename OpTy > | |
LogicalResult | createLaunchBody (OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp, unsigned numBlockDims, unsigned numThreadDims) |
Generate the body of the launch operation. More... | |
template<typename OpTy > | |
LogicalResult | createLaunchFromOp (OpTy rootForOp, ArrayRef< Value > numWorkGroups, ArrayRef< Value > workGroupSizes) |
#define DEBUG_TYPE "loops-to-gpu" |
LogicalResult checkLoopNestMappable | ( | OpTy | forOp, |
unsigned | numBlockDims, | ||
unsigned | numThreadDims | ||
) |
LogicalResult checkLoopNestMappableImpl | ( | OpTy | forOp, |
unsigned | numDims | ||
) |
LogicalResult checkLoopOpMappable | ( | OpTy | forOp, |
unsigned | numBlockDims, | ||
unsigned | numThreadDims | ||
) |
OpTy createGPULaunchLoops | ( | OpTy | rootForOp, |
ArrayRef< Value > | ids, | ||
ArrayRef< Value > | nids | ||
) |
Given nDims perfectly nested loops rooted at rootForOp, convert them to be partitioned across workgroups or workitems. The values of the workgroup/workitem id along each dimension are passed in with ids. The number of workgroups/workitems along each dimension is passed in with nids. The innermost loop is mapped to the x-dimension, followed by the next innermost loop to the y-dimension, followed by the z-dimension.
LogicalResult createLaunchBody | ( | OpBuilder & | builder, |
OpTy | rootForOp, | ||
gpu::LaunchOp | launchOp, | ||
unsigned | numBlockDims, | ||
unsigned | numThreadDims | ||
) |
Generate the body of the launch operation.
LogicalResult createLaunchFromOp | ( | OpTy | rootForOp, |
ArrayRef< Value > | numWorkGroups, | ||
ArrayRef< Value > | workGroupSizes | ||
) |
void packIdAndNumId | ( | gpu::KernelDim3 | kernelIds, |
gpu::KernelDim3 | kernelNids, | ||
unsigned | nDims, | ||
SmallVectorImpl< Value > & | ids, | ||
SmallVectorImpl< Value > & | nids | ||
) |
Utility method to convert the gpu::KernelDim3 objects that represent the id of each workgroup/workitem and the number of workgroups/workitems along each dimension of the launch into containers.