|
My Project
|
#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"
Macros | |
| #define | DEBUG_TYPE "loops-to-gpu" |
Functions | |
| template<typename OpTy > | |
| LogicalResult | checkLoopNestMappableImpl (OpTy forOp, unsigned numDims) |
| template<typename OpTy > | |
| LogicalResult | checkLoopNestMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims) |
| template<typename OpTy > | |
| LogicalResult | checkLoopOpMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims) |
| template<typename OpTy > | |
| OpTy | createGPULaunchLoops (OpTy rootForOp, ArrayRef< Value > ids, ArrayRef< Value > nids) |
| void | packIdAndNumId (gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, unsigned nDims, SmallVectorImpl< Value > &ids, SmallVectorImpl< Value > &nids) |
| template<typename OpTy > | |
| LogicalResult | createLaunchBody (OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp, unsigned numBlockDims, unsigned numThreadDims) |
| Generate the body of the launch operation. More... | |
| template<typename OpTy > | |
| LogicalResult | createLaunchFromOp (OpTy rootForOp, ArrayRef< Value > numWorkGroups, ArrayRef< Value > workGroupSizes) |
| #define DEBUG_TYPE "loops-to-gpu" |
| LogicalResult checkLoopNestMappable | ( | OpTy | forOp, |
| unsigned | numBlockDims, | ||
| unsigned | numThreadDims | ||
| ) |
| LogicalResult checkLoopNestMappableImpl | ( | OpTy | forOp, |
| unsigned | numDims | ||
| ) |
| LogicalResult checkLoopOpMappable | ( | OpTy | forOp, |
| unsigned | numBlockDims, | ||
| unsigned | numThreadDims | ||
| ) |
| OpTy createGPULaunchLoops | ( | OpTy | rootForOp, |
| ArrayRef< Value > | ids, | ||
| ArrayRef< Value > | nids | ||
| ) |
Given nDims perfectly nested loops rooted at rootForOp, convert them to be partitioned across workgroups or workitems. The values of the workgroup/workitem id along each dimension are passed in with ids. The number of workgroups/workitems along each dimension is passed in with nids. The innermost loop is mapped to the x-dimension, followed by the next innermost loop to the y-dimension, followed by the z-dimension.
| LogicalResult createLaunchBody | ( | OpBuilder & | builder, |
| OpTy | rootForOp, | ||
| gpu::LaunchOp | launchOp, | ||
| unsigned | numBlockDims, | ||
| unsigned | numThreadDims | ||
| ) |
Generate the body of the launch operation.
| LogicalResult createLaunchFromOp | ( | OpTy | rootForOp, |
| ArrayRef< Value > | numWorkGroups, | ||
| ArrayRef< Value > | workGroupSizes | ||
| ) |
| void packIdAndNumId | ( | gpu::KernelDim3 | kernelIds, |
| gpu::KernelDim3 | kernelNids, | ||
| unsigned | nDims, | ||
| SmallVectorImpl< Value > & | ids, | ||
| SmallVectorImpl< Value > & | nids | ||
| ) |
Utility method to convert the gpu::KernelDim3 objects representing the id of each workgroup/workitem (kernelIds) and the number of workgroups/workitems (kernelNids) along each dimension of the launch into the containers ids and nids.
1.8.13