#include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/Debug.h"

Include dependency graph for LoopsToGPU.cpp:

Macros
#define	DEBUG_TYPE "loops-to-gpu"

Functions
template<typename OpTy >
LogicalResult	checkLoopNestMappableImpl (OpTy forOp, unsigned numDims)

template<typename OpTy >
LogicalResult	checkLoopNestMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims)

template<typename OpTy >
LogicalResult	checkLoopOpMappable (OpTy forOp, unsigned numBlockDims, unsigned numThreadDims)

template<typename OpTy >
OpTy	createGPULaunchLoops (OpTy rootForOp, ArrayRef< Value > ids, ArrayRef< Value > nids)

void	packIdAndNumId (gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, unsigned nDims, SmallVectorImpl< Value > &ids, SmallVectorImpl< Value > &nids)

template<typename OpTy >
LogicalResult	createLaunchBody (OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp, unsigned numBlockDims, unsigned numThreadDims)
	Generate the body of the launch operation.

template<typename OpTy >
LogicalResult	createLaunchFromOp (OpTy rootForOp, ArrayRef< Value > numWorkGroups, ArrayRef< Value > workGroupSizes)

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "loops-to-gpu"

Function Documentation

◆ checkLoopNestMappable()

template<typename OpTy >

LogicalResult checkLoopNestMappable	(	OpTy	forOp,
		unsigned	numBlockDims,
		unsigned	numThreadDims
	)

◆ checkLoopNestMappableImpl()

template<typename OpTy >

LogicalResult checkLoopNestMappableImpl	(	OpTy	forOp,
		unsigned	numDims
	)

◆ checkLoopOpMappable()

template<typename OpTy >

LogicalResult checkLoopOpMappable	(	OpTy	forOp,
		unsigned	numBlockDims,
		unsigned	numThreadDims
	)

◆ createGPULaunchLoops()

template<typename OpTy >

OpTy createGPULaunchLoops	(	OpTy	rootForOp,
		ArrayRef< Value >	ids,
		ArrayRef< Value >	nids
	)

Given nDims perfectly nested loops rooted at rootForOp, convert them to be partitioned across workgroups or workitems. The values for the workgroup/workitem id along each dimension are passed in with ids. The number of workgroups/workitems along each dimension is passed in with nids. The innermost loop is mapped to the x-dimension, followed by the next innermost loop to the y-dimension, followed by the z-dimension.

◆ createLaunchBody()

template<typename OpTy >

LogicalResult createLaunchBody	(	OpBuilder &	builder,
		OpTy	rootForOp,
		gpu::LaunchOp	launchOp,
		unsigned	numBlockDims,
		unsigned	numThreadDims
	)

Generate the body of the launch operation.

◆ createLaunchFromOp()

template<typename OpTy >

LogicalResult createLaunchFromOp	(	OpTy	rootForOp,
		ArrayRef< Value >	numWorkGroups,
		ArrayRef< Value >	workGroupSizes
	)

◆ packIdAndNumId()

void packIdAndNumId	(	gpu::KernelDim3	kernelIds,
		gpu::KernelDim3	kernelNids,
		unsigned	nDims,
		SmallVectorImpl< Value > &	ids,
		SmallVectorImpl< Value > &	nids
	)

Utility method to convert the gpu::KernelDim3 objects representing the id of each workgroup/workitem and the number of workgroups/workitems along each dimension of the launch into containers.

Macros

Functions

Macro Definition Documentation

◆ DEBUG_TYPE

Function Documentation

◆ checkLoopNestMappable()

◆ checkLoopNestMappableImpl()

◆ checkLoopOpMappable()

◆ createGPULaunchLoops()

◆ createLaunchBody()

◆ createLaunchFromOp()

◆ packIdAndNumId()