This page was generated from tools/cli/using_debugresult.ipynb.
Using DebugResult
Here, we will show how to use DebugResult to debug some problems we might encounter when using our mlir-opt CLI Wrapper.
Let’s first import the MlirOptCli class and create an instance of our mlir-opt CLI Wrapper.
from mlir_graphblas import MlirOptCli
cli = MlirOptCli(executable=None, options=None)
Generate Example Input
Let’s say we have a bunch of MLIR code that we’re not familiar with.
mlir_string = """
#trait_sum_reduction = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> ()> // x (scalar out)
],
iterator_types = ["reduction", "reduction", "reduction"],
doc = "x += SUM_ijk A(i,j,k)"
}
#sparseTensor = #sparse_tensor.encoding<{
dimLevelType = [ "compressed", "compressed", "compressed" ],
dimOrdering = affine_map<(i,j,k) -> (i,j,k)>,
pointerBitWidth = 64,
indexBitWidth = 64
}>
func @func_f32(%argA: tensor<10x20x30xf32, #sparseTensor>) -> f32 {
%out_tensor = arith.constant dense<0.0> : tensor<f32>
%reduction = linalg.generic #trait_sum_reduction
ins(%argA: tensor<10x20x30xf32, #sparseTensor>)
outs(%out_tensor: tensor<f32>) {
^bb(%a: f32, %x: f32):
%0 = arith.addf %x, %a : f32
linalg.yield %0 : f32
} -> tensor<f32>
%answer = tensor.extract %reduction[] : tensor<f32>
return %answer : f32
}
"""
mlir_bytes = mlir_string.encode()
Since we’re not familiar with this code, we don’t know exactly which passes are necessary or in what order they should be applied.
Let’s say that this is the first set of passes we try.
passes = [
"--sparsification",
"--sparse-tensor-conversion",
"--linalg-bufferize",
"--func-bufferize",
"--tensor-constant-bufferize",
"--tensor-bufferize",
"--finalizing-bufferize",
"--convert-linalg-to-loops",
"--convert-memref-to-llvm",
"--convert-openmp-to-llvm",
"--convert-arith-to-llvm",
"--convert-math-to-llvm",
"--convert-std-to-llvm",
"--reconcile-unrealized-casts"
]
Let’s see what results we get.
result = cli.apply_passes(mlir_bytes, passes)
---------------------------------------------------------------------------
MlirOptError Traceback (most recent call last)
/var/folders/yj/nmf5xtns3hx6qgdnybj964q80000gp/T/ipykernel_65605/2890307325.py in <module>
----> 1 result = cli.apply_passes(mlir_bytes, passes)
~/code/mlir-graphblas/mlir_graphblas/cli.py in apply_passes(self, file, passes)
87 input = self._read_input(fp)
88 err.debug_result = self.debug_passes(input, passes) if passes else None
---> 89 raise err
90
91 def debug_passes(self, input: bytes, passes: List[str]) -> "DebugResult":
MlirOptError: <stdin>:20:16: error: failed to legalize operation 'builtin.unrealized_conversion_cast' that was explicitly marked illegal
%reduction = linalg.generic #trait_sum_reduction
^
We get an exception.
Unfortunately, the exception message isn’t very clear: it gives us only the immediate error message and tells us nothing about the context in which the error occurred, e.g. which pass failed (if any) or whether any necessary passes are missing.
We only know that the operation builtin.unrealized_conversion_cast shows up somewhere and that it’s a problem.
Let’s use the debug_passes method instead of apply_passes to get more information.
result = cli.debug_passes(mlir_bytes, passes)
result
=================================================
Error when running reconcile-unrealized-casts
=================================================
<stdin>:25:10: error: failed to legalize operation 'builtin.unrealized_conversion_cast' that was explicitly marked illegal
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
^
<stdin>:25:10: note: see current operation: %3 = "builtin.unrealized_conversion_cast"(%2) : (i64) -> index loc("<stdin>":25:10)
=======================================
Input to reconcile-unrealized-casts
=======================================
10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200
12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1|module attributes {llvm.data_layout = ""} {
2| llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
3| llvm.func @malloc(i64) -> !llvm.ptr<i8>
4| llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
5| llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
6| %0 = llvm.mlir.constant(1 : index) : i64
7| %1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
8| llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
9| %2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
10| llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
11| }
12| llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
13| llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
14| %0 = llvm.mlir.constant(1 : index) : i64
15| %1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
16| llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
17| %2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
18| llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
19| }
20| llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
21| llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
22| %0 = llvm.mlir.constant(0 : index) : i64
23| %1 = builtin.unrealized_conversion_cast %0 : i64 to index
24| %2 = llvm.mlir.constant(1 : index) : i64
25| %3 = builtin.unrealized_conversion_cast %2 : i64 to index
26| %4 = llvm.mlir.constant(2 : index) : i64
27| %5 = llvm.mlir.constant(1 : index) : i64
28| %6 = llvm.mlir.null : !llvm.ptr<f32>
29| %7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
30| %8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
31| %9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
32| %10 = llvm.mlir.constant(0 : index) : i64
33| %11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
34| %12 = llvm.mlir.constant(3735928559 : index) : i64
35| %13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
36| %14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
37| %15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
38| %16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
39| %17 = llvm.mlir.constant(0 : index) : i64
40| %18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
41| %19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
42| %20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
43| %21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
44| %22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
45| %23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
46| %24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
47| %25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
48| %26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
49| %27 = llvm.mlir.constant(1 : index) : i64
50| %28 = llvm.mlir.null : !llvm.ptr<f32>
51| %29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
52| %30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
53| %31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
54| %32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
55| %33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
56| %34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
57| %35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
58| %36 = llvm.mlir.constant(0 : index) : i64
59| %37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
60| %38 = llvm.mlir.constant(0 : index) : i64
61| %39 = llvm.mlir.constant(1 : index) : i64
62| %40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
63| llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
64| %41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
65| %42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
66| %43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
67| %44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
68| %45 = llvm.mlir.constant(0 : index) : i64
69| %46 = llvm.mlir.constant(1 : index) : i64
70| %47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
71| llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
72| %48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
73| %49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
74| %50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
75| %51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
76| %52 = llvm.mlir.constant(1 : index) : i64
77| %53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
78| llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
79| %54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
80| llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
81| %55 = llvm.mlir.constant(4 : index) : i64
82| llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
83| %56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
84| %57 = builtin.unrealized_conversion_cast %1 : index to i64
85| %58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
86| %59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
87| %60 = llvm.load %59 : !llvm.ptr<i64>
88| %61 = builtin.unrealized_conversion_cast %60 : i64 to index
89| %62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
90| %63 = builtin.unrealized_conversion_cast %3 : index to i64
91| %64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
92| %65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
93| %66 = llvm.load %65 : !llvm.ptr<i64>
94| %67 = builtin.unrealized_conversion_cast %66 : i64 to index
95| scf.for %arg1 = %61 to %67 step %3 {
96| %70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
97| %71 = builtin.unrealized_conversion_cast %arg1 : index to i64
98| %72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
99| %73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
100| %74 = llvm.load %73 : !llvm.ptr<i64>
101| %75 = builtin.unrealized_conversion_cast %74 : i64 to index
102| %76 = builtin.unrealized_conversion_cast %arg1 : index to i64
103| %77 = llvm.add %76, %2 : i64
104| %78 = builtin.unrealized_conversion_cast %77 : i64 to index
105| %79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
106| %80 = builtin.unrealized_conversion_cast %78 : index to i64
107| %81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
108| %82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
109| %83 = llvm.load %82 : !llvm.ptr<i64>
110| %84 = builtin.unrealized_conversion_cast %83 : i64 to index
111| scf.for %arg2 = %75 to %84 step %3 {
112| %85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
113| %86 = builtin.unrealized_conversion_cast %arg2 : index to i64
114| %87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
115| %88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
116| %89 = llvm.load %88 : !llvm.ptr<i64>
117| %90 = builtin.unrealized_conversion_cast %89 : i64 to index
118| %91 = builtin.unrealized_conversion_cast %arg2 : index to i64
119| %92 = llvm.add %91, %2 : i64
120| %93 = builtin.unrealized_conversion_cast %92 : i64 to index
121| %94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
122| %95 = builtin.unrealized_conversion_cast %93 : index to i64
123| %96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
124| %97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
125| %98 = llvm.load %97 : !llvm.ptr<i64>
126| %99 = builtin.unrealized_conversion_cast %98 : i64 to index
127| %100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
128| %101 = llvm.load %100 : !llvm.ptr<f32>
129| %102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
130| %104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
131| %105 = builtin.unrealized_conversion_cast %arg3 : index to i64
132| %106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
133| %107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
134| %108 = llvm.load %107 : !llvm.ptr<f32>
135| %109 = llvm.fadd %arg4, %108 : f32
136| scf.yield %109 : f32
137| }
138| %103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
139| llvm.store %102, %103 : !llvm.ptr<f32>
140| }
141| }
142| %68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
143| %69 = llvm.load %68 : !llvm.ptr<f32>
144| llvm.return %69 : f32
145| }
146|}
147|
================================
Input to convert-std-to-llvm
================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = builtin.unrealized_conversion_cast %0 : i64 to index
%2 = llvm.mlir.constant(1 : index) : i64
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
%4 = llvm.mlir.constant(2 : index) : i64
%5 = llvm.mlir.constant(1 : index) : i64
%6 = llvm.mlir.null : !llvm.ptr<f32>
%7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
%9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%12 = llvm.mlir.constant(3735928559 : index) : i64
%13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
%14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%17 = llvm.mlir.constant(0 : index) : i64
%18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
%27 = llvm.mlir.constant(1 : index) : i64
%28 = llvm.mlir.null : !llvm.ptr<f32>
%29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
%31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
%32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
%33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%38 = llvm.mlir.constant(0 : index) : i64
%39 = llvm.mlir.constant(1 : index) : i64
%40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
%44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
%45 = llvm.mlir.constant(0 : index) : i64
%46 = llvm.mlir.constant(1 : index) : i64
%47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
%51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
%52 = llvm.mlir.constant(1 : index) : i64
%53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%55 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%57 = builtin.unrealized_conversion_cast %1 : index to i64
%58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%60 = llvm.load %59 : !llvm.ptr<i64>
%61 = builtin.unrealized_conversion_cast %60 : i64 to index
%62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%63 = builtin.unrealized_conversion_cast %3 : index to i64
%64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%66 = llvm.load %65 : !llvm.ptr<i64>
%67 = builtin.unrealized_conversion_cast %66 : i64 to index
scf.for %arg1 = %61 to %67 step %3 {
%70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = builtin.unrealized_conversion_cast %arg1 : index to i64
%72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%74 = llvm.load %73 : !llvm.ptr<i64>
%75 = builtin.unrealized_conversion_cast %74 : i64 to index
%76 = builtin.unrealized_conversion_cast %arg1 : index to i64
%77 = llvm.add %76, %2 : i64
%78 = builtin.unrealized_conversion_cast %77 : i64 to index
%79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%80 = builtin.unrealized_conversion_cast %78 : index to i64
%81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%83 = llvm.load %82 : !llvm.ptr<i64>
%84 = builtin.unrealized_conversion_cast %83 : i64 to index
scf.for %arg2 = %75 to %84 step %3 {
%85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%86 = builtin.unrealized_conversion_cast %arg2 : index to i64
%87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%89 = llvm.load %88 : !llvm.ptr<i64>
%90 = builtin.unrealized_conversion_cast %89 : i64 to index
%91 = builtin.unrealized_conversion_cast %arg2 : index to i64
%92 = llvm.add %91, %2 : i64
%93 = builtin.unrealized_conversion_cast %92 : i64 to index
%94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%95 = builtin.unrealized_conversion_cast %93 : index to i64
%96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%98 = llvm.load %97 : !llvm.ptr<i64>
%99 = builtin.unrealized_conversion_cast %98 : i64 to index
%100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%101 = llvm.load %100 : !llvm.ptr<f32>
%102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
%104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%105 = builtin.unrealized_conversion_cast %arg3 : index to i64
%106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%108 = llvm.load %107 : !llvm.ptr<f32>
%109 = llvm.fadd %arg4, %108 : f32
scf.yield %109 : f32
}
%103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %102, %103 : !llvm.ptr<f32>
}
}
%68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%69 = llvm.load %68 : !llvm.ptr<f32>
llvm.return %69 : f32
}
}
=================================
Input to convert-math-to-llvm
=================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = builtin.unrealized_conversion_cast %0 : i64 to index
%2 = llvm.mlir.constant(1 : index) : i64
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
%4 = llvm.mlir.constant(2 : index) : i64
%5 = llvm.mlir.constant(1 : index) : i64
%6 = llvm.mlir.null : !llvm.ptr<f32>
%7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
%9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%12 = llvm.mlir.constant(3735928559 : index) : i64
%13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
%14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%17 = llvm.mlir.constant(0 : index) : i64
%18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
%27 = llvm.mlir.constant(1 : index) : i64
%28 = llvm.mlir.null : !llvm.ptr<f32>
%29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
%31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
%32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
%33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%38 = llvm.mlir.constant(0 : index) : i64
%39 = llvm.mlir.constant(1 : index) : i64
%40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
%44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
%45 = llvm.mlir.constant(0 : index) : i64
%46 = llvm.mlir.constant(1 : index) : i64
%47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
%51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
%52 = llvm.mlir.constant(1 : index) : i64
%53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%55 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%57 = builtin.unrealized_conversion_cast %1 : index to i64
%58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%60 = llvm.load %59 : !llvm.ptr<i64>
%61 = builtin.unrealized_conversion_cast %60 : i64 to index
%62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%63 = builtin.unrealized_conversion_cast %3 : index to i64
%64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%66 = llvm.load %65 : !llvm.ptr<i64>
%67 = builtin.unrealized_conversion_cast %66 : i64 to index
scf.for %arg1 = %61 to %67 step %3 {
%70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = builtin.unrealized_conversion_cast %arg1 : index to i64
%72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%74 = llvm.load %73 : !llvm.ptr<i64>
%75 = builtin.unrealized_conversion_cast %74 : i64 to index
%76 = builtin.unrealized_conversion_cast %arg1 : index to i64
%77 = llvm.add %76, %2 : i64
%78 = builtin.unrealized_conversion_cast %77 : i64 to index
%79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%80 = builtin.unrealized_conversion_cast %78 : index to i64
%81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%83 = llvm.load %82 : !llvm.ptr<i64>
%84 = builtin.unrealized_conversion_cast %83 : i64 to index
scf.for %arg2 = %75 to %84 step %3 {
%85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%86 = builtin.unrealized_conversion_cast %arg2 : index to i64
%87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%89 = llvm.load %88 : !llvm.ptr<i64>
%90 = builtin.unrealized_conversion_cast %89 : i64 to index
%91 = builtin.unrealized_conversion_cast %arg2 : index to i64
%92 = llvm.add %91, %2 : i64
%93 = builtin.unrealized_conversion_cast %92 : i64 to index
%94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%95 = builtin.unrealized_conversion_cast %93 : index to i64
%96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%98 = llvm.load %97 : !llvm.ptr<i64>
%99 = builtin.unrealized_conversion_cast %98 : i64 to index
%100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%101 = llvm.load %100 : !llvm.ptr<f32>
%102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
%104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%105 = builtin.unrealized_conversion_cast %arg3 : index to i64
%106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%108 = llvm.load %107 : !llvm.ptr<f32>
%109 = llvm.fadd %arg4, %108 : f32
scf.yield %109 : f32
}
%103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %102, %103 : !llvm.ptr<f32>
}
}
%68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%69 = llvm.load %68 : !llvm.ptr<f32>
llvm.return %69 : f32
}
}
==================================
Input to convert-arith-to-llvm
==================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = builtin.unrealized_conversion_cast %0 : i64 to index
%2 = llvm.mlir.constant(1 : index) : i64
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
%4 = llvm.mlir.constant(2 : index) : i64
%5 = llvm.mlir.constant(1 : index) : i64
%6 = llvm.mlir.null : !llvm.ptr<f32>
%7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
%9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%12 = llvm.mlir.constant(3735928559 : index) : i64
%13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
%14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%17 = llvm.mlir.constant(0 : index) : i64
%18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
%27 = llvm.mlir.constant(1 : index) : i64
%28 = llvm.mlir.null : !llvm.ptr<f32>
%29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
%31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
%32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
%33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%38 = llvm.mlir.constant(0 : index) : i64
%39 = llvm.mlir.constant(1 : index) : i64
%40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
%44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
%45 = llvm.mlir.constant(0 : index) : i64
%46 = llvm.mlir.constant(1 : index) : i64
%47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
%51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
%52 = llvm.mlir.constant(1 : index) : i64
%53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%55 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%57 = builtin.unrealized_conversion_cast %1 : index to i64
%58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%60 = llvm.load %59 : !llvm.ptr<i64>
%61 = builtin.unrealized_conversion_cast %60 : i64 to index
%62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%63 = builtin.unrealized_conversion_cast %3 : index to i64
%64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%66 = llvm.load %65 : !llvm.ptr<i64>
%67 = builtin.unrealized_conversion_cast %66 : i64 to index
scf.for %arg1 = %61 to %67 step %3 {
%70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = builtin.unrealized_conversion_cast %arg1 : index to i64
%72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%74 = llvm.load %73 : !llvm.ptr<i64>
%75 = builtin.unrealized_conversion_cast %74 : i64 to index
%76 = builtin.unrealized_conversion_cast %arg1 : index to i64
%77 = llvm.add %76, %2 : i64
%78 = builtin.unrealized_conversion_cast %77 : i64 to index
%79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%80 = builtin.unrealized_conversion_cast %78 : index to i64
%81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%83 = llvm.load %82 : !llvm.ptr<i64>
%84 = builtin.unrealized_conversion_cast %83 : i64 to index
scf.for %arg2 = %75 to %84 step %3 {
%85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%86 = builtin.unrealized_conversion_cast %arg2 : index to i64
%87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%89 = llvm.load %88 : !llvm.ptr<i64>
%90 = builtin.unrealized_conversion_cast %89 : i64 to index
%91 = builtin.unrealized_conversion_cast %arg2 : index to i64
%92 = llvm.add %91, %2 : i64
%93 = builtin.unrealized_conversion_cast %92 : i64 to index
%94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%95 = builtin.unrealized_conversion_cast %93 : index to i64
%96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%98 = llvm.load %97 : !llvm.ptr<i64>
%99 = builtin.unrealized_conversion_cast %98 : i64 to index
%100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%101 = llvm.load %100 : !llvm.ptr<f32>
%102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
%104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%105 = builtin.unrealized_conversion_cast %arg3 : index to i64
%106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%108 = llvm.load %107 : !llvm.ptr<f32>
%109 = llvm.fadd %arg4, %108 : f32
scf.yield %109 : f32
}
%103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %102, %103 : !llvm.ptr<f32>
}
}
%68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%69 = llvm.load %68 : !llvm.ptr<f32>
llvm.return %69 : f32
}
}
===================================
Input to convert-openmp-to-llvm
===================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.mlir.null : !llvm.ptr<f32>
%2 = llvm.getelementptr %1[%0] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%3 = llvm.ptrtoint %2 : !llvm.ptr<f32> to i64
%4 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%5 = llvm.mlir.constant(0 : index) : i64
%6 = llvm.getelementptr %4[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%7 = llvm.mlir.constant(3735928559 : index) : i64
%8 = llvm.inttoptr %7 : i64 to !llvm.ptr<f32>
%9 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%10 = llvm.insertvalue %8, %9[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%11 = llvm.insertvalue %6, %10[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%12 = llvm.mlir.constant(0 : index) : i64
%13 = llvm.insertvalue %12, %11[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%14 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%15 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%16 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%17 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%18 = llvm.mlir.constant(1 : index) : i64
%19 = llvm.mlir.null : !llvm.ptr<f32>
%20 = llvm.getelementptr %19[%18] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%21 = llvm.ptrtoint %20 : !llvm.ptr<f32> to i64
%22 = llvm.call @malloc(%21) : (i64) -> !llvm.ptr<i8>
%23 = llvm.bitcast %22 : !llvm.ptr<i8> to !llvm.ptr<f32>
%24 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%25 = llvm.insertvalue %23, %24[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%26 = llvm.insertvalue %23, %25[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%27 = llvm.mlir.constant(0 : index) : i64
%28 = llvm.insertvalue %27, %26[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%29 = llvm.mlir.constant(0 : index) : i64
%30 = llvm.mlir.constant(1 : index) : i64
%31 = llvm.alloca %30 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %13, %31 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%32 = llvm.bitcast %31 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%33 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%34 = llvm.insertvalue %29, %33[0] : !llvm.struct<(i64, ptr<i8>)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(i64, ptr<i8>)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.mlir.constant(1 : index) : i64
%38 = llvm.alloca %37 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %28, %38 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%39 = llvm.bitcast %38 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%40 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%41 = llvm.insertvalue %36, %40[0] : !llvm.struct<(i64, ptr<i8>)>
%42 = llvm.insertvalue %39, %41[1] : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.mlir.constant(1 : index) : i64
%44 = llvm.alloca %43 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %35, %44 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%45 = llvm.alloca %43 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %42, %45 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%46 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%46, %44, %45) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%47 = builtin.unrealized_conversion_cast %14 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%48 = builtin.unrealized_conversion_cast %c0 : index to i64
%49 = llvm.extractvalue %47[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%50 = llvm.getelementptr %49[%48] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%51 = llvm.load %50 : !llvm.ptr<i64>
%52 = arith.index_cast %51 : i64 to index
%53 = builtin.unrealized_conversion_cast %14 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%54 = builtin.unrealized_conversion_cast %c1 : index to i64
%55 = llvm.extractvalue %53[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%56 = llvm.getelementptr %55[%54] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%57 = llvm.load %56 : !llvm.ptr<i64>
%58 = arith.index_cast %57 : i64 to index
scf.for %arg1 = %52 to %58 step %c1 {
%61 = builtin.unrealized_conversion_cast %15 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%62 = builtin.unrealized_conversion_cast %arg1 : index to i64
%63 = llvm.extractvalue %61[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%64 = llvm.getelementptr %63[%62] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%65 = llvm.load %64 : !llvm.ptr<i64>
%66 = arith.index_cast %65 : i64 to index
%67 = arith.addi %arg1, %c1 : index
%68 = builtin.unrealized_conversion_cast %15 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%69 = builtin.unrealized_conversion_cast %67 : index to i64
%70 = llvm.extractvalue %68[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = llvm.getelementptr %70[%69] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%72 = llvm.load %71 : !llvm.ptr<i64>
%73 = arith.index_cast %72 : i64 to index
scf.for %arg2 = %66 to %73 step %c1 {
%74 = builtin.unrealized_conversion_cast %16 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%75 = builtin.unrealized_conversion_cast %arg2 : index to i64
%76 = llvm.extractvalue %74[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%77 = llvm.getelementptr %76[%75] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%78 = llvm.load %77 : !llvm.ptr<i64>
%79 = arith.index_cast %78 : i64 to index
%80 = arith.addi %arg2, %c1 : index
%81 = builtin.unrealized_conversion_cast %16 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = builtin.unrealized_conversion_cast %80 : index to i64
%83 = llvm.extractvalue %81[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%84 = llvm.getelementptr %83[%82] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%85 = llvm.load %84 : !llvm.ptr<i64>
%86 = arith.index_cast %85 : i64 to index
%87 = llvm.extractvalue %28[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%88 = llvm.load %87 : !llvm.ptr<f32>
%89 = scf.for %arg3 = %79 to %86 step %c1 iter_args(%arg4 = %88) -> (f32) {
%91 = builtin.unrealized_conversion_cast %17 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%92 = builtin.unrealized_conversion_cast %arg3 : index to i64
%93 = llvm.extractvalue %91[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%94 = llvm.getelementptr %93[%92] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%95 = llvm.load %94 : !llvm.ptr<f32>
%96 = arith.addf %arg4, %95 : f32
scf.yield %96 : f32
}
%90 = llvm.extractvalue %28[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %89, %90 : !llvm.ptr<f32>
}
}
%59 = llvm.extractvalue %28[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%60 = llvm.load %59 : !llvm.ptr<f32>
return %60 : f32
}
}
===================================
Input to convert-memref-to-llvm
===================================
module {
memref.global "private" constant @__constant_xf32 : memref<f32> = dense<0.000000e+00>
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = memref.get_global @__constant_xf32 : memref<f32>
%1 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%2 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%4 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%5 = memref.alloc() : memref<f32>
memref.copy %0, %5 : memref<f32> to memref<f32>
%6 = memref.load %1[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %1[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%11 = memref.load %2[%arg1] : memref<?xi64>
%12 = arith.index_cast %11 : i64 to index
%13 = arith.addi %arg1, %c1 : index
%14 = memref.load %2[%13] : memref<?xi64>
%15 = arith.index_cast %14 : i64 to index
scf.for %arg2 = %12 to %15 step %c1 {
%16 = memref.load %3[%arg2] : memref<?xi64>
%17 = arith.index_cast %16 : i64 to index
%18 = arith.addi %arg2, %c1 : index
%19 = memref.load %3[%18] : memref<?xi64>
%20 = arith.index_cast %19 : i64 to index
%21 = memref.load %5[] : memref<f32>
%22 = scf.for %arg3 = %17 to %20 step %c1 iter_args(%arg4 = %21) -> (f32) {
%23 = memref.load %4[%arg3] : memref<?xf32>
%24 = arith.addf %arg4, %23 : f32
scf.yield %24 : f32
}
memref.store %22, %5[] : memref<f32>
}
}
%10 = memref.load %5[] : memref<f32>
return %10 : f32
}
}
====================================
Input to convert-linalg-to-loops
====================================
module {
memref.global "private" constant @__constant_xf32 : memref<f32> = dense<0.000000e+00>
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = memref.get_global @__constant_xf32 : memref<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%1 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%2 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%4 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%5 = memref.alloc() : memref<f32>
memref.copy %0, %5 : memref<f32> to memref<f32>
%6 = memref.load %1[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %1[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%11 = memref.load %2[%arg1] : memref<?xi64>
%12 = arith.index_cast %11 : i64 to index
%13 = arith.addi %arg1, %c1 : index
%14 = memref.load %2[%13] : memref<?xi64>
%15 = arith.index_cast %14 : i64 to index
scf.for %arg2 = %12 to %15 step %c1 {
%16 = memref.load %3[%arg2] : memref<?xi64>
%17 = arith.index_cast %16 : i64 to index
%18 = arith.addi %arg2, %c1 : index
%19 = memref.load %3[%18] : memref<?xi64>
%20 = arith.index_cast %19 : i64 to index
%21 = memref.load %5[] : memref<f32>
%22 = scf.for %arg3 = %17 to %20 step %c1 iter_args(%arg4 = %21) -> (f32) {
%23 = memref.load %4[%arg3] : memref<?xf32>
%24 = arith.addf %arg4, %23 : f32
scf.yield %24 : f32
}
memref.store %22, %5[] : memref<f32>
}
}
%10 = memref.load %5[] : memref<f32>
return %10 : f32
}
}
=================================
Input to finalizing-bufferize
=================================
module {
memref.global "private" constant @__constant_xf32 : memref<f32> = dense<0.000000e+00>
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = memref.get_global @__constant_xf32 : memref<f32>
%1 = memref.tensor_load %0 : memref<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%2 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%4 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%5 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%6 = memref.buffer_cast %1 : memref<f32>
%7 = memref.alloc() : memref<f32>
memref.copy %6, %7 : memref<f32> to memref<f32>
%8 = memref.load %2[%c0] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
%10 = memref.load %2[%c1] : memref<?xi64>
%11 = arith.index_cast %10 : i64 to index
scf.for %arg1 = %9 to %11 step %c1 {
%15 = memref.load %3[%arg1] : memref<?xi64>
%16 = arith.index_cast %15 : i64 to index
%17 = arith.addi %arg1, %c1 : index
%18 = memref.load %3[%17] : memref<?xi64>
%19 = arith.index_cast %18 : i64 to index
scf.for %arg2 = %16 to %19 step %c1 {
%20 = memref.load %4[%arg2] : memref<?xi64>
%21 = arith.index_cast %20 : i64 to index
%22 = arith.addi %arg2, %c1 : index
%23 = memref.load %4[%22] : memref<?xi64>
%24 = arith.index_cast %23 : i64 to index
%25 = memref.load %7[] : memref<f32>
%26 = scf.for %arg3 = %21 to %24 step %c1 iter_args(%arg4 = %25) -> (f32) {
%27 = memref.load %5[%arg3] : memref<?xf32>
%28 = arith.addf %arg4, %27 : f32
scf.yield %28 : f32
}
memref.store %26, %7[] : memref<f32>
}
}
%12 = memref.tensor_load %7 : memref<f32>
%13 = memref.buffer_cast %12 : memref<f32>
%14 = memref.load %13[] : memref<f32>
return %14 : f32
}
}
=============================
Input to tensor-bufferize
=============================
module {
memref.global "private" constant @__constant_xf32 : memref<f32> = dense<0.000000e+00>
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = memref.get_global @__constant_xf32 : memref<f32>
%1 = memref.tensor_load %0 : memref<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%2 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%4 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%5 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%6 = memref.buffer_cast %1 : memref<f32>
%7 = memref.alloc() : memref<f32>
memref.copy %6, %7 : memref<f32> to memref<f32>
%8 = memref.load %2[%c0] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
%10 = memref.load %2[%c1] : memref<?xi64>
%11 = arith.index_cast %10 : i64 to index
scf.for %arg1 = %9 to %11 step %c1 {
%14 = memref.load %3[%arg1] : memref<?xi64>
%15 = arith.index_cast %14 : i64 to index
%16 = arith.addi %arg1, %c1 : index
%17 = memref.load %3[%16] : memref<?xi64>
%18 = arith.index_cast %17 : i64 to index
scf.for %arg2 = %15 to %18 step %c1 {
%19 = memref.load %4[%arg2] : memref<?xi64>
%20 = arith.index_cast %19 : i64 to index
%21 = arith.addi %arg2, %c1 : index
%22 = memref.load %4[%21] : memref<?xi64>
%23 = arith.index_cast %22 : i64 to index
%24 = memref.load %7[] : memref<f32>
%25 = scf.for %arg3 = %20 to %23 step %c1 iter_args(%arg4 = %24) -> (f32) {
%26 = memref.load %5[%arg3] : memref<?xf32>
%27 = arith.addf %arg4, %26 : f32
scf.yield %27 : f32
}
memref.store %25, %7[] : memref<f32>
}
}
%12 = memref.tensor_load %7 : memref<f32>
%13 = tensor.extract %12[] : tensor<f32>
return %13 : f32
}
}
======================================
Input to tensor-constant-bufferize
======================================
module {
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%cst = arith.constant dense<0.000000e+00> : tensor<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%1 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%2 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%4 = memref.buffer_cast %cst : memref<f32>
%5 = memref.alloc() : memref<f32>
memref.copy %4, %5 : memref<f32> to memref<f32>
%6 = memref.load %0[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %0[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%12 = memref.load %1[%arg1] : memref<?xi64>
%13 = arith.index_cast %12 : i64 to index
%14 = arith.addi %arg1, %c1 : index
%15 = memref.load %1[%14] : memref<?xi64>
%16 = arith.index_cast %15 : i64 to index
scf.for %arg2 = %13 to %16 step %c1 {
%17 = memref.load %2[%arg2] : memref<?xi64>
%18 = arith.index_cast %17 : i64 to index
%19 = arith.addi %arg2, %c1 : index
%20 = memref.load %2[%19] : memref<?xi64>
%21 = arith.index_cast %20 : i64 to index
%22 = memref.load %5[] : memref<f32>
%23 = scf.for %arg3 = %18 to %21 step %c1 iter_args(%arg4 = %22) -> (f32) {
%24 = memref.load %3[%arg3] : memref<?xf32>
%25 = arith.addf %arg4, %24 : f32
scf.yield %25 : f32
}
memref.store %23, %5[] : memref<f32>
}
}
%10 = memref.tensor_load %5 : memref<f32>
%11 = tensor.extract %10[] : tensor<f32>
return %11 : f32
}
}
===========================
Input to func-bufferize
===========================
module {
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%cst = arith.constant dense<0.000000e+00> : tensor<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%1 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%2 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%4 = memref.buffer_cast %cst : memref<f32>
%5 = memref.alloc() : memref<f32>
memref.copy %4, %5 : memref<f32> to memref<f32>
%6 = memref.load %0[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %0[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%12 = memref.load %1[%arg1] : memref<?xi64>
%13 = arith.index_cast %12 : i64 to index
%14 = arith.addi %arg1, %c1 : index
%15 = memref.load %1[%14] : memref<?xi64>
%16 = arith.index_cast %15 : i64 to index
scf.for %arg2 = %13 to %16 step %c1 {
%17 = memref.load %2[%arg2] : memref<?xi64>
%18 = arith.index_cast %17 : i64 to index
%19 = arith.addi %arg2, %c1 : index
%20 = memref.load %2[%19] : memref<?xi64>
%21 = arith.index_cast %20 : i64 to index
%22 = memref.load %5[] : memref<f32>
%23 = scf.for %arg3 = %18 to %21 step %c1 iter_args(%arg4 = %22) -> (f32) {
%24 = memref.load %3[%arg3] : memref<?xf32>
%25 = arith.addf %arg4, %24 : f32
scf.yield %25 : f32
}
memref.store %23, %5[] : memref<f32>
}
}
%10 = memref.tensor_load %5 : memref<f32>
%11 = tensor.extract %10[] : tensor<f32>
return %11 : f32
}
}
=============================
Input to linalg-bufferize
=============================
module {
func private @sparseValuesF32(!llvm.ptr<i8>) -> memref<?xf32> attributes {llvm.emit_c_interface}
func private @sparsePointers64(!llvm.ptr<i8>, index) -> memref<?xi64> attributes {llvm.emit_c_interface}
func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%cst = arith.constant dense<0.000000e+00> : tensor<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = call @sparsePointers64(%arg0, %c0) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%1 = call @sparsePointers64(%arg0, %c1) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%2 = call @sparsePointers64(%arg0, %c2) : (!llvm.ptr<i8>, index) -> memref<?xi64>
%3 = call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> memref<?xf32>
%4 = memref.buffer_cast %cst : memref<f32>
%5 = memref.alloc() : memref<f32>
memref.copy %4, %5 : memref<f32> to memref<f32>
%6 = memref.load %0[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %0[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%12 = memref.load %1[%arg1] : memref<?xi64>
%13 = arith.index_cast %12 : i64 to index
%14 = arith.addi %arg1, %c1 : index
%15 = memref.load %1[%14] : memref<?xi64>
%16 = arith.index_cast %15 : i64 to index
scf.for %arg2 = %13 to %16 step %c1 {
%17 = memref.load %2[%arg2] : memref<?xi64>
%18 = arith.index_cast %17 : i64 to index
%19 = arith.addi %arg2, %c1 : index
%20 = memref.load %2[%19] : memref<?xi64>
%21 = arith.index_cast %20 : i64 to index
%22 = memref.load %5[] : memref<f32>
%23 = scf.for %arg3 = %18 to %21 step %c1 iter_args(%arg4 = %22) -> (f32) {
%24 = memref.load %3[%arg3] : memref<?xf32>
%25 = arith.addf %arg4, %24 : f32
scf.yield %25 : f32
}
memref.store %23, %5[] : memref<f32>
}
}
%10 = memref.tensor_load %5 : memref<f32>
%11 = tensor.extract %10[] : tensor<f32>
return %11 : f32
}
}
=====================================
Input to sparse-tensor-conversion
=====================================
module {
func @func_f32(%arg0: tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, pointerBitWidth = 64, indexBitWidth = 64 }>>) -> f32 {
%cst = arith.constant dense<0.000000e+00> : tensor<f32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%0 = sparse_tensor.pointers %arg0, %c0 : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, pointerBitWidth = 64, indexBitWidth = 64 }>> to memref<?xi64>
%1 = sparse_tensor.pointers %arg0, %c1 : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, pointerBitWidth = 64, indexBitWidth = 64 }>> to memref<?xi64>
%2 = sparse_tensor.pointers %arg0, %c2 : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, pointerBitWidth = 64, indexBitWidth = 64 }>> to memref<?xi64>
%3 = sparse_tensor.values %arg0 : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, pointerBitWidth = 64, indexBitWidth = 64 }>> to memref<?xf32>
%4 = memref.buffer_cast %cst : memref<f32>
%5 = memref.alloc() : memref<f32>
memref.copy %4, %5 : memref<f32> to memref<f32>
%6 = memref.load %0[%c0] : memref<?xi64>
%7 = arith.index_cast %6 : i64 to index
%8 = memref.load %0[%c1] : memref<?xi64>
%9 = arith.index_cast %8 : i64 to index
scf.for %arg1 = %7 to %9 step %c1 {
%12 = memref.load %1[%arg1] : memref<?xi64>
%13 = arith.index_cast %12 : i64 to index
%14 = arith.addi %arg1, %c1 : index
%15 = memref.load %1[%14] : memref<?xi64>
%16 = arith.index_cast %15 : i64 to index
scf.for %arg2 = %13 to %16 step %c1 {
%17 = memref.load %2[%arg2] : memref<?xi64>
%18 = arith.index_cast %17 : i64 to index
%19 = arith.addi %arg2, %c1 : index
%20 = memref.load %2[%19] : memref<?xi64>
%21 = arith.index_cast %20 : i64 to index
%22 = memref.load %5[] : memref<f32>
%23 = scf.for %arg3 = %18 to %21 step %c1 iter_args(%arg4 = %22) -> (f32) {
%24 = memref.load %3[%arg3] : memref<?xf32>
%25 = arith.addf %arg4, %24 : f32
scf.yield %25 : f32
}
memref.store %23, %5[] : memref<f32>
}
}
%10 = memref.tensor_load %5 : memref<f32>
%11 = tensor.extract %10[] : tensor<f32>
return %11 : f32
}
}
===========================
Input to sparsification
===========================
#trait_sum_reduction = {
indexing_maps = [
affine_map<(i,j,k) -> (i,j,k)>, // A
affine_map<(i,j,k) -> ()> // x (scalar out)
],
iterator_types = ["reduction", "reduction", "reduction"],
doc = "x += SUM_ijk A(i,j,k)"
}
#sparseTensor = #sparse_tensor.encoding<{
dimLevelType = [ "compressed", "compressed", "compressed" ],
dimOrdering = affine_map<(i,j,k) -> (i,j,k)>,
pointerBitWidth = 64,
indexBitWidth = 64
}>
func @func_f32(%argA: tensor<10x20x30xf32, #sparseTensor>) -> f32 {
%out_tensor = arith.constant dense<0.0> : tensor<f32>
%reduction = linalg.generic #trait_sum_reduction
ins(%argA: tensor<10x20x30xf32, #sparseTensor>)
outs(%out_tensor: tensor<f32>) {
^bb(%a: f32, %x: f32):
%0 = arith.addf %x, %a : f32
linalg.yield %0 : f32
} -> tensor<f32>
%answer = tensor.extract %reduction[] : tensor<f32>
return %answer : f32
}
This large output may seem intimidating due to its size, but it’s mostly large because it shows the input to each pass.
We know that the error happens when the builtin.unrealized_conversion_cast operation occurs.
We can see from the output above that it happens during the convert-std-to-llvm pass.
It’s likely that there’s something problematic in the input to that pass, so it’s worth looking into the IR that was given to the convert-std-to-llvm pass, which we can see under the section labelled “Input to convert-std-to-llvm”. We’ll show a short snippet of it below.
# Render `result` (defined in an earlier cell) as text and split it into lines.
result_string = str(result)
lines = result_string.splitlines()
# Start one line above the section title so the "====" banner line is kept.
lines = lines[lines.index(" Input to convert-std-to-llvm ")-1:]
# Cut the section off at the first empty line that follows.
lines = lines[:lines.index("")]
print("\n".join(lines))
================================
Input to convert-std-to-llvm
================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = builtin.unrealized_conversion_cast %0 : i64 to index
%2 = llvm.mlir.constant(1 : index) : i64
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
%4 = llvm.mlir.constant(2 : index) : i64
%5 = llvm.mlir.constant(1 : index) : i64
%6 = llvm.mlir.null : !llvm.ptr<f32>
%7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
%9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%12 = llvm.mlir.constant(3735928559 : index) : i64
%13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
%14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%17 = llvm.mlir.constant(0 : index) : i64
%18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
%27 = llvm.mlir.constant(1 : index) : i64
%28 = llvm.mlir.null : !llvm.ptr<f32>
%29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
%31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
%32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
%33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%38 = llvm.mlir.constant(0 : index) : i64
%39 = llvm.mlir.constant(1 : index) : i64
%40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
%44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
%45 = llvm.mlir.constant(0 : index) : i64
%46 = llvm.mlir.constant(1 : index) : i64
%47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
%51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
%52 = llvm.mlir.constant(1 : index) : i64
%53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%55 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%57 = builtin.unrealized_conversion_cast %1 : index to i64
%58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%60 = llvm.load %59 : !llvm.ptr<i64>
%61 = builtin.unrealized_conversion_cast %60 : i64 to index
%62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%63 = builtin.unrealized_conversion_cast %3 : index to i64
%64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%66 = llvm.load %65 : !llvm.ptr<i64>
%67 = builtin.unrealized_conversion_cast %66 : i64 to index
scf.for %arg1 = %61 to %67 step %3 {
%70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = builtin.unrealized_conversion_cast %arg1 : index to i64
%72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%74 = llvm.load %73 : !llvm.ptr<i64>
%75 = builtin.unrealized_conversion_cast %74 : i64 to index
%76 = builtin.unrealized_conversion_cast %arg1 : index to i64
%77 = llvm.add %76, %2 : i64
%78 = builtin.unrealized_conversion_cast %77 : i64 to index
%79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%80 = builtin.unrealized_conversion_cast %78 : index to i64
%81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%83 = llvm.load %82 : !llvm.ptr<i64>
%84 = builtin.unrealized_conversion_cast %83 : i64 to index
scf.for %arg2 = %75 to %84 step %3 {
%85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%86 = builtin.unrealized_conversion_cast %arg2 : index to i64
%87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%89 = llvm.load %88 : !llvm.ptr<i64>
%90 = builtin.unrealized_conversion_cast %89 : i64 to index
%91 = builtin.unrealized_conversion_cast %arg2 : index to i64
%92 = llvm.add %91, %2 : i64
%93 = builtin.unrealized_conversion_cast %92 : i64 to index
%94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%95 = builtin.unrealized_conversion_cast %93 : index to i64
%96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%98 = llvm.load %97 : !llvm.ptr<i64>
%99 = builtin.unrealized_conversion_cast %98 : i64 to index
%100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%101 = llvm.load %100 : !llvm.ptr<f32>
%102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
%104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%105 = builtin.unrealized_conversion_cast %arg3 : index to i64
%106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%108 = llvm.load %107 : !llvm.ptr<f32>
%109 = llvm.fadd %arg4, %108 : f32
scf.yield %109 : f32
}
%103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %102, %103 : !llvm.ptr<f32>
}
}
%68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%69 = llvm.load %68 : !llvm.ptr<f32>
llvm.return %69 : f32
}
}
While this is a good idea in general, it doesn’t seem to be useful here. When MLIR applies a pass, that pass is applied until quiescence, i.e. it keeps applying the pass until nothing changes (or until some limit on the number of applications is reached).
It seems that the convert-std-to-llvm pass has already been applied a few times since we see several ops from the LLVM dialect already present in the IR shown under the Input to convert-std-to-llvm section (for example, we see llvm.mlir.constant).
Another good place to look is at the IR around the last pass that ran right before we get our error, which is the convert-math-to-llvm pass. Let’s look at the input that was given to the convert-math-to-llvm pass.
# Re-split the report text (`result_string`, built in an earlier cell) into lines.
lines = result_string.splitlines()
# Start one line above the section title so the "====" banner line is kept.
lines = lines[lines.index(" Input to convert-math-to-llvm ")-1:]
# Cut the section off at the first empty line that follows.
lines = lines[:lines.index("")]
print("\n".join(lines))
=================================
Input to convert-math-to-llvm
=================================
module {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparsePointers64(%1, %arg0, %arg1) : (!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparsePointers64(!llvm.ptr<struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>, i64) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @func_f32(%arg0: !llvm.ptr<i8>) -> f32 {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = builtin.unrealized_conversion_cast %0 : i64 to index
%2 = llvm.mlir.constant(1 : index) : i64
%3 = builtin.unrealized_conversion_cast %2 : i64 to index
%4 = llvm.mlir.constant(2 : index) : i64
%5 = llvm.mlir.constant(1 : index) : i64
%6 = llvm.mlir.null : !llvm.ptr<f32>
%7 = llvm.getelementptr %6[%5] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%8 = llvm.ptrtoint %7 : !llvm.ptr<f32> to i64
%9 = llvm.mlir.addressof @__constant_xf32 : !llvm.ptr<f32>
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.getelementptr %9[%10] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%12 = llvm.mlir.constant(3735928559 : index) : i64
%13 = llvm.inttoptr %12 : i64 to !llvm.ptr<f32>
%14 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%15 = llvm.insertvalue %13, %14[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%16 = llvm.insertvalue %11, %15[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%17 = llvm.mlir.constant(0 : index) : i64
%18 = llvm.insertvalue %17, %16[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%19 = llvm.call @sparsePointers64(%arg0, %0) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%20 = builtin.unrealized_conversion_cast %19 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%21 = llvm.call @sparsePointers64(%arg0, %2) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%22 = builtin.unrealized_conversion_cast %21 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%23 = llvm.call @sparsePointers64(%arg0, %4) : (!llvm.ptr<i8>, i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%24 = builtin.unrealized_conversion_cast %23 : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xi64>
%25 = llvm.call @sparseValuesF32(%arg0) : (!llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%26 = builtin.unrealized_conversion_cast %25 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> to memref<?xf32>
%27 = llvm.mlir.constant(1 : index) : i64
%28 = llvm.mlir.null : !llvm.ptr<f32>
%29 = llvm.getelementptr %28[%27] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%30 = llvm.ptrtoint %29 : !llvm.ptr<f32> to i64
%31 = llvm.call @malloc(%30) : (i64) -> !llvm.ptr<i8>
%32 = llvm.bitcast %31 : !llvm.ptr<i8> to !llvm.ptr<f32>
%33 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%34 = llvm.insertvalue %32, %33[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%35 = llvm.insertvalue %32, %34[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%36 = llvm.mlir.constant(0 : index) : i64
%37 = llvm.insertvalue %36, %35[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%38 = llvm.mlir.constant(0 : index) : i64
%39 = llvm.mlir.constant(1 : index) : i64
%40 = llvm.alloca %39 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %18, %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%41 = llvm.bitcast %40 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%42 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%43 = llvm.insertvalue %38, %42[0] : !llvm.struct<(i64, ptr<i8>)>
%44 = llvm.insertvalue %41, %43[1] : !llvm.struct<(i64, ptr<i8>)>
%45 = llvm.mlir.constant(0 : index) : i64
%46 = llvm.mlir.constant(1 : index) : i64
%47 = llvm.alloca %46 x !llvm.struct<(ptr<f32>, ptr<f32>, i64)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
llvm.store %37, %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>>
%48 = llvm.bitcast %47 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64)>> to !llvm.ptr<i8>
%49 = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
%50 = llvm.insertvalue %45, %49[0] : !llvm.struct<(i64, ptr<i8>)>
%51 = llvm.insertvalue %48, %50[1] : !llvm.struct<(i64, ptr<i8>)>
%52 = llvm.mlir.constant(1 : index) : i64
%53 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %44, %53 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%54 = llvm.alloca %52 x !llvm.struct<(i64, ptr<i8>)> : (i64) -> !llvm.ptr<struct<(i64, ptr<i8>)>>
llvm.store %51, %54 : !llvm.ptr<struct<(i64, ptr<i8>)>>
%55 = llvm.mlir.constant(4 : index) : i64
llvm.call @memrefCopy(%55, %53, %54) : (i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>) -> ()
%56 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%57 = builtin.unrealized_conversion_cast %1 : index to i64
%58 = llvm.extractvalue %56[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%59 = llvm.getelementptr %58[%57] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%60 = llvm.load %59 : !llvm.ptr<i64>
%61 = builtin.unrealized_conversion_cast %60 : i64 to index
%62 = builtin.unrealized_conversion_cast %20 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%63 = builtin.unrealized_conversion_cast %3 : index to i64
%64 = llvm.extractvalue %62[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%65 = llvm.getelementptr %64[%63] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%66 = llvm.load %65 : !llvm.ptr<i64>
%67 = builtin.unrealized_conversion_cast %66 : i64 to index
scf.for %arg1 = %61 to %67 step %3 {
%70 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%71 = builtin.unrealized_conversion_cast %arg1 : index to i64
%72 = llvm.extractvalue %70[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%73 = llvm.getelementptr %72[%71] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%74 = llvm.load %73 : !llvm.ptr<i64>
%75 = builtin.unrealized_conversion_cast %74 : i64 to index
%76 = builtin.unrealized_conversion_cast %arg1 : index to i64
%77 = llvm.add %76, %2 : i64
%78 = builtin.unrealized_conversion_cast %77 : i64 to index
%79 = builtin.unrealized_conversion_cast %22 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%80 = builtin.unrealized_conversion_cast %78 : index to i64
%81 = llvm.extractvalue %79[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%82 = llvm.getelementptr %81[%80] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%83 = llvm.load %82 : !llvm.ptr<i64>
%84 = builtin.unrealized_conversion_cast %83 : i64 to index
scf.for %arg2 = %75 to %84 step %3 {
%85 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%86 = builtin.unrealized_conversion_cast %arg2 : index to i64
%87 = llvm.extractvalue %85[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%88 = llvm.getelementptr %87[%86] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%89 = llvm.load %88 : !llvm.ptr<i64>
%90 = builtin.unrealized_conversion_cast %89 : i64 to index
%91 = builtin.unrealized_conversion_cast %arg2 : index to i64
%92 = llvm.add %91, %2 : i64
%93 = builtin.unrealized_conversion_cast %92 : i64 to index
%94 = builtin.unrealized_conversion_cast %24 : memref<?xi64> to !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%95 = builtin.unrealized_conversion_cast %93 : index to i64
%96 = llvm.extractvalue %94[1] : !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
%97 = llvm.getelementptr %96[%95] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
%98 = llvm.load %97 : !llvm.ptr<i64>
%99 = builtin.unrealized_conversion_cast %98 : i64 to index
%100 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%101 = llvm.load %100 : !llvm.ptr<f32>
%102 = scf.for %arg3 = %90 to %99 step %3 iter_args(%arg4 = %101) -> (f32) {
%104 = builtin.unrealized_conversion_cast %26 : memref<?xf32> to !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%105 = builtin.unrealized_conversion_cast %arg3 : index to i64
%106 = llvm.extractvalue %104[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
%107 = llvm.getelementptr %106[%105] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
%108 = llvm.load %107 : !llvm.ptr<f32>
%109 = llvm.fadd %arg4, %108 : f32
scf.yield %109 : f32
}
%103 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
llvm.store %102, %103 : !llvm.ptr<f32>
}
}
%68 = llvm.extractvalue %37[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
%69 = llvm.load %68 : !llvm.ptr<f32>
llvm.return %69 : f32
}
}
We see that the ops are mostly from the standard, llvm, and builtin dialects. However, there are some ops from the scf dialect. It would make sense that the convert-std-to-llvm pass would be able to handle ops from the builtin dialect. It would also make sense that it would be able to handle ops from the llvm dialect since that’s the target dialect. It’s unclear whether or not the convert-std-to-llvm pass can handle ops from the scf dialect. Given the name of the
convert-std-to-llvm pass, we can infer that it will mostly handle ops from the std dialect and cannot handle ops from the scf dialect. Let’s see if there are any passes that can convert from the scf dialect to the std dialect.
!mlir-opt --help | grep "scf"
Available Dialects: acc, affine, amx, arith, arm_neon, arm_sve, async, builtin, complex, dlti, emitc, gpu, linalg, llvm, math, memref, nvvm, omp, pdl, pdl_interp, quant, rocdl, scf, shape, sparse_tensor, spv, std, tensor, test, tosa, vector, x86vector
--async-parallel-for - Convert scf.parallel operations to multiple async compute ops executed concurrently for non-overlapping iteration ranges
--convert-linalg-tiled-loops-to-scf - Lower linalg tiled loops to SCF loops and parallel loops
--convert-openacc-to-scf - Convert the OpenACC ops to OpenACC with SCF dialect
--convert-parallel-loops-to-gpu - Convert mapped scf.parallel ops to gpu launch operations
--convert-scf-to-openmp - Convert SCF parallel loop to OpenMP parallel + workshare constructs.
--convert-scf-to-spirv - Convert SCF dialect to SPIR-V dialect.
--convert-scf-to-std - Convert SCF dialect to Standard dialect, replacing structured control flow with a CFG
--convert-vector-to-scf - Lower the operations from the vector dialect into the SCF dialect
--for-loop-canonicalization - Canonicalize operations within scf.for loop bodies
--scf-bufferize - Bufferize the scf dialect.
--scf-for-to-while - Convert SCF for loops to SCF while loops
--test-scf-for-utils - test scf.for utils
--test-scf-if-utils - test scf.if utils
--test-scf-pipelining - test scf.forOp pipelining
--test-vector-transfer-full-partial-split - Test conversion patterns to split transfer ops via scf.if + linalg ops
--tosa-to-scf - Lower TOSA to the SCF dialect
The convert-scf-to-std pass seems promising, as it is intended to convert the scf dialect to the std dialect.
Let’s see if running the convert-scf-to-std pass right before the convert-std-to-llvm pass will get rid of our exception.
passes = [
"--sparsification",
"--sparse-tensor-conversion",
"--linalg-bufferize",
"--func-bufferize",
"--tensor-constant-bufferize",
"--tensor-bufferize",
"--finalizing-bufferize",
"--convert-linalg-to-loops",
"--convert-scf-to-std", # newly added
"--convert-memref-to-llvm",
"--convert-openmp-to-llvm",
"--convert-arith-to-llvm",
"--convert-math-to-llvm",
"--convert-std-to-llvm",
"--reconcile-unrealized-casts"
]
result = cli.apply_passes(mlir_bytes, passes)
print(result[:1500])
module attributes {llvm.data_layout = ""} {
llvm.func @memrefCopy(i64, !llvm.ptr<struct<(i64, ptr<i8>)>>, !llvm.ptr<struct<(i64, ptr<i8>)>>)
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.mlir.global private constant @__constant_xf32(0.000000e+00 : f32) : f32
llvm.func @sparseValuesF32(%arg0: !llvm.ptr<i8>) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = llvm.alloca %0 x !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.call @_mlir_ciface_sparseValuesF32(%1, %arg0) : (!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) -> ()
%2 = llvm.load %1 : !llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>
llvm.return %2 : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
}
llvm.func @_mlir_ciface_sparseValuesF32(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface, sym_visibility = "private"}
llvm.func @sparsePointers64(%arg0: !llvm.ptr<i8>, %arg1: i64) -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)> attributes {llvm.emit_c_interface, sym_visibility = "private"} {
%0 = llvm.mlir.constant(1 : index) : i64
%1 = ll
It looks like it fixed our issue!