|
94 | 94 | (%93) = "pd_op.matmul" (%91, %3) {is_persistable:[false],stop_gradient:[false],transpose_x:false,transpose_y:false} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4096x4096xf16>) -> builtin.tensor<-1x-1x4096xf16> |
95 | 95 | (%94) = "pd_op.matmul" (%91, %4) {is_persistable:[false],stop_gradient:[false],transpose_x:false,transpose_y:false} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4096x4096xf16>) -> builtin.tensor<-1x-1x4096xf16> |
96 | 96 | (%95) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)0,(Int64)0,(Int64)32,(Int64)128]} : () -> builtin.tensor<4xi64> |
97 | | - (%96, %97) = "pd_op.reshape" (%92, %95) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16>, builtin.tensor<0x-1x-1x4096xf16> |
| 97 | + (%96) = "pd_op.reshape" (%92, %95) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16> |
98 | 98 | (%98) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)0,(Int64)0,(Int64)32,(Int64)128]} : () -> builtin.tensor<4xi64> |
99 | | - (%99, %100) = "pd_op.reshape" (%93, %98) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16>, builtin.tensor<0x-1x-1x4096xf16> |
| 99 | + (%99) = "pd_op.reshape" (%93, %98) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16> |
100 | 100 | (%101) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)0,(Int64)0,(Int64)32,(Int64)128]} : () -> builtin.tensor<4xi64> |
101 | | - (%102, %103) = "pd_op.reshape" (%94, %101) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16>, builtin.tensor<0x-1x-1x4096xf16> |
| 101 | + (%102) = "pd_op.reshape" (%94, %101) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4xi64>) -> builtin.tensor<-1x-1x32x128xf16> |
102 | 102 | (%104) = "pd_op.shape" (%99) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x-1x32x128xf16>) -> builtin.tensor<4xi32> |
103 | 103 | (%105) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)1]} : () -> builtin.tensor<1xi64> |
104 | 104 | (%106) = "pd_op.full_int_array" () {dtype:(pd_op.DataType)int64,place:(pd_op.Place)Place(cpu),stop_gradient:[true],value:[(Int64)2]} : () -> builtin.tensor<1xi64> |
|
173 | 173 | (%181) = "pd_op.matmul" (%179, %180) {is_persistable:[false],stop_gradient:[false],transpose_x:false,transpose_y:false} : (builtin.tensor<-1x32x-1x128xf16>, builtin.tensor<-1x32x128x-1xf16>) -> builtin.tensor<-1x32x-1x-1xf16> |
174 | 174 | (%182) = "pd_op.full" () {dtype:(pd_op.DataType)int32,is_persistable:[false],place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[false],value:(Float)1} : () -> builtin.tensor<1xi32> |
175 | 175 | (%183) = "builtin.combine" (%167, %182, %170, %174) {} : (builtin.tensor<i32>, builtin.tensor<1xi32>, builtin.tensor<i32>, builtin.tensor<i32>) -> vec[builtin.tensor<i32>,builtin.tensor<1xi32>,builtin.tensor<i32>,builtin.tensor<i32>] |
176 | | - (%184, %185) = "pd_op.reshape" (%82, %183) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x1x-1x-1xf16>, vec[builtin.tensor<i32>,builtin.tensor<1xi32>,builtin.tensor<i32>,builtin.tensor<i32>]) -> builtin.tensor<-1x1x-1x-1xf16>, builtin.tensor<0x-1x1x-1x-1xf16> |
| 176 | + (%184) = "pd_op.reshape" (%82, %183) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x1x-1x-1xf16>, vec[builtin.tensor<i32>,builtin.tensor<1xi32>,builtin.tensor<i32>,builtin.tensor<i32>]) -> builtin.tensor<-1x1x-1x-1xf16> |
177 | 177 | (%186) = "pd_op.add" (%181, %184) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x32x-1x-1xf16>, builtin.tensor<-1x1x-1x-1xf16>) -> builtin.tensor<-1x32x-1x-1xf16> |
178 | 178 | (%187) = "pd_op.cast" (%186) {dtype:(pd_op.DataType)float32,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x32x-1x-1xf16>) -> builtin.tensor<-1x32x-1x-1xf32> |
179 | 179 | (%188) = "pd_op.softmax" (%187) {axis:(Int32)-1,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x32x-1x-1xf32>) -> builtin.tensor<-1x32x-1x-1xf32> |
|
182 | 182 | (%191) = "pd_op.transpose" (%190) {is_persistable:[false],perm:[(Int32)0,(Int32)2,(Int32)1,(Int32)3],stop_gradient:[false]} : (builtin.tensor<-1x32x-1x128xf16>) -> builtin.tensor<-1x-1x32x128xf16> |
183 | 183 | (%192) = "pd_op.full" () {dtype:(pd_op.DataType)int32,is_persistable:[false],place:(pd_op.Place)Place(cpu),shape:(pd_op.IntArray)[1],stop_gradient:[false],value:(Float)4096} : () -> builtin.tensor<1xi32> |
184 | 184 | (%193) = "builtin.combine" (%167, %170, %192) {} : (builtin.tensor<i32>, builtin.tensor<i32>, builtin.tensor<1xi32>) -> vec[builtin.tensor<i32>,builtin.tensor<i32>,builtin.tensor<1xi32>] |
185 | | - (%194, %195) = "pd_op.reshape" (%191, %193) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x32x128xf16>, vec[builtin.tensor<i32>,builtin.tensor<i32>,builtin.tensor<1xi32>]) -> builtin.tensor<-1x-1x4096xf16>, builtin.tensor<0x-1x-1x32x128xf16> |
| 185 | + (%194) = "pd_op.reshape" (%191, %193) {is_persistable:[false,false],stop_gradient:[false,false]} : (builtin.tensor<-1x-1x32x128xf16>, vec[builtin.tensor<i32>,builtin.tensor<i32>,builtin.tensor<1xi32>]) -> builtin.tensor<-1x-1x4096xf16> |
186 | 186 | (%196) = "pd_op.matmul" (%194, %7) {is_persistable:[false],stop_gradient:[false],transpose_x:false,transpose_y:false} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<4096x4096xf16>) -> builtin.tensor<-1x-1x4096xf16> |
187 | 187 | (%197) = "pd_op.add" (%49, %196) {is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x-1x4096xf16>, builtin.tensor<-1x-1x4096xf16>) -> builtin.tensor<-1x-1x4096xf16> |
188 | 188 | (%198) = "pd_op.cast" (%197) {dtype:(pd_op.DataType)float32,is_persistable:[false],stop_gradient:[false]} : (builtin.tensor<-1x-1x4096xf16>) -> builtin.tensor<-1x-1x4096xf32> |
|
0 commit comments