Operator: aten._softmax.default
cnt: 6, ((T([80, 204, 204], f16), 2, False), {})
cnt: 6, ((T([80, 22, 22], f16), 2, False), {})
cnt: 6, ((T([80, 22, 204], f16), 2, False), {})
Operator: aten._softmax_backward_data.default
cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], f16), 2, f16), {})
cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], f16), 2, f16), {})
cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], f16), 2, f16), {})
Operator: aten._to_copy.default
cnt: 1, ((T([10, 22], b8),), {'dtype': f32})
cnt: 1, ((T([], f32),), {'dtype': f16})
cnt: 18, ((T([10, 22, 512], f32),), {'dtype': f16})
Operator: aten._unsafe_view.default
cnt: 1, ((T([220, 1014], f16), [10, 22, 1014]), {})
cnt: 12, ((T([8, 10, 22, 64], f16), [80, 22, 64]), {})
cnt: 30, ((T([10, 204, 8, 64], f16), [10, 204, 512]), {})
cnt: 24, ((T([10, 22, 8, 64], f16), [10, 22, 512]), {})
cnt: 6, ((T([8, 10, 204, 64], f16), [80, 204, 64]), {})
Operator: aten.add.Tensor
cnt: 1, ((T([10, 204, 512], f16), T([1, 204, 512], f16)), {})
cnt: 47, ((T([10, 204, 512], f16), T([10, 204, 512], f16)), {})
cnt: 1, ((T([10, 22, 22], b8, stride=(22, 0, 1)), T([10, 22, 22], u8, stride=(0, 22, 1))), {})
cnt: 1, ((T([10, 22, 512], f16), T([1, 22, 512], f16)), {})
cnt: 48, ((T([10, 22, 512], f16), T([10, 22, 512], f16)), {})
cnt: 1, ((T([], f16), 0), {})
cnt: 1, ((T([], f16), T([], f32)), {})
cnt: 1, ((T([1014, 512], f16), T([1014, 512], f16)), {})
Operator: aten.addmm.default
cnt: 1, ((T([512], f16), T([2040, 320], f16), T([320, 512], f16, stride=(1, 320))), {})
cnt: 36, ((T([512], f16), T([2040, 512], f16), T([512, 512], f16, stride=(1, 512))), {})
cnt: 6, ((T([2048], f16), T([2040, 512], f16), T([512, 2048], f16, stride=(1, 512))), {})
cnt: 6, ((T([512], f16), T([2040, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {})
cnt: 36, ((T([512], f16), T([220, 512], f16), T([512, 512], f16, stride=(1, 512))), {})
cnt: 6, ((T([2048], f16), T([220, 512], f16), T([512, 2048], f16, stride=(1, 512))), {})
cnt: 6, ((T([512], f16), T([220, 2048], f16), T([2048, 512], f16, stride=(1, 2048))), {})
Operator: aten.bmm.default
cnt: 12, ((T([80, 204, 64], f16), T([80, 64, 204], f16, stride=(13056, 1, 64))), {})
cnt: 12, ((T([80, 204, 204], f16), T([80, 204, 64], f16)), {})
cnt: 12, ((T([80, 22, 64], f16), T([80, 64, 22], f16, stride=(1408, 1, 64))), {})
cnt: 12, ((T([80, 22, 22], f16), T([80, 22, 64], f16)), {})
cnt: 12, ((T([80, 22, 64], f16), T([80, 64, 204], f16, stride=(13056, 1, 64))), {})
cnt: 12, ((T([80, 22, 204], f16), T([80, 204, 64], f16)), {})
cnt: 6, ((T([80, 204, 22], f16, stride=(4488, 1, 204)), T([80, 22, 64], f16)), {})
cnt: 6, ((T([80, 64, 22], f16, stride=(1408, 1, 64)), T([80, 22, 204], f16)), {})
cnt: 6, ((T([80, 22, 22], f16, stride=(484, 1, 22)), T([80, 22, 64], f16)), {})
cnt: 6, ((T([80, 64, 22], f16, stride=(1408, 1, 64)), T([80, 22, 22], f16)), {})
cnt: 6, ((T([80, 204, 204], f16, stride=(41616, 1, 204)), T([80, 204, 64], f16)), {})
cnt: 6, ((T([80, 64, 204], f16, stride=(13056, 1, 64)), T([80, 204, 204], f16)), {})
Operator: aten.cat.default
cnt: 1, (([T([1], i64), T([17], i64)],), {})
cnt: 1, (([T([1], i64), T([15], i64)],), {})
cnt: 1, (([T([1], i64), T([21], i64)],), {})
cnt: 1, (([T([1], i64), T([18], i64)],), {})
cnt: 3, (([T([1], i64), T([9], i64)],), {})
cnt: 1, (([T([1], i64), T([12], i64)],), {})
cnt: 1, (([T([1], i64), T([11], i64)],), {})
cnt: 1, (([T([1], i64), T([10], i64)],), {})
cnt: 1, (([T([17], i64), T([1], i64)],), {})
cnt: 1, (([T([15], i64), T([1], i64)],), {})
cnt: 1, (([T([21], i64), T([1], i64)],), {})
cnt: 1, (([T([18], i64), T([1], i64)],), {})
cnt: 3, (([T([9], i64), T([1], i64)],), {})
cnt: 1, (([T([12], i64), T([1], i64)],), {})
cnt: 1, (([T([11], i64), T([1], i64)],), {})
cnt: 1, (([T([10], i64), T([1], i64)],), {})
Operator: aten.clone.default
cnt: 1, ((T([10, 204, 320], f16),), {})
cnt: 1, ((T([10], i64),), {})
cnt: 1, ((T([10, 21], i64),), {})
Operator: aten.copy_.default
cnt: 1, ((T([10, 204, 320], f16), T([10, 204, 320], f16)), {})
cnt: 7, ((T([10], i64), T([10], i64)), {})
cnt: 1, ((T([10, 21], i64), T([10, 21], i64)), {})
cnt: 2, ((T([18], i64), T([18], i64)), {})
cnt: 2, ((T([16], i64), T([16], i64)), {})
cnt: 2, ((T([22], i64), T([22], i64)), {})
cnt: 2, ((T([19], i64), T([19], i64)), {})
cnt: 2, ((T([13], i64), T([13], i64)), {})
cnt: 2, ((T([12], i64), T([12], i64)), {})
cnt: 2, ((T([11], i64), T([11], i64)), {})
Operator: aten.div.Tensor
cnt: 12, ((T([80, 204, 204], f16), 8.0), {})
cnt: 12, ((T([80, 22, 22], f16), 8.0), {})
cnt: 12, ((T([80, 22, 204], f16), 8.0), {})
cnt: 2, ((T([], f16), 223080), {})
cnt: 1, ((T([], i64), 220), {})
cnt: 2, ((T([], f32), 2), {})
Operator: aten.embedding.default
cnt: 1, ((T([1014, 512], f16), T([10, 22], i64)), {})
Operator: aten.embedding_dense_backward.default
cnt: 1, ((T([10, 22, 512], f16), T([10, 22], i64), 1014, -1, False), {})
Operator: aten.eq.Scalar
cnt: 1, ((T([10, 22], i64), 2), {})
Operator: aten.fill_.Scalar
cnt: 1, ((T([10, 22], i64), 2), {})
cnt: 1, ((T([10, 22], i64), -1), {})
Operator: aten.fill_.Tensor
cnt: 3, ((T([0], f16), T([], f16)), {})
cnt: 3, ((T([4], f16), T([], f16)), {})
cnt: 3, ((T([8], f16), T([], f16)), {})
cnt: 3, ((T([24], f16), T([], f16)), {})
cnt: 3, ((T([57], f16), T([], f16)), {})
cnt: 3, ((T([67], f16), T([], f16)), {})
cnt: 3, ((T([75], f16), T([], f16)), {})
cnt: 3, ((T([91], f16), T([], f16)), {})
cnt: 3, ((T([99], f16), T([], f16)), {})
cnt: 3, ((T([118], f16), T([], f16)), {})
Operator: aten.gt.Scalar
cnt: 1, ((T([10, 22, 22], u8), 0), {})
Operator: aten.index.Tensor
cnt: 10, ((T([21], i64), [T([21], b8)]), {})
Operator: aten.lt.Scalar
cnt: 2, ((T([10, 204], f16), 1), {})
Operator: aten.masked_fill.Scalar
cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], b8), -inf), {})
cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], b8), -inf), {})
cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], b8), -inf), {})
cnt: 6, ((T([80, 22, 204], f16), T([80, 22, 204], b8), 0), {})
cnt: 6, ((T([80, 22, 22], f16), T([80, 22, 22], b8), 0), {})
cnt: 6, ((T([80, 204, 204], f16), T([80, 204, 204], b8), 0), {})
Operator: aten.mm.default
cnt: 1, ((T([220, 512], f16), T([512, 1014], f16, stride=(1, 512))), {})
cnt: 1, ((T([1014, 220], f16, stride=(0, 0)), T([220, 512], f16)), {})
cnt: 1, ((T([220, 1014], f16, stride=(0, 0)), T([1014, 512], f16)), {})
cnt: 6, ((T([220, 512], f16), T([512, 2048], f16)), {})
cnt: 6, ((T([512, 220], f16, stride=(1, 512)), T([220, 2048], f16)), {})
cnt: 6, ((T([220, 2048], f16), T([2048, 512], f16)), {})
cnt: 6, ((T([2048, 220], f16, stride=(1, 2048)), T([220, 512], f16)), {})
cnt: 36, ((T([220, 512], f16), T([512, 512], f16)), {})
cnt: 36, ((T([512, 220], f16, stride=(1, 512)), T([220, 512], f16)), {})
cnt: 36, ((T([2040, 512], f16), T([512, 512], f16)), {})
cnt: 36, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 512], f16)), {})
cnt: 6, ((T([2040, 512], f16), T([512, 2048], f16)), {})
cnt: 6, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 2048], f16)), {})
cnt: 6, ((T([2040, 2048], f16), T([2048, 512], f16)), {})
cnt: 6, ((T([2048, 2040], f16, stride=(1, 2048)), T([2040, 512], f16)), {})
cnt: 1, ((T([512, 2040], f16, stride=(1, 512)), T([2040, 320], f16)), {})
Operator: aten.mul.Tensor
cnt: 2, ((T([10, 22, 512], f16), 22.627416997969522), {})
cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 1], f32)), {})
cnt: 12, ((T([10, 204, 512], f16), T([10, 204, 1], f16)), {})
Operator: aten.mul_.Tensor
cnt: 12, ((T([10, 204, 512], f16), T([10, 204, 1], f16)), {})
cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 1], f32)), {})
Operator: aten.native_layer_norm.default
cnt: 13, ((T([10, 204, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {})
cnt: 18, ((T([10, 22, 512], f16), [512], T([512], f16), T([512], f16), 1e-05), {})
Operator: aten.native_layer_norm_backward.default
cnt: 18, ((T([10, 22, 512], f16), T([10, 22, 512], f16), [512], T([10, 22, 1], f32), T([10, 22, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
cnt: 13, ((T([10, 204, 512], f16), T([10, 204, 512], f16), [512], T([10, 204, 1], f32), T([10, 204, 1], f32), T([512], f16), T([512], f16), [True, True, True]), {})
Operator: aten.ne.Scalar
cnt: 10, ((T([21], i64), -1), {})
cnt: 1, ((T([10, 22], i64), 2), {})
Operator: aten.new_ones.default
cnt: 2, ((T([10, 204, 320], f16), [10, 204]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False})
cnt: 1, ((T([10, 204, 512], f16), [10, 204]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda', 'pin_memory': False})
Operator: aten.relu.default
cnt: 6, ((T([10, 204, 2048], f16),), {})
cnt: 6, ((T([10, 22, 2048], f16),), {})
Operator: aten.repeat.default
cnt: 6, ((T([10, 204, 204], b8, stride=(204, 0, 1)), [8, 1, 1]), {})
cnt: 6, ((T([10, 22, 22], b8), [8, 1, 1]), {})
cnt: 6, ((T([10, 22, 204], b8, stride=(204, 0, 1)), [8, 1, 1]), {})
Operator: aten.sum.SymInt
cnt: 42, ((T([220, 512], f16), [0], True), {})
cnt: 6, ((T([220, 2048], f16), [0], True), {})
cnt: 43, ((T([2040, 512], f16), [0], True), {})
cnt: 6, ((T([2040, 2048], f16), [0], True), {})
Operator: aten.sum.default
cnt: 1, ((T([10, 22, 1014], f16),), {})
cnt: 1, ((T([10, 22], i64),), {})
Operator: aten.threshold_backward.default
cnt: 6, ((T([10, 22, 2048], f16), T([10, 22, 2048], f16), 0), {})
cnt: 6, ((T([10, 204, 2048], f16), T([10, 204, 2048], f16), 0), {})
Operator: aten.triu.default
cnt: 1, ((T([22, 22], u8), 1), {})
Operator: aten.unbind.int
cnt: 1, ((T([10, 21], i64),), {})
