class <lambda>(torch.nn.Module):
    def forward(self, arg0_1: "f32[1, 3, 1088, 1088]", arg1_1: "f32[128, 3, 4, 4]", arg2_1: "f32[128]", arg3_1: "f32[128]", arg4_1: "f32[128]", arg5_1: "f32[128]", arg6_1: "f32[128]", arg7_1: "f32[384, 128]", arg8_1: "f32[384]", arg9_1: "f32[169, 4]", arg10_1: "i64[49, 49]", arg11_1: "f32[128, 128]", arg12_1: "f32[128]", arg13_1: "f32[128]", arg14_1: "f32[128]", arg15_1: "f32[512, 128]", arg16_1: "f32[512]", arg17_1: "f32[128, 512]", arg18_1: "f32[128]", arg19_1: "f32[128]", arg20_1: "f32[128]", arg21_1: "f32[384, 128]", arg22_1: "f32[384]", arg23_1: "f32[169, 4]", arg24_1: "i64[49, 49]", arg25_1: "f32[128, 128]", arg26_1: "f32[128]", arg27_1: "f32[128]", arg28_1: "f32[128]", arg29_1: "f32[512, 128]", arg30_1: "f32[512]", arg31_1: "f32[128, 512]", arg32_1: "f32[128]", arg33_1: "f32[512]", arg34_1: "f32[512]", arg35_1: "f32[256, 512]", arg36_1: "f32[128]", arg37_1: "f32[128]", arg38_1: "f32[256]", arg39_1: "f32[256]", arg40_1: "f32[768, 256]", arg41_1: "f32[768]", arg42_1: "f32[169, 8]", arg43_1: "i64[49, 49]", arg44_1: "f32[256, 256]", arg45_1: "f32[256]", arg46_1: "f32[256]", arg47_1: "f32[256]", arg48_1: "f32[1024, 256]", arg49_1: "f32[1024]", arg50_1: "f32[256, 1024]", arg51_1: "f32[256]", arg52_1: "f32[256]", arg53_1: "f32[256]", arg54_1: "f32[768, 256]", arg55_1: "f32[768]", arg56_1: "f32[169, 8]", arg57_1: "i64[49, 49]", arg58_1: "f32[256, 256]", arg59_1: "f32[256]", arg60_1: "f32[256]", arg61_1: "f32[256]", arg62_1: "f32[1024, 256]", arg63_1: "f32[1024]", arg64_1: "f32[256, 1024]", arg65_1: "f32[256]", arg66_1: "f32[1024]", arg67_1: "f32[1024]", arg68_1: "f32[512, 1024]", arg69_1: "f32[256]", arg70_1: "f32[256]", arg71_1: "f32[512]", arg72_1: "f32[512]", arg73_1: "f32[1536, 512]", arg74_1: "f32[1536]", arg75_1: "f32[169, 16]", arg76_1: "i64[49, 49]", arg77_1: "f32[512, 512]", arg78_1: "f32[512]", arg79_1: "f32[512]", arg80_1: "f32[512]", arg81_1: "f32[2048, 512]", arg82_1: "f32[2048]", arg83_1: "f32[512, 2048]", arg84_1: "f32[512]", arg85_1: "f32[512]", arg86_1: "f32[512]", arg87_1: "f32[1536, 512]", arg88_1: "f32[1536]", arg89_1: "f32[169, 16]", arg90_1: "i64[49, 49]", arg91_1: "f32[512, 512]", arg92_1: "f32[512]", arg93_1: "f32[512]", arg94_1: "f32[512]", arg95_1: "f32[2048, 512]", arg96_1: "f32[2048]", arg97_1: "f32[512, 2048]", arg98_1: "f32[512]", arg99_1: "f32[512]", arg100_1: "f32[512]", arg101_1: "f32[1536, 512]", arg102_1: "f32[1536]", arg103_1: "f32[169, 16]", arg104_1: "i64[49, 49]", arg105_1: "f32[512, 512]", arg106_1: "f32[512]", arg107_1: "f32[512]", arg108_1: "f32[512]", arg109_1: "f32[2048, 512]", arg110_1: "f32[2048]", arg111_1: "f32[512, 2048]", arg112_1: "f32[512]", arg113_1: "f32[512]", arg114_1: "f32[512]", arg115_1: "f32[1536, 512]", arg116_1: "f32[1536]", arg117_1: "f32[169, 16]", arg118_1: "i64[49, 49]", arg119_1: "f32[512, 512]", arg120_1: "f32[512]", arg121_1: "f32[512]", arg122_1: "f32[512]", arg123_1: "f32[2048, 512]", arg124_1: "f32[2048]", arg125_1: "f32[512, 2048]", arg126_1: "f32[512]", arg127_1: "f32[512]", arg128_1: "f32[512]", arg129_1: "f32[1536, 512]", arg130_1: "f32[1536]", arg131_1: "f32[169, 16]", arg132_1: "i64[49, 49]", arg133_1: "f32[512, 512]", arg134_1: "f32[512]", arg135_1: "f32[512]", arg136_1: "f32[512]", arg137_1: "f32[2048, 512]", arg138_1: "f32[2048]", arg139_1: "f32[512, 2048]", arg140_1: "f32[512]", arg141_1: "f32[512]", arg142_1: "f32[512]", arg143_1: "f32[1536, 512]", arg144_1: "f32[1536]", arg145_1: "f32[169, 16]", arg146_1: "i64[49, 49]", arg147_1: "f32[512, 512]", arg148_1: "f32[512]", arg149_1: "f32[512]", arg150_1: "f32[512]", arg151_1: "f32[2048, 512]", arg152_1: "f32[2048]", arg153_1: "f32[512, 2048]", arg154_1: "f32[512]", arg155_1: "f32[512]", arg156_1: "f32[512]", arg157_1: "f32[1536, 512]", arg158_1: "f32[1536]", arg159_1: "f32[169, 16]", arg160_1: "i64[49, 49]", arg161_1: "f32[512, 512]", arg162_1: "f32[512]", arg163_1: "f32[512]", arg164_1: "f32[512]", arg165_1: "f32[2048, 512]", arg166_1: "f32[2048]", arg167_1: "f32[512, 2048]", arg168_1: "f32[512]", arg169_1: "f32[512]", arg170_1: "f32[512]", arg171_1: "f32[1536, 512]", arg172_1: "f32[1536]", arg173_1: "f32[169, 16]", arg174_1: "i64[49, 49]", arg175_1: "f32[512, 512]", arg176_1: "f32[512]", arg177_1: "f32[512]", arg178_1: "f32[512]", arg179_1: "f32[2048, 512]", arg180_1: "f32[2048]", arg181_1: "f32[512, 2048]", arg182_1: "f32[512]", arg183_1: "f32[512]", arg184_1: "f32[512]", arg185_1: "f32[1536, 512]", arg186_1: "f32[1536]", arg187_1: "f32[169, 16]", arg188_1: "i64[49, 49]", arg189_1: "f32[512, 512]", arg190_1: "f32[512]", arg191_1: "f32[512]", arg192_1: "f32[512]", arg193_1: "f32[2048, 512]", arg194_1: "f32[2048]", arg195_1: "f32[512, 2048]", arg196_1: "f32[512]", arg197_1: "f32[512]", arg198_1: "f32[512]", arg199_1: "f32[1536, 512]", arg200_1: "f32[1536]", arg201_1: "f32[169, 16]", arg202_1: "i64[49, 49]", arg203_1: "f32[512, 512]", arg204_1: "f32[512]", arg205_1: "f32[512]", arg206_1: "f32[512]", arg207_1: "f32[2048, 512]", arg208_1: "f32[2048]", arg209_1: "f32[512, 2048]", arg210_1: "f32[512]", arg211_1: "f32[512]", arg212_1: "f32[512]", arg213_1: "f32[1536, 512]", arg214_1: "f32[1536]", arg215_1: "f32[169, 16]", arg216_1: "i64[49, 49]", arg217_1: "f32[512, 512]", arg218_1: "f32[512]", arg219_1: "f32[512]", arg220_1: "f32[512]", arg221_1: "f32[2048, 512]", arg222_1: "f32[2048]", arg223_1: "f32[512, 2048]", arg224_1: "f32[512]", arg225_1: "f32[512]", arg226_1: "f32[512]", arg227_1: "f32[1536, 512]", arg228_1: "f32[1536]", arg229_1: "f32[169, 16]", arg230_1: "i64[49, 49]", arg231_1: "f32[512, 512]", arg232_1: "f32[512]", arg233_1: "f32[512]", arg234_1: "f32[512]", arg235_1: "f32[2048, 512]", arg236_1: "f32[2048]", arg237_1: "f32[512, 2048]", arg238_1: "f32[512]", arg239_1: "f32[512]", arg240_1: "f32[512]", arg241_1: "f32[1536, 512]", arg242_1: "f32[1536]", arg243_1: "f32[169, 16]", arg244_1: "i64[49, 49]", arg245_1: "f32[512, 512]", arg246_1: "f32[512]", arg247_1: "f32[512]", arg248_1: "f32[512]", arg249_1: "f32[2048, 512]", arg250_1: "f32[2048]", arg251_1: "f32[512, 2048]", arg252_1: "f32[512]", arg253_1: "f32[512]", arg254_1: "f32[512]", arg255_1: "f32[1536, 512]", arg256_1: "f32[1536]", arg257_1: "f32[169, 16]", arg258_1: "i64[49, 49]", arg259_1: "f32[512, 512]", arg260_1: "f32[512]", arg261_1: "f32[512]", arg262_1: "f32[512]", arg263_1: "f32[2048, 512]", arg264_1: "f32[2048]", arg265_1: "f32[512, 2048]", arg266_1: "f32[512]", arg267_1: "f32[512]", arg268_1: "f32[512]", arg269_1: "f32[1536, 512]", arg270_1: "f32[1536]", arg271_1: "f32[169, 16]", arg272_1: "i64[49, 49]", arg273_1: "f32[512, 512]", arg274_1: "f32[512]", arg275_1: "f32[512]", arg276_1: "f32[512]", arg277_1: "f32[2048, 512]", arg278_1: "f32[2048]", arg279_1: "f32[512, 2048]", arg280_1: "f32[512]", arg281_1: "f32[512]", arg282_1: "f32[512]", arg283_1: "f32[1536, 512]", arg284_1: "f32[1536]", arg285_1: "f32[169, 16]", arg286_1: "i64[49, 49]", arg287_1: "f32[512, 512]", arg288_1: "f32[512]", arg289_1: "f32[512]", arg290_1: "f32[512]", arg291_1: "f32[2048, 512]", arg292_1: "f32[2048]", arg293_1: "f32[512, 2048]", arg294_1: "f32[512]", arg295_1: "f32[512]", arg296_1: "f32[512]", arg297_1: "f32[1536, 512]", arg298_1: "f32[1536]", arg299_1: "f32[169, 16]", arg300_1: "i64[49, 49]", arg301_1: "f32[512, 512]", arg302_1: "f32[512]", arg303_1: "f32[512]", arg304_1: "f32[512]", arg305_1: "f32[2048, 512]", arg306_1: "f32[2048]", arg307_1: "f32[512, 2048]", arg308_1: "f32[512]", arg309_1: "f32[512]", arg310_1: "f32[512]", arg311_1: "f32[1536, 512]", arg312_1: "f32[1536]", arg313_1: "f32[169, 16]", arg314_1: "i64[49, 49]", arg315_1: "f32[512, 512]", arg316_1: "f32[512]", arg317_1: "f32[512]", arg318_1: "f32[512]", arg319_1: "f32[2048, 512]", arg320_1: "f32[2048]", arg321_1: "f32[512, 2048]", arg322_1: "f32[512]", arg323_1: "f32[512]", arg324_1: "f32[512]"):
        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/conv.py:453 in _conv_forward, code: return F.conv2d(input, weight, bias, self.stride,
        convert_element_type_2: "f16[1, 3, 1088, 1088]" = torch.ops.prims.convert_element_type.default(arg0_1, torch.float16); arg0_1 = None
        convert_element_type_1: "f16[128, 3, 4, 4]" = torch.ops.prims.convert_element_type.default(arg1_1, torch.float16); arg1_1 = None
        convert_element_type: "f16[128]" = torch.ops.prims.convert_element_type.default(arg2_1, torch.float16); arg2_1 = None
        convolution: "f16[1, 128, 272, 272]" = torch.ops.aten.convolution.default(convert_element_type_2, convert_element_type_1, convert_element_type, [4, 4], [0, 0], [1, 1], False, [0, 0], 1); convert_element_type_2 = convert_element_type_1 = convert_element_type = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:520 in forward, code: x = x.flatten(2).transpose(1, 2)
        view: "f16[1, 128, 73984]" = torch.ops.aten.reshape.default(convolution, [1, 128, 73984]); convolution = None
        permute: "f16[1, 73984, 128]" = torch.ops.aten.permute.default(view, [0, 2, 1]); view = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        convert_element_type_3: "f32[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(permute, torch.float32); permute = None
        clone: "f32[1, 73984, 128]" = torch.ops.aten.clone.default(convert_element_type_3, memory_format = torch.contiguous_format); convert_element_type_3 = None
        var_mean = torch.ops.aten.var_mean.correction(clone, [2], correction = 0, keepdim = True)
        getitem: "f32[1, 73984, 1]" = var_mean[0]
        getitem_1: "f32[1, 73984, 1]" = var_mean[1]; var_mean = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
        _tensor_constant2: "f32[]" = self._tensor_constant2
        _tensor_constant3: "f32[]" = self._tensor_constant3
        _tensor_constant4: "f32[]" = self._tensor_constant4
        _tensor_constant5: "f32[]" = self._tensor_constant5
        _tensor_constant6: "f32[]" = self._tensor_constant6
        _tensor_constant7: "f32[]" = self._tensor_constant7
        _tensor_constant8: "f32[]" = self._tensor_constant8
        _tensor_constant9: "f32[]" = self._tensor_constant9
        _tensor_constant10: "f32[]" = self._tensor_constant10

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        sub: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(clone, getitem_1); clone = getitem_1 = None
        add: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None
        rsqrt: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add); add = None
        mul: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = rsqrt = None
        mul_1: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul, arg3_1); mul = arg3_1 = None
        add_1: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_1, arg4_1); mul_1 = arg4_1 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:522 in forward, code: x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww)
        permute_1: "f32[1, 128, 73984]" = torch.ops.aten.permute.default(add_1, [0, 2, 1]); add_1 = None
        view_1: "f32[1, 128, 272, 272]" = torch.ops.aten.reshape.default(permute_1, [-1, 128, 272, 272]); permute_1 = None
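
The sequence up to view_1 is the Swin patch embedding: a 4x4, stride-4 convolution run in fp16 (presumably under autocast, given the convert_element_type casts), a flatten to tokens, a LayerNorm decomposed into var_mean/rsqrt/mul/add, and a reshape back to NCHW. A minimal eager-mode sketch of the same computation; the names proj and norm are illustrative, not the module's real attributes:

import torch
import torch.nn as nn

proj = nn.Conv2d(3, 128, kernel_size=4, stride=4)   # arg1_1 / arg2_1
norm = nn.LayerNorm(128, eps=1e-05)                 # arg3_1 / arg4_1

x = torch.randn(1, 3, 1088, 1088)
x = proj(x)                                  # [1, 128, 272, 272], the convolution node
Wh, Ww = x.size(2), x.size(3)
x = x.flatten(2).transpose(1, 2)             # [1, 73984, 128]
x = norm(x)                                  # var_mean / rsqrt / mul / add in the graph
x = x.transpose(1, 2).view(-1, 128, Wh, Ww)  # back to [1, 128, 272, 272], view_1
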
        # File: /workspace/networks/encoders/swin/swin_transformer.py:703 in forward, code: x = x.flatten(2).transpose(1, 2)
        view_2: "f32[1, 128, 73984]" = torch.ops.aten.reshape.default(view_1, [1, 128, 73984]); view_1 = None
        permute_2: "f32[1, 73984, 128]" = torch.ops.aten.permute.default(view_2, [0, 2, 1]); view_2 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        var_mean_1 = torch.ops.aten.var_mean.correction(permute_2, [2], correction = 0, keepdim = True)
        getitem_2: "f32[1, 73984, 1]" = var_mean_1[0]
        getitem_3: "f32[1, 73984, 1]" = var_mean_1[1]; var_mean_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_17: "f16[128]" = torch.ops.prims.convert_element_type.default(arg12_1, torch.float16); arg12_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_6: "f16[384]" = torch.ops.prims.convert_element_type.default(arg8_1, torch.float16); arg8_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        sub_2: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(permute_2, getitem_3); getitem_3 = None
        add_2: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_2, 1e-05); getitem_2 = None
        rsqrt_1: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_2); add_2 = None
        mul_4: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_1); sub_2 = rsqrt_1 = None
        mul_5: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_4, arg5_1); mul_4 = arg5_1 = None
        add_3: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_5, arg6_1); mul_5 = arg6_1 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
        view_7: "f32[1, 272, 272, 128]" = torch.ops.aten.reshape.default(add_3, [1, 272, 272, 128]); add_3 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
        constant_pad_nd: "f32[1, 273, 273, 128]" = torch.ops.aten.constant_pad_nd.default(view_7, [0, 0, 0, 1, 0, 1], 0.0); view_7 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
        view_8: "f32[1, 39, 7, 39, 7, 128]" = torch.ops.aten.reshape.default(constant_pad_nd, [1, 39, 7, 39, 7, 128]); constant_pad_nd = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
        permute_5: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.permute.default(view_8, [0, 1, 3, 2, 4, 5]); view_8 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
        clone_3: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.clone.default(permute_5, memory_format = torch.contiguous_format); permute_5 = None
        view_9: "f32[1521, 7, 7, 128]" = torch.ops.aten.reshape.default(clone_3, [-1, 7, 7, 128]); clone_3 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
        view_10: "f32[1521, 49, 128]" = torch.ops.aten.reshape.default(view_9, [-1, 49, 128]); view_9 = None
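
The view_8 / permute_5 / clone_3 / view_9 chain is Swin's window_partition, whose source lines the stack traces above point at; reconstructed here for reference (the 272x272 map is padded to 273 = 39 * 7, yielding 1521 windows of 7x7 tokens):

def window_partition(x: torch.Tensor, window_size: int) -> torch.Tensor:
    # [B, H, W, C] -> [num_windows*B, window_size, window_size, C]
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)

x = torch.randn(1, 273, 273, 128)
windows = window_partition(x, 7)        # [1521, 7, 7, 128], matches view_9
windows = windows.view(-1, 49, 128)     # [1521, 49, 128], matches view_10
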
"f16[1521, 4, 32, 49]" = torch.ops.aten.clone.default(expand_1, memory_format = torch.contiguous_format); expand_1 = None view_15: "f16[6084, 32, 49]" = torch.ops.aten.reshape.default(clone_5, [6084, 32, 49]); clone_5 = None bmm: "f16[6084, 49, 49]" = torch.ops.aten.bmm.default(view_14, view_15); view_14 = view_15 = None view_16: "f16[1521, 4, 49, 49]" = torch.ops.aten.reshape.default(bmm, [1521, 4, 49, 49]); bmm = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_17: "i64[2401]" = torch.ops.aten.reshape.default(arg10_1, [-1]); arg10_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index: "f32[2401, 4]" = torch.ops.aten.index.Tensor(arg9_1, [view_17]); arg9_1 = view_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_18: "f32[49, 49, 4]" = torch.ops.aten.reshape.default(index, [49, 49, -1]); index = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_9: "f32[4, 49, 49]" = torch.ops.aten.permute.default(view_18, [2, 0, 1]); view_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_6: "f32[4, 49, 49]" = torch.ops.aten.clone.default(permute_9, memory_format = torch.contiguous_format); permute_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_2: "f32[1, 4, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_6, 0); clone_6 = None add_4: "f32[1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_16, unsqueeze_2); view_16 = unsqueeze_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax: "f32[1521, 4, 49, 1]" = torch.ops.aten.amax.default(add_4, [-1], True) sub_3: "f32[1521, 4, 49, 49]" = torch.ops.aten.sub.Tensor(add_4, amax); add_4 = amax = None exp: "f32[1521, 4, 49, 49]" = torch.ops.aten.exp.default(sub_3); sub_3 = None sum_1: "f32[1521, 4, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp, [-1], True) div_2: "f32[1521, 4, 49, 49]" = torch.ops.aten.div.Tensor(exp, sum_1); exp = sum_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_14: "f16[1521, 4, 49, 49]" = torch.ops.prims.convert_element_type.default(div_2, torch.float16); div_2 = None expand_2: "f16[1521, 4, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_14, [1521, 4, 49, 49]); convert_element_type_14 = None view_19: "f16[6084, 49, 49]" = torch.ops.aten.reshape.default(expand_2, [6084, 49, 49]); expand_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_2: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_7, 0, 2); permute_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_3: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(select_2, [1521, 4, 49, 32]); select_2 = None clone_8: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_3, memory_format = 
torch.contiguous_format); expand_3 = None view_20: "f16[6084, 49, 32]" = torch.ops.aten.reshape.default(clone_8, [6084, 49, 32]); clone_8 = None bmm_1: "f16[6084, 49, 32]" = torch.ops.aten.bmm.default(view_19, view_20); view_19 = view_20 = None view_21: "f16[1521, 4, 49, 32]" = torch.ops.aten.reshape.default(bmm_1, [1521, 4, 49, 32]); bmm_1 = None permute_10: "f16[1521, 49, 4, 32]" = torch.ops.aten.permute.default(view_21, [0, 2, 1, 3]); view_21 = None clone_9: "f16[1521, 49, 4, 32]" = torch.ops.aten.clone.default(permute_10, memory_format = torch.contiguous_format); permute_10 = None view_22: "f16[1521, 49, 128]" = torch.ops.aten.reshape.default(clone_9, [1521, 49, 128]); clone_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_23: "f16[74529, 128]" = torch.ops.aten.reshape.default(view_22, [74529, 128]); view_22 = None convert_element_type_18: "f16[128, 128]" = torch.ops.prims.convert_element_type.default(arg11_1, torch.float16); arg11_1 = None permute_11: "f16[128, 128]" = torch.ops.aten.permute.default(convert_element_type_18, [1, 0]); convert_element_type_18 = None # No stacktrace found for following nodes mm_default_86: "f16[74529, 128]" = torch.ops.aten.mm.default(view_23, permute_11); view_23 = permute_11 = None add_tensor_86: "f16[74529, 128]" = torch.ops.aten.add.Tensor(mm_default_86, convert_element_type_17); mm_default_86 = convert_element_type_17 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_24: "f16[1521, 49, 128]" = torch.ops.aten.reshape.default(add_tensor_86, [1521, 49, 128]); add_tensor_86 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_25: "f16[1521, 7, 7, 128]" = torch.ops.aten.reshape.default(view_24, [-1, 7, 7, 128]); view_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_26: "f16[1, 39, 39, 7, 7, 128]" = torch.ops.aten.reshape.default(view_25, [1, 39, 39, 7, 7, -1]); view_25 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_12: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.permute.default(view_26, [0, 1, 3, 2, 4, 5]); view_26 = None clone_11: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.clone.default(permute_12, memory_format = torch.contiguous_format); permute_12 = None view_27: "f16[1, 273, 273, 128]" = torch.ops.aten.reshape.default(clone_11, [1, 273, 273, -1]); clone_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_133: "f16[1, 272, 273, 128]" = torch.ops.aten.slice.Tensor(view_27, 1, 0, 272); view_27 = None slice_134: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_133, 2, 0, 272); slice_133 = None clone_12: "f16[1, 272, 272, 128]" = torch.ops.aten.clone.default(slice_134, memory_format = torch.contiguous_format); slice_134 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_28: "f16[1, 73984, 128]" = torch.ops.aten.reshape.default(clone_12, [1, 73984, 128]); clone_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, 
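
Everything from the qkv projection down to view_28 is one W-MSA window-attention block: qkv as a single 128 -> 384 linear, per-head scaling by 0.1767766952966369 = 1/sqrt(32), the relative-position-bias table (arg9_1, shape [169, 4]) gathered through the precomputed [49, 49] index buffer (arg10_1), softmax, attention times values, and the output projection. A compact eager sketch under those shapes; the zero-filled bias_table and rel_index stand in for the trained buffers:

num_heads = 4
scale = (128 // num_heads) ** -0.5                  # 0.1767766952966369 in the graph
qkv = nn.Linear(128, 384)                           # arg7_1 / arg8_1
proj = nn.Linear(128, 128)                          # arg11_1 / arg12_1
bias_table = torch.zeros(169, num_heads)            # (2*7 - 1)**2 = 169 relative offsets
rel_index = torch.zeros(49, 49, dtype=torch.long)   # stands in for arg10_1

x = torch.randn(1521, 49, 128)
B_, N, C = x.shape
q, k, v = qkv(x).reshape(B_, N, 3, num_heads, C // num_heads).permute(2, 0, 3, 1, 4)
attn = (q * scale) @ k.transpose(-2, -1)            # the bmm -> [1521, 4, 49, 49]
bias = bias_table[rel_index.view(-1)].view(49, 49, -1).permute(2, 0, 1)
attn = (attn + bias.unsqueeze(0)).softmax(dim=-1)   # amax/sub/exp/sum/div in the graph
out = proj((attn @ v).transpose(1, 2).reshape(B_, N, C))
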
        # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
        add_5: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(permute_2, view_28); permute_2 = view_28 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        var_mean_2 = torch.ops.aten.var_mean.correction(add_5, [2], correction = 0, keepdim = True)
        getitem_4: "f32[1, 73984, 1]" = var_mean_2[0]
        getitem_5: "f32[1, 73984, 1]" = var_mean_2[1]; var_mean_2 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_30: "f16[128]" = torch.ops.prims.convert_element_type.default(arg18_1, torch.float16); arg18_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_22: "f16[512]" = torch.ops.prims.convert_element_type.default(arg16_1, torch.float16); arg16_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        sub_4: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_5, getitem_5); getitem_5 = None
        add_6: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_4, 1e-05); getitem_4 = None
        rsqrt_2: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_6); add_6 = None
        mul_7: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_2); sub_4 = rsqrt_2 = None
        mul_8: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_7, arg13_1); mul_7 = arg13_1 = None
        add_7: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_8, arg14_1); mul_8 = arg14_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_24: "f16[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(add_7, torch.float16); add_7 = None
        view_29: "f16[73984, 128]" = torch.ops.aten.reshape.default(convert_element_type_24, [73984, 128]); convert_element_type_24 = None
        convert_element_type_23: "f16[512, 128]" = torch.ops.prims.convert_element_type.default(arg15_1, torch.float16); arg15_1 = None
        permute_13: "f16[128, 512]" = torch.ops.aten.permute.default(convert_element_type_23, [1, 0]); convert_element_type_23 = None

        # No stacktrace found for following nodes
        mm_default_85: "f16[73984, 512]" = torch.ops.aten.mm.default(view_29, permute_13); view_29 = permute_13 = None
        add_tensor_85: "f16[73984, 512]" = torch.ops.aten.add.Tensor(mm_default_85, convert_element_type_22); mm_default_85 = convert_element_type_22 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        view_30: "f16[1, 73984, 512]" = torch.ops.aten.reshape.default(add_tensor_85, [1, 73984, 512]); add_tensor_85 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
        convert_element_type_28: "f32[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(view_30, torch.float32); view_30 = None
        mul_9: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_28, 0.5)
        mul_10: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_28, 0.7071067811865476); convert_element_type_28 = None
        erf: "f32[1, 73984, 512]" = torch.ops.aten.erf.default(mul_10); mul_10 = None
        add_8: "f32[1, 73984, 512]" = torch.ops.aten.add.Tensor(erf, 1); erf = None
        mul_11: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(mul_9, add_8); mul_9 = add_8 = None
        convert_element_type_29: "f16[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(mul_11, torch.float16); mul_11 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        view_31: "f16[73984, 512]" = torch.ops.aten.reshape.default(convert_element_type_29, [73984, 512]); convert_element_type_29 = None
        convert_element_type_31: "f16[128, 512]" = torch.ops.prims.convert_element_type.default(arg17_1, torch.float16); arg17_1 = None
        permute_14: "f16[512, 128]" = torch.ops.aten.permute.default(convert_element_type_31, [1, 0]); convert_element_type_31 = None

        # No stacktrace found for following nodes
        mm_default_84: "f16[73984, 128]" = torch.ops.aten.mm.default(view_31, permute_14); view_31 = permute_14 = None
        add_tensor_84: "f16[73984, 128]" = torch.ops.aten.add.Tensor(mm_default_84, convert_element_type_30); mm_default_84 = convert_element_type_30 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        view_32: "f16[1, 73984, 128]" = torch.ops.aten.reshape.default(add_tensor_84, [1, 73984, 128]); add_tensor_84 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
        add_9: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_5, view_32); add_5 = view_32 = None
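
The mul_9 / mul_10 / erf / add_8 / mul_11 group is the exact-erf GELU of the MLP, upcast to fp32 around the fp16 linears: gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))). A quick check that the decomposition matches F.gelu:

import torch
import torch.nn.functional as F

x = torch.randn(1, 73984, 512)
manual = 0.5 * x * (1.0 + torch.erf(x * 0.7071067811865476))  # mul_9 * (erf(mul_10) + 1)
assert torch.allclose(manual, F.gelu(x), atol=1e-6)
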
        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        var_mean_3 = torch.ops.aten.var_mean.correction(add_9, [2], correction = 0, keepdim = True)
        getitem_6: "f32[1, 73984, 1]" = var_mean_3[0]
        getitem_7: "f32[1, 73984, 1]" = var_mean_3[1]; var_mean_3 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_46: "f16[128]" = torch.ops.prims.convert_element_type.default(arg26_1, torch.float16); arg26_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        convert_element_type_35: "f16[384]" = torch.ops.prims.convert_element_type.default(arg22_1, torch.float16); arg22_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        sub_5: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_9, getitem_7); getitem_7 = None
        add_10: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_6, 1e-05); getitem_6 = None
        rsqrt_3: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_10); add_10 = None
        mul_12: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_3); sub_5 = rsqrt_3 = None
        mul_13: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_12, arg19_1); mul_12 = arg19_1 = None
        add_11: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_13, arg20_1); mul_13 = arg20_1 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
        view_33: "f32[1, 272, 272, 128]" = torch.ops.aten.reshape.default(add_11, [1, 272, 272, 128]); add_11 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
        constant_pad_nd_1: "f32[1, 273, 273, 128]" = torch.ops.aten.constant_pad_nd.default(view_33, [0, 0, 0, 1, 0, 1], 0.0); view_33 = None

        # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
        iota: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
        add_12: "i64[273]" = torch.ops.aten.add.Tensor(iota, 3); iota = None
        fmod: "i64[273]" = torch.ops.aten.fmod.Scalar(add_12, 273); add_12 = None
        index_1: "f32[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(constant_pad_nd_1, [None, fmod]); constant_pad_nd_1 = fmod = None
        iota_1: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
        add_13: "i64[273]" = torch.ops.aten.add.Tensor(iota_1, 3); iota_1 = None
        fmod_1: "i64[273]" = torch.ops.aten.fmod.Scalar(add_13, 273); add_13 = None
        index_2: "f32[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(index_1, [None, None, fmod_1]); index_1 = fmod_1 = None
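
For the shifted block, torch.roll(x, shifts=(-3, -3), dims=(1, 2)) has been decomposed into iota / add / fmod / index groups, one per rolled dimension. The two forms agree exactly:

x = torch.randn(1, 273, 273, 128)
rolled = torch.roll(x, shifts=(-3, -3), dims=(1, 2))

idx = (torch.arange(273) + 3).fmod(273)   # the graph's iota -> add 3 -> fmod 273
manual = x[:, idx][:, :, idx]             # index_1, then index_2
assert torch.equal(rolled, manual)
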
"f16[1521, 49, 3, 4, 32]" = torch.ops.aten.reshape.default(view_38, [1521, 49, 3, 4, 32]); view_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_17: "f16[3, 1521, 4, 49, 32]" = torch.ops.aten.permute.default(view_39, [2, 0, 3, 1, 4]); view_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_3: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_14: "f16[1521, 4, 49, 32]" = torch.ops.aten.mul.Tensor(select_3, 0.1767766952966369); select_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_4: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(mul_14, [1521, 4, 49, 32]); mul_14 = None clone_16: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_4, memory_format = torch.contiguous_format); expand_4 = None view_40: "f16[6084, 49, 32]" = torch.ops.aten.reshape.default(clone_16, [6084, 49, 32]); clone_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_4: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_18: "f16[1521, 4, 32, 49]" = torch.ops.aten.permute.default(select_4, [0, 1, 3, 2]); select_4 = None expand_5: "f16[1521, 4, 32, 49]" = torch.ops.aten.expand.default(permute_18, [1521, 4, 32, 49]); permute_18 = None clone_17: "f16[1521, 4, 32, 49]" = torch.ops.aten.clone.default(expand_5, memory_format = torch.contiguous_format); expand_5 = None view_41: "f16[6084, 32, 49]" = torch.ops.aten.reshape.default(clone_17, [6084, 32, 49]); clone_17 = None bmm_2: "f16[6084, 49, 49]" = torch.ops.aten.bmm.default(view_40, view_41); view_40 = view_41 = None view_42: "f16[1521, 4, 49, 49]" = torch.ops.aten.reshape.default(bmm_2, [1521, 4, 49, 49]); bmm_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_43: "i64[2401]" = torch.ops.aten.reshape.default(arg24_1, [-1]); arg24_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_3: "f32[2401, 4]" = torch.ops.aten.index.Tensor(arg23_1, [view_43]); arg23_1 = view_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_44: "f32[49, 49, 4]" = torch.ops.aten.reshape.default(index_3, [49, 49, -1]); index_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_19: "f32[4, 49, 49]" = torch.ops.aten.permute.default(view_44, [2, 0, 1]); view_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_18: "f32[4, 49, 49]" = torch.ops.aten.clone.default(permute_19, memory_format = torch.contiguous_format); permute_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_3: "f32[1, 4, 49, 49]" = 
torch.ops.aten.unsqueeze.default(clone_18, 0); clone_18 = None add_14: "f32[1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_42, unsqueeze_3); view_42 = unsqueeze_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_45: "f32[1, 1521, 4, 49, 49]" = torch.ops.aten.reshape.default(add_14, [1, 1521, 4, 49, 49]); add_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 full: "f32[1, 273, 273, 1]" = torch.ops.aten.full.default([1, 273, 273, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_6: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(full, 1, 0, -7) slice_7: "f32[1, 266, 266, 1]" = torch.ops.aten.slice.Tensor(slice_6, 2, 0, -7); slice_6 = None slice_2: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(full, 1, 0, -7) slice_3: "f32[1, 266, 266, 1]" = torch.ops.aten.slice.Tensor(slice_2, 2, 0, -7); slice_2 = None lift_fresh_copy_2: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant2); _tensor_constant2 = None copy: "f32[1, 266, 266, 1]" = torch.ops.aten.copy.default(slice_3, lift_fresh_copy_2); slice_3 = lift_fresh_copy_2 = None # No stacktrace found for following nodes slice_tensor: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(full, 1, 0, -7) slice_scatter_default: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor, copy, 2, 0, -7); slice_tensor = copy = None slice_scatter_default_1: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(full, slice_scatter_default, 1, 0, -7); full = slice_scatter_default = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_21: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_1, 1, 0, -7) slice_22: "f32[1, 266, 4, 1]" = torch.ops.aten.slice.Tensor(slice_21, 2, -7, -3); slice_21 = None slice_17: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_1, 1, 0, -7) slice_18: "f32[1, 266, 4, 1]" = torch.ops.aten.slice.Tensor(slice_17, 2, -7, -3); slice_17 = None lift_fresh_copy_3: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant3); _tensor_constant3 = None copy_1: "f32[1, 266, 4, 1]" = torch.ops.aten.copy.default(slice_18, lift_fresh_copy_3); slice_18 = lift_fresh_copy_3 = None # No stacktrace found for following nodes slice_tensor_1: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_1, 1, 0, -7) slice_scatter_default_2: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_1, copy_1, 2, -7, -3); slice_tensor_1 = copy_1 = None slice_scatter_default_3: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_1, slice_scatter_default_2, 1, 0, -7); slice_scatter_default_1 = slice_scatter_default_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_36: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_3, 1, 0, -7) slice_37: "f32[1, 266, 3, 1]" = torch.ops.aten.slice.Tensor(slice_36, 2, -3, 9223372036854775807); slice_36 = None slice_32: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_3, 1, 0, -7) slice_33: 
"f32[1, 266, 3, 1]" = torch.ops.aten.slice.Tensor(slice_32, 2, -3, 9223372036854775807); slice_32 = None lift_fresh_copy_4: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant4); _tensor_constant4 = None copy_2: "f32[1, 266, 3, 1]" = torch.ops.aten.copy.default(slice_33, lift_fresh_copy_4); slice_33 = lift_fresh_copy_4 = None # No stacktrace found for following nodes slice_tensor_2: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_3, 1, 0, -7) slice_scatter_default_4: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_2, copy_2, 2, -3, 9223372036854775807); slice_tensor_2 = copy_2 = None slice_scatter_default_5: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_3, slice_scatter_default_4, 1, 0, -7); slice_scatter_default_3 = slice_scatter_default_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt full_default: "f32[1, 4, 273, 1]" = torch.ops.aten.full.default([1, 4, 273, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) slice_52: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(full_default, 2, 0, -7) slice_47: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_5, 1, -7, -3) slice_48: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(slice_47, 2, 0, -7); slice_47 = None lift_fresh_copy_5: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant5); _tensor_constant5 = None copy_3: "f32[1, 4, 266, 1]" = torch.ops.aten.copy.default(slice_48, lift_fresh_copy_5); slice_48 = lift_fresh_copy_5 = None # No stacktrace found for following nodes slice_tensor_3: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(full_default, 2, 0, -7) slice_scatter_default_6: "f32[1, 4, 266, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_3, copy_3, 3, 0, 9223372036854775807); slice_tensor_3 = copy_3 = None slice_scatter_default_7: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(full_default, slice_scatter_default_6, 2, 0, -7); full_default = slice_scatter_default_6 = None slice_scatter_default_8: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_5, slice_scatter_default_7, 1, -7, -3); slice_scatter_default_5 = slice_scatter_default_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_66: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_8, 1, -7, -3) slice_67: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_66, 2, -7, -3); slice_66 = None slice_62: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_8, 1, -7, -3) slice_63: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_62, 2, -7, -3); slice_62 = None lift_fresh_copy_6: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant6); _tensor_constant6 = None copy_4: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_63, lift_fresh_copy_6); slice_63 = lift_fresh_copy_6 = None # No stacktrace found for following nodes slice_tensor_4: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_8, 1, -7, -3) slice_scatter_default_9: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_4, copy_4, 2, -7, -3); slice_tensor_4 = copy_4 = None slice_scatter_default_10: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_8, slice_scatter_default_9, 1, -7, -3); 
slice_scatter_default_8 = slice_scatter_default_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_81: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_10, 1, -7, -3) slice_82: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_81, 2, -3, 9223372036854775807); slice_81 = None slice_77: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_10, 1, -7, -3) slice_78: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_77, 2, -3, 9223372036854775807); slice_77 = None lift_fresh_copy_7: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant7); _tensor_constant7 = None copy_5: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_78, lift_fresh_copy_7); slice_78 = lift_fresh_copy_7 = None # No stacktrace found for following nodes slice_tensor_5: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_10, 1, -7, -3) slice_scatter_default_11: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_5, copy_5, 2, -3, 9223372036854775807); slice_tensor_5 = copy_5 = None slice_scatter_default_12: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_10, slice_scatter_default_11, 1, -7, -3); slice_scatter_default_10 = slice_scatter_default_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt full_default_1: "f32[1, 3, 273, 1]" = torch.ops.aten.full.default([1, 3, 273, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) slice_97: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(full_default_1, 2, 0, -7) slice_92: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_12, 1, -3, 9223372036854775807) slice_93: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(slice_92, 2, 0, -7); slice_92 = None lift_fresh_copy_8: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant8); _tensor_constant8 = None copy_6: "f32[1, 3, 266, 1]" = torch.ops.aten.copy.default(slice_93, lift_fresh_copy_8); slice_93 = lift_fresh_copy_8 = None # No stacktrace found for following nodes slice_tensor_6: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(full_default_1, 2, 0, -7) slice_scatter_default_13: "f32[1, 3, 266, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_6, copy_6, 3, 0, 9223372036854775807); slice_tensor_6 = copy_6 = None slice_scatter_default_14: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(full_default_1, slice_scatter_default_13, 2, 0, -7); full_default_1 = slice_scatter_default_13 = None slice_scatter_default_15: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_12, slice_scatter_default_14, 1, -3, 9223372036854775807); slice_scatter_default_12 = slice_scatter_default_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_111: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_15, 1, -3, 9223372036854775807) slice_112: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_111, 2, -7, -3); slice_111 = None slice_107: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_15, 1, -3, 9223372036854775807) slice_108: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_107, 2, -7, -3); slice_107 = None lift_fresh_copy_9: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant9); _tensor_constant9 = None 
copy_7: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_108, lift_fresh_copy_9); slice_108 = lift_fresh_copy_9 = None # No stacktrace found for following nodes slice_tensor_7: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_15, 1, -3, 9223372036854775807) slice_scatter_default_16: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_7, copy_7, 2, -7, -3); slice_tensor_7 = copy_7 = None slice_scatter_default_17: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_15, slice_scatter_default_16, 1, -3, 9223372036854775807); slice_scatter_default_15 = slice_scatter_default_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_126: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_17, 1, -3, 9223372036854775807) slice_127: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_126, 2, -3, 9223372036854775807); slice_126 = None slice_122: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_17, 1, -3, 9223372036854775807) slice_123: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_122, 2, -3, 9223372036854775807); slice_122 = None lift_fresh_copy_10: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant10); _tensor_constant10 = None copy_8: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_123, lift_fresh_copy_10); slice_123 = lift_fresh_copy_10 = None # No stacktrace found for following nodes slice_tensor_8: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_17, 1, -3, 9223372036854775807) slice_scatter_default_18: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_8, copy_8, 2, -3, 9223372036854775807); slice_tensor_8 = copy_8 = None slice_scatter_default_19: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_17, slice_scatter_default_18, 1, -3, 9223372036854775807); slice_scatter_default_17 = slice_scatter_default_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) view_4: "f32[1, 39, 7, 39, 7, 1]" = torch.ops.aten.reshape.default(slice_scatter_default_19, [1, 39, 7, 39, 7, 1]); slice_scatter_default_19 = None permute_4: "f32[1, 39, 39, 7, 7, 1]" = torch.ops.aten.permute.default(view_4, [0, 1, 3, 2, 4, 5]); view_4 = None clone_2: "f32[1, 39, 39, 7, 7, 1]" = torch.ops.aten.clone.default(permute_4, memory_format = torch.contiguous_format); permute_4 = None view_5: "f32[1521, 7, 7, 1]" = torch.ops.aten.reshape.default(clone_2, [-1, 7, 7, 1]); clone_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1, view_6: "f32[1521, 49]" = torch.ops.aten.reshape.default(view_5, [-1, 49]); view_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) unsqueeze: "f32[1521, 1, 49]" = torch.ops.aten.unsqueeze.default(view_6, 1) unsqueeze_1: "f32[1521, 49, 1]" = torch.ops.aten.unsqueeze.default(view_6, 2); view_6 = None sub_1: "f32[1521, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze, unsqueeze_1); unsqueeze = unsqueeze_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0)) eq: "b8[1521, 49, 49]" = torch.ops.aten.eq.Scalar(sub_1, 0) # File: 
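
The full / slice / copy / slice_scatter avalanche between view_45 and eq is just the nine in-place assignments of Swin's shifted-window attention-mask setup, traced element-by-element because img_mask[:, h, w, :] = cnt is a mutation; the ne/where pair that follows implements the two masked_fill calls. In eager form (this is the source the stack traces point at, reusing window_partition from the sketch above):

Hp = Wp = 273
window_size, shift_size = 7, 3
img_mask = torch.zeros((1, Hp, Wp, 1))
cnt = 0
for h in (slice(0, -window_size), slice(-window_size, -shift_size), slice(-shift_size, None)):
    for w in (slice(0, -window_size), slice(-window_size, -shift_size), slice(-shift_size, None)):
        img_mask[:, h, w, :] = cnt   # one slice_scatter group per (h, w) region
        cnt += 1

mask_windows = window_partition(img_mask, window_size).view(-1, window_size * window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)   # sub_1
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
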
/workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( full_default_3: "f32[]" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0, ne: "b8[1521, 49, 49]" = torch.ops.aten.ne.Scalar(sub_1, 0) full_default_2: "f32[]" = torch.ops.aten.full.default([], -100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) where: "f32[1521, 49, 49]" = torch.ops.aten.where.self(ne, full_default_2, sub_1); ne = full_default_2 = sub_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( where_1: "f32[1521, 49, 49]" = torch.ops.aten.where.self(eq, full_default_3, where); eq = full_default_3 = where = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_4: "f32[1521, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_1, 1); where_1 = None unsqueeze_5: "f32[1, 1521, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_4, 0); unsqueeze_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_15: "f32[1, 1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_45, unsqueeze_5); view_45 = unsqueeze_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_46: "f32[1521, 4, 49, 49]" = torch.ops.aten.reshape.default(add_15, [-1, 4, 49, 49]); add_15 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_1: "f32[1521, 4, 49, 1]" = torch.ops.aten.amax.default(view_46, [-1], True) sub_6: "f32[1521, 4, 49, 49]" = torch.ops.aten.sub.Tensor(view_46, amax_1); view_46 = amax_1 = None exp_1: "f32[1521, 4, 49, 49]" = torch.ops.aten.exp.default(sub_6); sub_6 = None sum_2: "f32[1521, 4, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_1, [-1], True) div_3: "f32[1521, 4, 49, 49]" = torch.ops.aten.div.Tensor(exp_1, sum_2); exp_1 = sum_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_43: "f16[1521, 4, 49, 49]" = torch.ops.prims.convert_element_type.default(div_3, torch.float16); div_3 = None expand_6: "f16[1521, 4, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_43, [1521, 4, 49, 49]); convert_element_type_43 = None view_47: "f16[6084, 49, 49]" = torch.ops.aten.reshape.default(expand_6, [6084, 49, 49]); expand_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_5: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 2); permute_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_7: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(select_5, [1521, 4, 49, 32]); select_5 = None clone_20: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_7, memory_format = torch.contiguous_format); expand_7 = None view_48: "f16[6084, 49, 32]" = 
torch.ops.aten.reshape.default(clone_20, [6084, 49, 32]); clone_20 = None bmm_3: "f16[6084, 49, 32]" = torch.ops.aten.bmm.default(view_47, view_48); view_47 = view_48 = None view_49: "f16[1521, 4, 49, 32]" = torch.ops.aten.reshape.default(bmm_3, [1521, 4, 49, 32]); bmm_3 = None permute_20: "f16[1521, 49, 4, 32]" = torch.ops.aten.permute.default(view_49, [0, 2, 1, 3]); view_49 = None clone_21: "f16[1521, 49, 4, 32]" = torch.ops.aten.clone.default(permute_20, memory_format = torch.contiguous_format); permute_20 = None view_50: "f16[1521, 49, 128]" = torch.ops.aten.reshape.default(clone_21, [1521, 49, 128]); clone_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_51: "f16[74529, 128]" = torch.ops.aten.reshape.default(view_50, [74529, 128]); view_50 = None convert_element_type_47: "f16[128, 128]" = torch.ops.prims.convert_element_type.default(arg25_1, torch.float16); arg25_1 = None permute_21: "f16[128, 128]" = torch.ops.aten.permute.default(convert_element_type_47, [1, 0]); convert_element_type_47 = None # No stacktrace found for following nodes mm_default_82: "f16[74529, 128]" = torch.ops.aten.mm.default(view_51, permute_21); view_51 = permute_21 = None add_tensor_82: "f16[74529, 128]" = torch.ops.aten.add.Tensor(mm_default_82, convert_element_type_46); mm_default_82 = convert_element_type_46 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_52: "f16[1521, 49, 128]" = torch.ops.aten.reshape.default(add_tensor_82, [1521, 49, 128]); add_tensor_82 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_53: "f16[1521, 7, 7, 128]" = torch.ops.aten.reshape.default(view_52, [-1, 7, 7, 128]); view_52 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_54: "f16[1, 39, 39, 7, 7, 128]" = torch.ops.aten.reshape.default(view_53, [1, 39, 39, 7, 7, -1]); view_53 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_22: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.permute.default(view_54, [0, 1, 3, 2, 4, 5]); view_54 = None clone_23: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.clone.default(permute_22, memory_format = torch.contiguous_format); permute_22 = None view_55: "f16[1, 273, 273, 128]" = torch.ops.aten.reshape.default(clone_23, [1, 273, 273, -1]); clone_23 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_2: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_16: "i64[273]" = torch.ops.aten.add.Tensor(iota_2, 270); iota_2 = None fmod_2: "i64[273]" = torch.ops.aten.fmod.Scalar(add_16, 273); add_16 = None index_4: "f16[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(view_55, [None, fmod_2]); view_55 = fmod_2 = None iota_3: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_17: "i64[273]" = torch.ops.aten.add.Tensor(iota_3, 270); iota_3 = None fmod_3: "i64[273]" = 
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_143: "f16[1, 272, 273, 128]" = torch.ops.aten.slice.Tensor(index_5, 1, 0, 272); index_5 = None
slice_144: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_143, 2, 0, 272); slice_143 = None
clone_24: "f16[1, 272, 272, 128]" = torch.ops.aten.clone.default(slice_144, memory_format = torch.contiguous_format); slice_144 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_56: "f16[1, 73984, 128]" = torch.ops.aten.reshape.default(clone_24, [1, 73984, 128]); clone_24 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_18: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_9, view_56); add_9 = view_56 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
var_mean_4 = torch.ops.aten.var_mean.correction(add_18, [2], correction = 0, keepdim = True)
getitem_8: "f32[1, 73984, 1]" = var_mean_4[0]
getitem_9: "f32[1, 73984, 1]" = var_mean_4[1]; var_mean_4 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_59: "f16[128]" = torch.ops.prims.convert_element_type.default(arg32_1, torch.float16); arg32_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_51: "f16[512]" = torch.ops.prims.convert_element_type.default(arg30_1, torch.float16); arg30_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_7: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_18, getitem_9); getitem_9 = None
add_19: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_8, 1e-05); getitem_8 = None
rsqrt_4: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_19); add_19 = None
mul_15: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_4); sub_7 = rsqrt_4 = None
mul_16: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_15, arg27_1); mul_15 = arg27_1 = None
add_20: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_16, arg28_1); mul_16 = arg28_1 = None
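# [Editorial sketch, not part of the traced graph] F.layer_norm is decomposed
# above into var_mean (biased, correction=0) + rsqrt + the affine weight/bias,
# with eps = 1e-05. Eager-mode equivalent over the channel dim (shapes from the
# graph; w and b stand in for the affine parameters):
def _layer_norm_decomposition_sketch():
    import torch
    x = torch.randn(1, 73984, 128)
    w, b = torch.ones(128), torch.zeros(128)
    var, mean = torch.var_mean(x, dim=-1, correction=0, keepdim=True)
    y = (x - mean) * torch.rsqrt(var + 1e-5) * w + b
    ref = torch.nn.functional.layer_norm(x, (128,), w, b, eps=1e-5)
    assert torch.allclose(y, ref, atol=1e-5)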
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_53: "f16[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(add_20, torch.float16); add_20 = None
view_57: "f16[73984, 128]" = torch.ops.aten.reshape.default(convert_element_type_53, [73984, 128]); convert_element_type_53 = None
convert_element_type_52: "f16[512, 128]" = torch.ops.prims.convert_element_type.default(arg29_1, torch.float16); arg29_1 = None
permute_23: "f16[128, 512]" = torch.ops.aten.permute.default(convert_element_type_52, [1, 0]); convert_element_type_52 = None

# No stacktrace found for following nodes
mm_default_81: "f16[73984, 512]" = torch.ops.aten.mm.default(view_57, permute_23); view_57 = permute_23 = None
add_tensor_81: "f16[73984, 512]" = torch.ops.aten.add.Tensor(mm_default_81, convert_element_type_51); mm_default_81 = convert_element_type_51 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_58: "f16[1, 73984, 512]" = torch.ops.aten.reshape.default(add_tensor_81, [1, 73984, 512]); add_tensor_81 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_57: "f32[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(view_58, torch.float32); view_58 = None
mul_17: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_57, 0.5)
mul_18: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_57, 0.7071067811865476); convert_element_type_57 = None
erf_1: "f32[1, 73984, 512]" = torch.ops.aten.erf.default(mul_18); mul_18 = None
add_21: "f32[1, 73984, 512]" = torch.ops.aten.add.Tensor(erf_1, 1); erf_1 = None
mul_19: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(mul_17, add_21); mul_17 = add_21 = None
convert_element_type_58: "f16[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(mul_19, torch.float16); mul_19 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_59: "f16[73984, 512]" = torch.ops.aten.reshape.default(convert_element_type_58, [73984, 512]); convert_element_type_58 = None
convert_element_type_60: "f16[128, 512]" = torch.ops.prims.convert_element_type.default(arg31_1, torch.float16); arg31_1 = None
permute_24: "f16[512, 128]" = torch.ops.aten.permute.default(convert_element_type_60, [1, 0]); convert_element_type_60 = None

# No stacktrace found for following nodes
mm_default_80: "f16[73984, 128]" = torch.ops.aten.mm.default(view_59, permute_24); view_59 = permute_24 = None
add_tensor_80: "f16[73984, 128]" = torch.ops.aten.add.Tensor(mm_default_80, convert_element_type_59); mm_default_80 = convert_element_type_59 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_60: "f16[1, 73984, 128]" = torch.ops.aten.reshape.default(add_tensor_80, [1, 73984, 128]); add_tensor_80 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_22: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_18, view_60); add_18 = view_60 = None
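# [Editorial sketch, not part of the traced graph] The MLP above runs its two
# matmuls in fp16 (weights and activations are cast around each mm), while the
# exact erf form of GELU is evaluated in fp32:
# gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))), with 0.7071067811865476 == 1/sqrt(2).
def _gelu_erf_sketch():
    import torch
    x = torch.randn(1, 73984, 512)
    y = 0.5 * x * (1 + torch.erf(x * 0.7071067811865476))
    assert torch.allclose(y, torch.nn.functional.gelu(x), atol=1e-6)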
136, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 1, 0, 9223372036854775807, 2) slice_156: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_155, 2, 1, 9223372036854775807, 2); slice_155 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:354 in forward, code: x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C slice_159: "f32[1, 136, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 1, 1, 9223372036854775807, 2); view_61 = None slice_160: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_159, 2, 1, 9223372036854775807, 2); slice_159 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:355 in forward, code: x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C cat: "f32[1, 136, 136, 512]" = torch.ops.aten.cat.default([slice_148, slice_152, slice_156, slice_160], -1); slice_148 = slice_152 = slice_156 = slice_160 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:356 in forward, code: x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C view_62: "f32[1, 18496, 512]" = torch.ops.aten.reshape.default(cat, [1, -1, 512]); cat = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_5 = torch.ops.aten.var_mean.correction(view_62, [2], correction = 0, keepdim = True) getitem_10: "f32[1, 18496, 1]" = var_mean_5[0] getitem_11: "f32[1, 18496, 1]" = var_mean_5[1]; var_mean_5 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_6 = torch.ops.aten.var_mean.correction(add_22, [2], correction = 0, keepdim = True) getitem_12: "f32[1, 73984, 1]" = var_mean_6[0] getitem_13: "f32[1, 73984, 1]" = var_mean_6[1]; var_mean_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt _tensor_constant13: "f32[]" = self._tensor_constant13 _tensor_constant14: "f32[]" = self._tensor_constant14 _tensor_constant15: "f32[]" = self._tensor_constant15 _tensor_constant16: "f32[]" = self._tensor_constant16 _tensor_constant17: "f32[]" = self._tensor_constant17 _tensor_constant18: "f32[]" = self._tensor_constant18 _tensor_constant19: "f32[]" = self._tensor_constant19 _tensor_constant20: "f32[]" = self._tensor_constant20 _tensor_constant21: "f32[]" = self._tensor_constant21 # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_8: "f32[1, 18496, 512]" = torch.ops.aten.sub.Tensor(view_62, getitem_11); view_62 = getitem_11 = None add_23: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_10, 1e-05); getitem_10 = None rsqrt_5: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_23); add_23 = None mul_20: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_5); sub_8 = rsqrt_5 = None mul_21: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(mul_20, arg33_1); mul_20 = arg33_1 = None add_24: "f32[1, 18496, 512]" = torch.ops.aten.add.Tensor(mul_21, arg34_1); mul_21 = arg34_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_65: "f16[1, 18496, 512]" = torch.ops.prims.convert_element_type.default(add_24, torch.float16); add_24 = None view_63: "f16[18496, 512]" = torch.ops.aten.reshape.default(convert_element_type_65, [18496, 512]); convert_element_type_65 = None convert_element_type_64: "f16[256, 512]" = 
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
var_mean_5 = torch.ops.aten.var_mean.correction(view_62, [2], correction = 0, keepdim = True)
getitem_10: "f32[1, 18496, 1]" = var_mean_5[0]
getitem_11: "f32[1, 18496, 1]" = var_mean_5[1]; var_mean_5 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
var_mean_6 = torch.ops.aten.var_mean.correction(add_22, [2], correction = 0, keepdim = True)
getitem_12: "f32[1, 73984, 1]" = var_mean_6[0]
getitem_13: "f32[1, 73984, 1]" = var_mean_6[1]; var_mean_6 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
_tensor_constant13: "f32[]" = self._tensor_constant13
_tensor_constant14: "f32[]" = self._tensor_constant14
_tensor_constant15: "f32[]" = self._tensor_constant15
_tensor_constant16: "f32[]" = self._tensor_constant16
_tensor_constant17: "f32[]" = self._tensor_constant17
_tensor_constant18: "f32[]" = self._tensor_constant18
_tensor_constant19: "f32[]" = self._tensor_constant19
_tensor_constant20: "f32[]" = self._tensor_constant20
_tensor_constant21: "f32[]" = self._tensor_constant21

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_8: "f32[1, 18496, 512]" = torch.ops.aten.sub.Tensor(view_62, getitem_11); view_62 = getitem_11 = None
add_23: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_10, 1e-05); getitem_10 = None
rsqrt_5: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_23); add_23 = None
mul_20: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_5); sub_8 = rsqrt_5 = None
mul_21: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(mul_20, arg33_1); mul_20 = arg33_1 = None
add_24: "f32[1, 18496, 512]" = torch.ops.aten.add.Tensor(mul_21, arg34_1); mul_21 = arg34_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_65: "f16[1, 18496, 512]" = torch.ops.prims.convert_element_type.default(add_24, torch.float16); add_24 = None
view_63: "f16[18496, 512]" = torch.ops.aten.reshape.default(convert_element_type_65, [18496, 512]); convert_element_type_65 = None
convert_element_type_64: "f16[256, 512]" = torch.ops.prims.convert_element_type.default(arg35_1, torch.float16); arg35_1 = None
permute_25: "f16[512, 256]" = torch.ops.aten.permute.default(convert_element_type_64, [1, 0]); convert_element_type_64 = None
mm: "f16[18496, 256]" = torch.ops.aten.mm.default(view_63, permute_25); view_63 = permute_25 = None
view_64: "f16[1, 18496, 256]" = torch.ops.aten.reshape.default(mm, [1, 18496, 256]); mm = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_70: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(view_64, torch.float32)
var_mean_7 = torch.ops.aten.var_mean.correction(convert_element_type_70, [2], correction = 0, keepdim = True)
getitem_14: "f32[1, 18496, 1]" = var_mean_7[0]
getitem_15: "f32[1, 18496, 1]" = var_mean_7[1]; var_mean_7 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_82: "f16[256]" = torch.ops.prims.convert_element_type.default(arg45_1, torch.float16); arg45_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_71: "f16[768]" = torch.ops.prims.convert_element_type.default(arg41_1, torch.float16); arg41_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_11: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_70, getitem_15); convert_element_type_70 = getitem_15 = None
add_27: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_14, 1e-05); getitem_14 = None
rsqrt_7: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_27); add_27 = None
mul_26: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_7); sub_11 = rsqrt_7 = None
mul_27: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_26, arg38_1); mul_26 = arg38_1 = None
add_28: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_27, arg39_1); mul_27 = arg39_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_70: "f32[1, 136, 136, 256]" = torch.ops.aten.reshape.default(add_28, [1, 136, 136, 256]); add_28 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_2: "f32[1, 140, 140, 256]" = torch.ops.aten.constant_pad_nd.default(view_70, [0, 0, 0, 4, 0, 4], 0.0); view_70 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_71: "f32[1, 20, 7, 20, 7, 256]" = torch.ops.aten.reshape.default(constant_pad_nd_2, [1, 20, 7, 20, 7, 256]); constant_pad_nd_2 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_29: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.permute.default(view_71, [0, 1, 3, 2, 4, 5]); view_71 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_29: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.clone.default(permute_29, memory_format = torch.contiguous_format); permute_29 = None
view_72: "f32[400, 7, 7, 256]" = torch.ops.aten.reshape.default(clone_29, [-1, 7, 7, 
256]); clone_29 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_73: "f32[400, 49, 256]" = torch.ops.aten.reshape.default(view_72, [-1, 49, 256]); view_72 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_73: "f16[400, 49, 256]" = torch.ops.prims.convert_element_type.default(view_73, torch.float16); view_73 = None view_74: "f16[19600, 256]" = torch.ops.aten.reshape.default(convert_element_type_73, [19600, 256]); convert_element_type_73 = None convert_element_type_72: "f16[768, 256]" = torch.ops.prims.convert_element_type.default(arg40_1, torch.float16); arg40_1 = None permute_30: "f16[256, 768]" = torch.ops.aten.permute.default(convert_element_type_72, [1, 0]); convert_element_type_72 = None # No stacktrace found for following nodes mm_default_79: "f16[19600, 768]" = torch.ops.aten.mm.default(view_74, permute_30); view_74 = permute_30 = None add_tensor_79: "f16[19600, 768]" = torch.ops.aten.add.Tensor(mm_default_79, convert_element_type_71); mm_default_79 = convert_element_type_71 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_75: "f16[400, 49, 768]" = torch.ops.aten.reshape.default(add_tensor_79, [400, 49, 768]); add_tensor_79 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_76: "f16[400, 49, 3, 8, 32]" = torch.ops.aten.reshape.default(view_75, [400, 49, 3, 8, 32]); view_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_31: "f16[3, 400, 8, 49, 32]" = torch.ops.aten.permute.default(view_76, [2, 0, 3, 1, 4]); view_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_6: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_28: "f16[400, 8, 49, 32]" = torch.ops.aten.mul.Tensor(select_6, 0.1767766952966369); select_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_8: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(mul_28, [400, 8, 49, 32]); mul_28 = None clone_30: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_8, memory_format = torch.contiguous_format); expand_8 = None view_77: "f16[3200, 49, 32]" = torch.ops.aten.reshape.default(clone_30, [3200, 49, 32]); clone_30 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_7: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_32: "f16[400, 8, 32, 49]" = torch.ops.aten.permute.default(select_7, [0, 1, 3, 2]); select_7 = None expand_9: "f16[400, 8, 32, 49]" = torch.ops.aten.expand.default(permute_32, [400, 8, 32, 49]); permute_32 = None clone_31: "f16[400, 8, 32, 49]" = torch.ops.aten.clone.default(expand_9, memory_format = torch.contiguous_format); expand_9 = None view_78: "f16[3200, 32, 49]" = 
torch.ops.aten.reshape.default(clone_31, [3200, 32, 49]); clone_31 = None bmm_4: "f16[3200, 49, 49]" = torch.ops.aten.bmm.default(view_77, view_78); view_77 = view_78 = None view_79: "f16[400, 8, 49, 49]" = torch.ops.aten.reshape.default(bmm_4, [400, 8, 49, 49]); bmm_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_80: "i64[2401]" = torch.ops.aten.reshape.default(arg43_1, [-1]); arg43_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_6: "f32[2401, 8]" = torch.ops.aten.index.Tensor(arg42_1, [view_80]); arg42_1 = view_80 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_81: "f32[49, 49, 8]" = torch.ops.aten.reshape.default(index_6, [49, 49, -1]); index_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_33: "f32[8, 49, 49]" = torch.ops.aten.permute.default(view_81, [2, 0, 1]); view_81 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_32: "f32[8, 49, 49]" = torch.ops.aten.clone.default(permute_33, memory_format = torch.contiguous_format); permute_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_8: "f32[1, 8, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_32, 0); clone_32 = None add_29: "f32[400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_79, unsqueeze_8); view_79 = unsqueeze_8 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_2: "f32[400, 8, 49, 1]" = torch.ops.aten.amax.default(add_29, [-1], True) sub_12: "f32[400, 8, 49, 49]" = torch.ops.aten.sub.Tensor(add_29, amax_2); add_29 = amax_2 = None exp_2: "f32[400, 8, 49, 49]" = torch.ops.aten.exp.default(sub_12); sub_12 = None sum_3: "f32[400, 8, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_2, [-1], True) div_6: "f32[400, 8, 49, 49]" = torch.ops.aten.div.Tensor(exp_2, sum_3); exp_2 = sum_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_79: "f16[400, 8, 49, 49]" = torch.ops.prims.convert_element_type.default(div_6, torch.float16); div_6 = None expand_10: "f16[400, 8, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_79, [400, 8, 49, 49]); convert_element_type_79 = None view_82: "f16[3200, 49, 49]" = torch.ops.aten.reshape.default(expand_10, [3200, 49, 49]); expand_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_8: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 2); permute_31 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_11: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(select_8, [400, 8, 49, 32]); select_8 = None clone_34: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_11, memory_format = torch.contiguous_format); expand_11 = None view_83: "f16[3200, 49, 32]" = torch.ops.aten.reshape.default(clone_34, 
[3200, 49, 32]); clone_34 = None bmm_5: "f16[3200, 49, 32]" = torch.ops.aten.bmm.default(view_82, view_83); view_82 = view_83 = None view_84: "f16[400, 8, 49, 32]" = torch.ops.aten.reshape.default(bmm_5, [400, 8, 49, 32]); bmm_5 = None permute_34: "f16[400, 49, 8, 32]" = torch.ops.aten.permute.default(view_84, [0, 2, 1, 3]); view_84 = None clone_35: "f16[400, 49, 8, 32]" = torch.ops.aten.clone.default(permute_34, memory_format = torch.contiguous_format); permute_34 = None view_85: "f16[400, 49, 256]" = torch.ops.aten.reshape.default(clone_35, [400, 49, 256]); clone_35 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_86: "f16[19600, 256]" = torch.ops.aten.reshape.default(view_85, [19600, 256]); view_85 = None convert_element_type_83: "f16[256, 256]" = torch.ops.prims.convert_element_type.default(arg44_1, torch.float16); arg44_1 = None permute_35: "f16[256, 256]" = torch.ops.aten.permute.default(convert_element_type_83, [1, 0]); convert_element_type_83 = None # No stacktrace found for following nodes mm_default_78: "f16[19600, 256]" = torch.ops.aten.mm.default(view_86, permute_35); view_86 = permute_35 = None add_tensor_78: "f16[19600, 256]" = torch.ops.aten.add.Tensor(mm_default_78, convert_element_type_82); mm_default_78 = convert_element_type_82 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_87: "f16[400, 49, 256]" = torch.ops.aten.reshape.default(add_tensor_78, [400, 49, 256]); add_tensor_78 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_88: "f16[400, 7, 7, 256]" = torch.ops.aten.reshape.default(view_87, [-1, 7, 7, 256]); view_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_89: "f16[1, 20, 20, 7, 7, 256]" = torch.ops.aten.reshape.default(view_88, [1, 20, 20, 7, 7, -1]); view_88 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_36: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.permute.default(view_89, [0, 1, 3, 2, 4, 5]); view_89 = None clone_37: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.clone.default(permute_36, memory_format = torch.contiguous_format); permute_36 = None view_90: "f16[1, 140, 140, 256]" = torch.ops.aten.reshape.default(clone_37, [1, 140, 140, -1]); clone_37 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_294: "f16[1, 136, 140, 256]" = torch.ops.aten.slice.Tensor(view_90, 1, 0, 136); view_90 = None slice_295: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_294, 2, 0, 136); slice_294 = None clone_38: "f16[1, 136, 136, 256]" = torch.ops.aten.clone.default(slice_295, memory_format = torch.contiguous_format); slice_295 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_91: "f16[1, 18496, 256]" = torch.ops.aten.reshape.default(clone_38, [1, 18496, 256]); clone_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_30: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(view_64, view_91); 
view_64 = view_91 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_87: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_30, torch.float32) var_mean_8 = torch.ops.aten.var_mean.correction(convert_element_type_87, [2], correction = 0, keepdim = True) getitem_16: "f32[1, 18496, 1]" = var_mean_8[0] getitem_17: "f32[1, 18496, 1]" = var_mean_8[1]; var_mean_8 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_96: "f16[256]" = torch.ops.prims.convert_element_type.default(arg51_1, torch.float16); arg51_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_88: "f16[1024]" = torch.ops.prims.convert_element_type.default(arg49_1, torch.float16); arg49_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_13: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_87, getitem_17); convert_element_type_87 = getitem_17 = None add_31: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_16, 1e-05); getitem_16 = None rsqrt_8: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_31); add_31 = None mul_29: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_8); sub_13 = rsqrt_8 = None mul_30: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_29, arg46_1); mul_29 = arg46_1 = None add_32: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_30, arg47_1); mul_30 = arg47_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_90: "f16[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_32, torch.float16); add_32 = None view_92: "f16[18496, 256]" = torch.ops.aten.reshape.default(convert_element_type_90, [18496, 256]); convert_element_type_90 = None convert_element_type_89: "f16[1024, 256]" = torch.ops.prims.convert_element_type.default(arg48_1, torch.float16); arg48_1 = None permute_37: "f16[256, 1024]" = torch.ops.aten.permute.default(convert_element_type_89, [1, 0]); convert_element_type_89 = None # No stacktrace found for following nodes mm_default_77: "f16[18496, 1024]" = torch.ops.aten.mm.default(view_92, permute_37); view_92 = permute_37 = None add_tensor_77: "f16[18496, 1024]" = torch.ops.aten.add.Tensor(mm_default_77, convert_element_type_88); mm_default_77 = convert_element_type_88 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_93: "f16[1, 18496, 1024]" = torch.ops.aten.reshape.default(add_tensor_77, [1, 18496, 1024]); add_tensor_77 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_94: "f32[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(view_93, torch.float32); view_93 = None mul_31: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_94, 0.5) mul_32: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_94, 0.7071067811865476); convert_element_type_94 = None erf_2: "f32[1, 18496, 1024]" = 
torch.ops.aten.erf.default(mul_32); mul_32 = None add_33: "f32[1, 18496, 1024]" = torch.ops.aten.add.Tensor(erf_2, 1); erf_2 = None mul_33: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(mul_31, add_33); mul_31 = add_33 = None convert_element_type_95: "f16[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(mul_33, torch.float16); mul_33 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_94: "f16[18496, 1024]" = torch.ops.aten.reshape.default(convert_element_type_95, [18496, 1024]); convert_element_type_95 = None convert_element_type_97: "f16[256, 1024]" = torch.ops.prims.convert_element_type.default(arg50_1, torch.float16); arg50_1 = None permute_38: "f16[1024, 256]" = torch.ops.aten.permute.default(convert_element_type_97, [1, 0]); convert_element_type_97 = None # No stacktrace found for following nodes mm_default_76: "f16[18496, 256]" = torch.ops.aten.mm.default(view_94, permute_38); view_94 = permute_38 = None add_tensor_76: "f16[18496, 256]" = torch.ops.aten.add.Tensor(mm_default_76, convert_element_type_96); mm_default_76 = convert_element_type_96 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_95: "f16[1, 18496, 256]" = torch.ops.aten.reshape.default(add_tensor_76, [1, 18496, 256]); add_tensor_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_34: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_30, view_95); add_30 = view_95 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_101: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_34, torch.float32) var_mean_9 = torch.ops.aten.var_mean.correction(convert_element_type_101, [2], correction = 0, keepdim = True) getitem_18: "f32[1, 18496, 1]" = var_mean_9[0] getitem_19: "f32[1, 18496, 1]" = var_mean_9[1]; var_mean_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_113: "f16[256]" = torch.ops.prims.convert_element_type.default(arg59_1, torch.float16); arg59_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_102: "f16[768]" = torch.ops.prims.convert_element_type.default(arg55_1, torch.float16); arg55_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_14: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_101, getitem_19); convert_element_type_101 = getitem_19 = None add_35: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_18, 1e-05); getitem_18 = None rsqrt_9: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_35); add_35 = None mul_34: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_9); sub_14 = rsqrt_9 = None mul_35: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_34, arg52_1); mul_34 = arg52_1 = None add_36: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_35, arg53_1); mul_35 = arg53_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_96: "f32[1, 
136, 136, 256]" = torch.ops.aten.reshape.default(add_36, [1, 136, 136, 256]); add_36 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_3: "f32[1, 140, 140, 256]" = torch.ops.aten.constant_pad_nd.default(view_96, [0, 0, 0, 4, 0, 4], 0.0); view_96 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_4: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_37: "i64[140]" = torch.ops.aten.add.Tensor(iota_4, 3); iota_4 = None fmod_4: "i64[140]" = torch.ops.aten.fmod.Scalar(add_37, 140); add_37 = None index_7: "f32[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(constant_pad_nd_3, [None, fmod_4]); constant_pad_nd_3 = fmod_4 = None iota_5: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_38: "i64[140]" = torch.ops.aten.add.Tensor(iota_5, 3); iota_5 = None fmod_5: "i64[140]" = torch.ops.aten.fmod.Scalar(add_38, 140); add_38 = None index_8: "f32[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(index_7, [None, None, fmod_5]); index_7 = fmod_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_97: "f32[1, 20, 7, 20, 7, 256]" = torch.ops.aten.reshape.default(index_8, [1, 20, 7, 20, 7, 256]); index_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_39: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.permute.default(view_97, [0, 1, 3, 2, 4, 5]); view_97 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_41: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.clone.default(permute_39, memory_format = torch.contiguous_format); permute_39 = None view_98: "f32[400, 7, 7, 256]" = torch.ops.aten.reshape.default(clone_41, [-1, 7, 7, 256]); clone_41 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_99: "f32[400, 49, 256]" = torch.ops.aten.reshape.default(view_98, [-1, 49, 256]); view_98 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_104: "f16[400, 49, 256]" = torch.ops.prims.convert_element_type.default(view_99, torch.float16); view_99 = None view_100: "f16[19600, 256]" = torch.ops.aten.reshape.default(convert_element_type_104, [19600, 256]); convert_element_type_104 = None convert_element_type_103: "f16[768, 256]" = torch.ops.prims.convert_element_type.default(arg54_1, torch.float16); arg54_1 = None permute_40: "f16[256, 768]" = torch.ops.aten.permute.default(convert_element_type_103, [1, 0]); convert_element_type_103 = None # No stacktrace found for following nodes mm_default_75: "f16[19600, 768]" = torch.ops.aten.mm.default(view_100, permute_40); view_100 = permute_40 = None add_tensor_75: "f16[19600, 768]" = torch.ops.aten.add.Tensor(mm_default_75, convert_element_type_102); mm_default_75 = convert_element_type_102 = None # File: 
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_101: "f16[400, 49, 768]" = torch.ops.aten.reshape.default(add_tensor_75, [400, 49, 768]); add_tensor_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_102: "f16[400, 49, 3, 8, 32]" = torch.ops.aten.reshape.default(view_101, [400, 49, 3, 8, 32]); view_101 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_41: "f16[3, 400, 8, 49, 32]" = torch.ops.aten.permute.default(view_102, [2, 0, 3, 1, 4]); view_102 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_9: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_36: "f16[400, 8, 49, 32]" = torch.ops.aten.mul.Tensor(select_9, 0.1767766952966369); select_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_12: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(mul_36, [400, 8, 49, 32]); mul_36 = None clone_42: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_12, memory_format = torch.contiguous_format); expand_12 = None view_103: "f16[3200, 49, 32]" = torch.ops.aten.reshape.default(clone_42, [3200, 49, 32]); clone_42 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_10: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_42: "f16[400, 8, 32, 49]" = torch.ops.aten.permute.default(select_10, [0, 1, 3, 2]); select_10 = None expand_13: "f16[400, 8, 32, 49]" = torch.ops.aten.expand.default(permute_42, [400, 8, 32, 49]); permute_42 = None clone_43: "f16[400, 8, 32, 49]" = torch.ops.aten.clone.default(expand_13, memory_format = torch.contiguous_format); expand_13 = None view_104: "f16[3200, 32, 49]" = torch.ops.aten.reshape.default(clone_43, [3200, 32, 49]); clone_43 = None bmm_6: "f16[3200, 49, 49]" = torch.ops.aten.bmm.default(view_103, view_104); view_103 = view_104 = None view_105: "f16[400, 8, 49, 49]" = torch.ops.aten.reshape.default(bmm_6, [400, 8, 49, 49]); bmm_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_106: "i64[2401]" = torch.ops.aten.reshape.default(arg57_1, [-1]); arg57_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_9: "f32[2401, 8]" = torch.ops.aten.index.Tensor(arg56_1, [view_106]); arg56_1 = view_106 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_107: "f32[49, 49, 8]" = torch.ops.aten.reshape.default(index_9, [49, 49, -1]); index_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_43: "f32[8, 49, 49]" = torch.ops.aten.permute.default(view_107, [2, 0, 1]); view_107 = None # File: 
/workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_44: "f32[8, 49, 49]" = torch.ops.aten.clone.default(permute_43, memory_format = torch.contiguous_format); permute_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_9: "f32[1, 8, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_44, 0); clone_44 = None add_39: "f32[400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_105, unsqueeze_9); view_105 = unsqueeze_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_108: "f32[1, 400, 8, 49, 49]" = torch.ops.aten.reshape.default(add_39, [1, 400, 8, 49, 49]); add_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 full_1: "f32[1, 140, 140, 1]" = torch.ops.aten.full.default([1, 140, 140, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_167: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(full_1, 1, 0, -7) slice_168: "f32[1, 133, 133, 1]" = torch.ops.aten.slice.Tensor(slice_167, 2, 0, -7); slice_167 = None slice_163: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(full_1, 1, 0, -7) slice_164: "f32[1, 133, 133, 1]" = torch.ops.aten.slice.Tensor(slice_163, 2, 0, -7); slice_163 = None lift_fresh_copy_13: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant13); _tensor_constant13 = None copy_9: "f32[1, 133, 133, 1]" = torch.ops.aten.copy.default(slice_164, lift_fresh_copy_13); slice_164 = lift_fresh_copy_13 = None # No stacktrace found for following nodes slice_tensor_9: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(full_1, 1, 0, -7) slice_scatter_default_20: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_9, copy_9, 2, 0, -7); slice_tensor_9 = copy_9 = None slice_scatter_default_21: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(full_1, slice_scatter_default_20, 1, 0, -7); full_1 = slice_scatter_default_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_182: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_21, 1, 0, -7) slice_183: "f32[1, 133, 4, 1]" = torch.ops.aten.slice.Tensor(slice_182, 2, -7, -3); slice_182 = None slice_178: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_21, 1, 0, -7) slice_179: "f32[1, 133, 4, 1]" = torch.ops.aten.slice.Tensor(slice_178, 2, -7, -3); slice_178 = None lift_fresh_copy_14: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant14); _tensor_constant14 = None copy_10: "f32[1, 133, 4, 1]" = torch.ops.aten.copy.default(slice_179, lift_fresh_copy_14); slice_179 = lift_fresh_copy_14 = None # No stacktrace found for following nodes slice_tensor_10: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_21, 1, 0, -7) slice_scatter_default_22: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_10, copy_10, 2, -7, -3); slice_tensor_10 = copy_10 = None slice_scatter_default_23: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_21, slice_scatter_default_22, 
1, 0, -7); slice_scatter_default_21 = slice_scatter_default_22 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_197: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_23, 1, 0, -7) slice_198: "f32[1, 133, 3, 1]" = torch.ops.aten.slice.Tensor(slice_197, 2, -3, 9223372036854775807); slice_197 = None slice_193: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_23, 1, 0, -7) slice_194: "f32[1, 133, 3, 1]" = torch.ops.aten.slice.Tensor(slice_193, 2, -3, 9223372036854775807); slice_193 = None lift_fresh_copy_15: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant15); _tensor_constant15 = None copy_11: "f32[1, 133, 3, 1]" = torch.ops.aten.copy.default(slice_194, lift_fresh_copy_15); slice_194 = lift_fresh_copy_15 = None # No stacktrace found for following nodes slice_tensor_11: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_23, 1, 0, -7) slice_scatter_default_24: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_11, copy_11, 2, -3, 9223372036854775807); slice_tensor_11 = copy_11 = None slice_scatter_default_25: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_23, slice_scatter_default_24, 1, 0, -7); slice_scatter_default_23 = slice_scatter_default_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt full_default_4: "f32[1, 4, 140, 1]" = torch.ops.aten.full.default([1, 4, 140, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) slice_213: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(full_default_4, 2, 0, -7) slice_208: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_25, 1, -7, -3) slice_209: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(slice_208, 2, 0, -7); slice_208 = None lift_fresh_copy_16: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant16); _tensor_constant16 = None copy_12: "f32[1, 4, 133, 1]" = torch.ops.aten.copy.default(slice_209, lift_fresh_copy_16); slice_209 = lift_fresh_copy_16 = None # No stacktrace found for following nodes slice_tensor_12: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(full_default_4, 2, 0, -7) slice_scatter_default_26: "f32[1, 4, 133, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_12, copy_12, 3, 0, 9223372036854775807); slice_tensor_12 = copy_12 = None slice_scatter_default_27: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(full_default_4, slice_scatter_default_26, 2, 0, -7); full_default_4 = slice_scatter_default_26 = None slice_scatter_default_28: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_25, slice_scatter_default_27, 1, -7, -3); slice_scatter_default_25 = slice_scatter_default_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_227: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_28, 1, -7, -3) slice_228: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_227, 2, -7, -3); slice_227 = None slice_223: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_28, 1, -7, -3) slice_224: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_223, 2, -7, -3); slice_223 = None lift_fresh_copy_17: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant17); _tensor_constant17 = None 
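# [Editorial sketch, not part of the traced graph] Each `img_mask[:, h, w, :] = cnt`
# assignment is functionalized into the slice / copy / slice_scatter chains around
# this point: the target region is sliced out, filled with the scalar cnt, then
# scattered back. With window_size 7 and shift_size 3 the h/w ranges are
# (0, -7), (-7, -3), (-3, None), giving the 9 numbered regions of the shift mask:
def _img_mask_sketch():
    import torch
    img_mask = torch.zeros(1, 140, 140, 1)   # (1, Hp, Wp, 1), Hp = Wp = 140
    cnt = 0
    ranges = (slice(0, -7), slice(-7, -3), slice(-3, None))
    for h in ranges:
        for w in ranges:
            img_mask[:, h, w, :] = cnt       # what each slice_scatter chain computes
            cnt += 1
    return img_mask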
copy_13: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_224, lift_fresh_copy_17); slice_224 = lift_fresh_copy_17 = None # No stacktrace found for following nodes slice_tensor_13: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_28, 1, -7, -3) slice_scatter_default_29: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_13, copy_13, 2, -7, -3); slice_tensor_13 = copy_13 = None slice_scatter_default_30: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_28, slice_scatter_default_29, 1, -7, -3); slice_scatter_default_28 = slice_scatter_default_29 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_242: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_30, 1, -7, -3) slice_243: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_242, 2, -3, 9223372036854775807); slice_242 = None slice_238: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_30, 1, -7, -3) slice_239: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_238, 2, -3, 9223372036854775807); slice_238 = None lift_fresh_copy_18: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant18); _tensor_constant18 = None copy_14: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_239, lift_fresh_copy_18); slice_239 = lift_fresh_copy_18 = None # No stacktrace found for following nodes slice_tensor_14: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_30, 1, -7, -3) slice_scatter_default_31: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_14, copy_14, 2, -3, 9223372036854775807); slice_tensor_14 = copy_14 = None slice_scatter_default_32: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_30, slice_scatter_default_31, 1, -7, -3); slice_scatter_default_30 = slice_scatter_default_31 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt full_default_5: "f32[1, 3, 140, 1]" = torch.ops.aten.full.default([1, 3, 140, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) slice_258: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(full_default_5, 2, 0, -7) slice_253: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_32, 1, -3, 9223372036854775807) slice_254: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(slice_253, 2, 0, -7); slice_253 = None lift_fresh_copy_19: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant19); _tensor_constant19 = None copy_15: "f32[1, 3, 133, 1]" = torch.ops.aten.copy.default(slice_254, lift_fresh_copy_19); slice_254 = lift_fresh_copy_19 = None # No stacktrace found for following nodes slice_tensor_15: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(full_default_5, 2, 0, -7) slice_scatter_default_33: "f32[1, 3, 133, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_15, copy_15, 3, 0, 9223372036854775807); slice_tensor_15 = copy_15 = None slice_scatter_default_34: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(full_default_5, slice_scatter_default_33, 2, 0, -7); full_default_5 = slice_scatter_default_33 = None slice_scatter_default_35: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_32, slice_scatter_default_34, 1, -3, 9223372036854775807); slice_scatter_default_32 = slice_scatter_default_34 = None # File: 
/workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_272: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_35, 1, -3, 9223372036854775807) slice_273: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_272, 2, -7, -3); slice_272 = None slice_268: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_35, 1, -3, 9223372036854775807) slice_269: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_268, 2, -7, -3); slice_268 = None lift_fresh_copy_20: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant20); _tensor_constant20 = None copy_16: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_269, lift_fresh_copy_20); slice_269 = lift_fresh_copy_20 = None # No stacktrace found for following nodes slice_tensor_16: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_35, 1, -3, 9223372036854775807) slice_scatter_default_36: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_16, copy_16, 2, -7, -3); slice_tensor_16 = copy_16 = None slice_scatter_default_37: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_35, slice_scatter_default_36, 1, -3, 9223372036854775807); slice_scatter_default_35 = slice_scatter_default_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt slice_287: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_37, 1, -3, 9223372036854775807) slice_288: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_287, 2, -3, 9223372036854775807); slice_287 = None slice_283: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_37, 1, -3, 9223372036854775807) slice_284: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_283, 2, -3, 9223372036854775807); slice_283 = None lift_fresh_copy_21: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant21); _tensor_constant21 = None copy_17: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_284, lift_fresh_copy_21); slice_284 = lift_fresh_copy_21 = None # No stacktrace found for following nodes slice_tensor_17: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_37, 1, -3, 9223372036854775807) slice_scatter_default_38: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_17, copy_17, 2, -3, 9223372036854775807); slice_tensor_17 = copy_17 = None slice_scatter_default_39: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_37, slice_scatter_default_38, 1, -3, 9223372036854775807); slice_scatter_default_37 = slice_scatter_default_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) view_67: "f32[1, 20, 7, 20, 7, 1]" = torch.ops.aten.reshape.default(slice_scatter_default_39, [1, 20, 7, 20, 7, 1]); slice_scatter_default_39 = None permute_28: "f32[1, 20, 20, 7, 7, 1]" = torch.ops.aten.permute.default(view_67, [0, 1, 3, 2, 4, 5]); view_67 = None clone_28: "f32[1, 20, 20, 7, 7, 1]" = torch.ops.aten.clone.default(permute_28, memory_format = torch.contiguous_format); permute_28 = None view_68: "f32[400, 7, 7, 1]" = torch.ops.aten.reshape.default(clone_28, [-1, 7, 7, 1]); clone_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1, view_69: "f32[400, 49]" = torch.ops.aten.reshape.default(view_68, [-1, 49]); 
view_68 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) unsqueeze_6: "f32[400, 1, 49]" = torch.ops.aten.unsqueeze.default(view_69, 1) unsqueeze_7: "f32[400, 49, 1]" = torch.ops.aten.unsqueeze.default(view_69, 2); view_69 = None sub_10: "f32[400, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze_6, unsqueeze_7); unsqueeze_6 = unsqueeze_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0)) eq_1: "b8[400, 49, 49]" = torch.ops.aten.eq.Scalar(sub_10, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( full_default_7: "f32[]" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0, ne_1: "b8[400, 49, 49]" = torch.ops.aten.ne.Scalar(sub_10, 0) full_default_6: "f32[]" = torch.ops.aten.full.default([], -100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) where_2: "f32[400, 49, 49]" = torch.ops.aten.where.self(ne_1, full_default_6, sub_10); ne_1 = full_default_6 = sub_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( where_3: "f32[400, 49, 49]" = torch.ops.aten.where.self(eq_1, full_default_7, where_2); eq_1 = full_default_7 = where_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_10: "f32[400, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_3, 1); where_3 = None unsqueeze_11: "f32[1, 400, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_10, 0); unsqueeze_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_40: "f32[1, 400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_108, unsqueeze_11); view_108 = unsqueeze_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_109: "f32[400, 8, 49, 49]" = torch.ops.aten.reshape.default(add_40, [-1, 8, 49, 49]); add_40 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_3: "f32[400, 8, 49, 1]" = torch.ops.aten.amax.default(view_109, [-1], True) sub_15: "f32[400, 8, 49, 49]" = torch.ops.aten.sub.Tensor(view_109, amax_3); view_109 = amax_3 = None exp_3: "f32[400, 8, 49, 49]" = torch.ops.aten.exp.default(sub_15); sub_15 = None sum_4: "f32[400, 8, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_3, [-1], True) div_7: "f32[400, 8, 49, 49]" = torch.ops.aten.div.Tensor(exp_3, sum_4); exp_3 = sum_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_110: "f16[400, 8, 49, 49]" = torch.ops.prims.convert_element_type.default(div_7, torch.float16); div_7 = None expand_14: "f16[400, 8, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_110, [400, 8, 49, 49]); convert_element_type_110 = None view_110: "f16[3200, 49, 49]" = torch.ops.aten.reshape.default(expand_14, [3200, 49, 49]); expand_14 = None 
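# [Editorial sketch, not part of the traced graph] The ops above turn the window
# region ids into an additive attention bias: pairwise differences of
# mask_windows become -100.0 where two positions lie in different shift regions
# (0.0 otherwise), are broadcast-added to the [1, 400, 8, 49, 49] logits, and
# the result is softmax'd, suppressing cross-region attention. Sketch mirroring
# the traced source lines 460-463:
def _shifted_window_attn_mask_sketch(mask_windows):
    # mask_windows: [nW, 49] region ids from window_partition(img_mask)
    attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)  # [nW, 49, 49]
    attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
    return attn_mask  # added as mask.unsqueeze(1).unsqueeze(0) before the softmax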
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_11: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 2); permute_41 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_15: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(select_11, [400, 8, 49, 32]); select_11 = None
clone_46: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_15, memory_format = torch.contiguous_format); expand_15 = None
view_111: "f16[3200, 49, 32]" = torch.ops.aten.reshape.default(clone_46, [3200, 49, 32]); clone_46 = None
bmm_7: "f16[3200, 49, 32]" = torch.ops.aten.bmm.default(view_110, view_111); view_110 = view_111 = None
view_112: "f16[400, 8, 49, 32]" = torch.ops.aten.reshape.default(bmm_7, [400, 8, 49, 32]); bmm_7 = None
permute_44: "f16[400, 49, 8, 32]" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None
clone_47: "f16[400, 49, 8, 32]" = torch.ops.aten.clone.default(permute_44, memory_format = torch.contiguous_format); permute_44 = None
view_113: "f16[400, 49, 256]" = torch.ops.aten.reshape.default(clone_47, [400, 49, 256]); clone_47 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_114: "f16[19600, 256]" = torch.ops.aten.reshape.default(view_113, [19600, 256]); view_113 = None
convert_element_type_114: "f16[256, 256]" = torch.ops.prims.convert_element_type.default(arg58_1, torch.float16); arg58_1 = None
permute_45: "f16[256, 256]" = torch.ops.aten.permute.default(convert_element_type_114, [1, 0]); convert_element_type_114 = None

# No stacktrace found for following nodes
mm_default_74: "f16[19600, 256]" = torch.ops.aten.mm.default(view_114, permute_45); view_114 = permute_45 = None
add_tensor_74: "f16[19600, 256]" = torch.ops.aten.add.Tensor(mm_default_74, convert_element_type_113); mm_default_74 = convert_element_type_113 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_115: "f16[400, 49, 256]" = torch.ops.aten.reshape.default(add_tensor_74, [400, 49, 256]); add_tensor_74 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_116: "f16[400, 7, 7, 256]" = torch.ops.aten.reshape.default(view_115, [-1, 7, 7, 256]); view_115 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_117: "f16[1, 20, 20, 7, 7, 256]" = torch.ops.aten.reshape.default(view_116, [1, 20, 20, 7, 7, -1]); view_116 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_46: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.permute.default(view_117, [0, 1, 3, 2, 4, 5]); view_117 = None
clone_49: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.clone.default(permute_46, memory_format = torch.contiguous_format); permute_46 = None
view_118: "f16[1, 140, 140, 256]" = torch.ops.aten.reshape.default(clone_49, [1, 140, 140, -1]); clone_49 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_6: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_41: "i64[140]" = torch.ops.aten.add.Tensor(iota_6, 137); iota_6 = None
fmod_6: "i64[140]" = torch.ops.aten.fmod.Scalar(add_41, 140); add_41 = None
index_10: "f16[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(view_118, [None, fmod_6]); view_118 = fmod_6 = None
iota_7: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_42: "i64[140]" = torch.ops.aten.add.Tensor(iota_7, 137); iota_7 = None
fmod_7: "i64[140]" = torch.ops.aten.fmod.Scalar(add_42, 140); add_42 = None
index_11: "f16[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(index_10, [None, None, fmod_7]); index_10 = fmod_7 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_304: "f16[1, 136, 140, 256]" = torch.ops.aten.slice.Tensor(index_11, 1, 0, 136); index_11 = None
slice_305: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_304, 2, 0, 136); slice_304 = None
clone_50: "f16[1, 136, 136, 256]" = torch.ops.aten.clone.default(slice_305, memory_format = torch.contiguous_format); slice_305 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_119: "f16[1, 18496, 256]" = torch.ops.aten.reshape.default(clone_50, [1, 18496, 256]); clone_50 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_43: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_34, view_119); add_34 = view_119 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_118: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_43, torch.float32)
var_mean_10 = torch.ops.aten.var_mean.correction(convert_element_type_118, [2], correction = 0, keepdim = True)
getitem_20: "f32[1, 18496, 1]" = var_mean_10[0]
getitem_21: "f32[1, 18496, 1]" = var_mean_10[1]; var_mean_10 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_127: "f16[256]" = torch.ops.prims.convert_element_type.default(arg65_1, torch.float16); arg65_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_119: "f16[1024]" = torch.ops.prims.convert_element_type.default(arg63_1, torch.float16); arg63_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_16: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_118, getitem_21); convert_element_type_118 = getitem_21 = None
add_44: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None
rsqrt_10: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_44); add_44 = None
mul_37: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_10); sub_16 = rsqrt_10 = None
mul_38: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_37, arg60_1); mul_37 = arg60_1 = None
add_45: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_38, arg61_1); mul_38 = arg61_1 = None
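The iota/add/fmod/index sequence above is how the compiler lowers the torch.roll at swin_transformer.py:306, which undoes the cyclic window shift: adding 137 modulo 140 shifts every index back by 3 along that axis. A minimal sketch of the equivalence, with shapes taken from the graph:

    import torch

    # torch.roll(x, shifts=(3, 3), dims=(1, 2)) expressed as index arithmetic,
    # matching the iota_6/fmod_6 and iota_7/fmod_7 nodes above.
    x = torch.randn(1, 140, 140, 256)
    idx = (torch.arange(140) + 137) % 140          # iota -> add 137 -> fmod 140
    rolled = x[:, idx][:, :, idx]                  # index dim 1, then dim 2
    assert torch.equal(rolled, torch.roll(x, shifts=(3, 3), dims=(1, 2)))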
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_121: "f16[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_45, torch.float16); add_45 = None
view_120: "f16[18496, 256]" = torch.ops.aten.reshape.default(convert_element_type_121, [18496, 256]); convert_element_type_121 = None
convert_element_type_120: "f16[1024, 256]" = torch.ops.prims.convert_element_type.default(arg62_1, torch.float16); arg62_1 = None
permute_47: "f16[256, 1024]" = torch.ops.aten.permute.default(convert_element_type_120, [1, 0]); convert_element_type_120 = None

# No stacktrace found for following nodes
mm_default_73: "f16[18496, 1024]" = torch.ops.aten.mm.default(view_120, permute_47); view_120 = permute_47 = None
add_tensor_73: "f16[18496, 1024]" = torch.ops.aten.add.Tensor(mm_default_73, convert_element_type_119); mm_default_73 = convert_element_type_119 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_121: "f16[1, 18496, 1024]" = torch.ops.aten.reshape.default(add_tensor_73, [1, 18496, 1024]); add_tensor_73 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_125: "f32[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(view_121, torch.float32); view_121 = None
mul_39: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_125, 0.5)
mul_40: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_125, 0.7071067811865476); convert_element_type_125 = None
erf_3: "f32[1, 18496, 1024]" = torch.ops.aten.erf.default(mul_40); mul_40 = None
add_46: "f32[1, 18496, 1024]" = torch.ops.aten.add.Tensor(erf_3, 1); erf_3 = None
mul_41: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(mul_39, add_46); mul_39 = add_46 = None
convert_element_type_126: "f16[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(mul_41, torch.float16); mul_41 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_122: "f16[18496, 1024]" = torch.ops.aten.reshape.default(convert_element_type_126, [18496, 1024]); convert_element_type_126 = None
convert_element_type_128: "f16[256, 1024]" = torch.ops.prims.convert_element_type.default(arg64_1, torch.float16); arg64_1 = None
permute_48: "f16[1024, 256]" = torch.ops.aten.permute.default(convert_element_type_128, [1, 0]); convert_element_type_128 = None

# No stacktrace found for following nodes
mm_default_72: "f16[18496, 256]" = torch.ops.aten.mm.default(view_122, permute_48); view_122 = permute_48 = None
add_tensor_72: "f16[18496, 256]" = torch.ops.aten.add.Tensor(mm_default_72, convert_element_type_127); mm_default_72 = convert_element_type_127 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_123: "f16[1, 18496, 256]" = torch.ops.aten.reshape.default(add_tensor_72, [1, 18496, 256]); add_tensor_72 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_47: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_43, view_123); add_43 = view_123 = None
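The F.gelu at activation.py:704 is decomposed into its exact erf form, evaluated in float32: gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))), where 0.7071067811865476 is 1/sqrt(2). A small sketch of the identity:

    import torch

    # The erf-based GELU computed by the mul/erf/add/mul nodes above.
    x = torch.randn(1, 18496, 1024)
    manual = 0.5 * x * (1 + torch.erf(x * 0.7071067811865476))
    assert torch.allclose(manual, torch.nn.functional.gelu(x), atol=1e-6)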
# File: /workspace/networks/encoders/swin/swin_transformer.py:344 in forward, code: x = x.view(B, H, W, C)
view_124: "f16[1, 136, 136, 256]" = torch.ops.aten.reshape.default(add_47, [1, 136, 136, 256])

# File: /workspace/networks/encoders/swin/swin_transformer.py:351 in forward, code: x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
slice_308: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 1, 0, 9223372036854775807, 2)
slice_309: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_308, 2, 0, 9223372036854775807, 2); slice_308 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:352 in forward, code: x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
slice_312: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 1, 1, 9223372036854775807, 2)
slice_313: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_312, 2, 0, 9223372036854775807, 2); slice_312 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:353 in forward, code: x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
slice_316: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 1, 0, 9223372036854775807, 2)
slice_317: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_316, 2, 1, 9223372036854775807, 2); slice_316 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:354 in forward, code: x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
slice_320: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 1, 1, 9223372036854775807, 2); view_124 = None
slice_321: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_320, 2, 1, 9223372036854775807, 2); slice_320 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:355 in forward, code: x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
cat_1: "f16[1, 68, 68, 1024]" = torch.ops.aten.cat.default([slice_309, slice_313, slice_317, slice_321], -1); slice_309 = slice_313 = slice_317 = slice_321 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:356 in forward, code: x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C
view_125: "f16[1, 4624, 1024]" = torch.ops.aten.reshape.default(cat_1, [1, -1, 1024]); cat_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_132: "f32[1, 4624, 1024]" = torch.ops.prims.convert_element_type.default(view_125, torch.float32); view_125 = None
var_mean_11 = torch.ops.aten.var_mean.correction(convert_element_type_132, [2], correction = 0, keepdim = True)
getitem_22: "f32[1, 4624, 1]" = var_mean_11[0]
getitem_23: "f32[1, 4624, 1]" = var_mean_11[1]; var_mean_11 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_137: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_47, torch.float32); add_47 = None
var_mean_12 = torch.ops.aten.var_mean.correction(convert_element_type_137, [2], correction = 0, keepdim = True)
getitem_24: "f32[1, 18496, 1]" = var_mean_12[0]
getitem_25: "f32[1, 18496, 1]" = var_mean_12[1]; var_mean_12 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
_tensor_constant24: "f32[]" = self._tensor_constant24
_tensor_constant25: "f32[]" = self._tensor_constant25
_tensor_constant26: "f32[]" = self._tensor_constant26
_tensor_constant27: "f32[]" = self._tensor_constant27
_tensor_constant28: "f32[]" = self._tensor_constant28
_tensor_constant29: "f32[]" = self._tensor_constant29
_tensor_constant30: "f32[]" = self._tensor_constant30
_tensor_constant31: "f32[]" = self._tensor_constant31
_tensor_constant32: "f32[]" = self._tensor_constant32

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_17: "f32[1, 4624, 1024]" = torch.ops.aten.sub.Tensor(convert_element_type_132, getitem_23); convert_element_type_132 = getitem_23 = None
add_48: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None
rsqrt_11: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_48); add_48 = None
mul_42: "f32[1, 4624, 1024]" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_11); sub_17 = rsqrt_11 = None
mul_43: "f32[1, 4624, 1024]" = torch.ops.aten.mul.Tensor(mul_42, arg66_1); mul_42 = arg66_1 = None
add_49: "f32[1, 4624, 1024]" = torch.ops.aten.add.Tensor(mul_43, arg67_1); mul_43 = arg67_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_134: "f16[1, 4624, 1024]" = torch.ops.prims.convert_element_type.default(add_49, torch.float16); add_49 = None
view_126: "f16[4624, 1024]" = torch.ops.aten.reshape.default(convert_element_type_134, [4624, 1024]); convert_element_type_134 = None
convert_element_type_133: "f16[512, 1024]" = torch.ops.prims.convert_element_type.default(arg68_1, torch.float16); arg68_1 = None
permute_49: "f16[1024, 512]" = torch.ops.aten.permute.default(convert_element_type_133, [1, 0]); convert_element_type_133 = None
mm_1: "f16[4624, 512]" = torch.ops.aten.mm.default(view_126, permute_49); view_126 = permute_49 = None
view_127: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(mm_1, [1, 4624, 512]); mm_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_140: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(view_127, torch.float32)
var_mean_13 = torch.ops.aten.var_mean.correction(convert_element_type_140, [2], correction = 0, keepdim = True)
getitem_26: "f32[1, 4624, 1]" = var_mean_13[0]
getitem_27: "f32[1, 4624, 1]" = var_mean_13[1]; var_mean_13 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_152: "f16[512]" = torch.ops.prims.convert_element_type.default(arg78_1, torch.float16); arg78_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_141: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg74_1, torch.float16); arg74_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_20: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_140, getitem_27); convert_element_type_140 = getitem_27 = None
add_52: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_26, 1e-05); getitem_26 = None
rsqrt_13: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_52); add_52 = None
mul_48: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_13); sub_20 = rsqrt_13 = None
mul_49: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_48, arg71_1); mul_48 = arg71_1 = None
add_53: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_49, arg72_1); mul_49 = arg72_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_133: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_53, [1, 68, 68, 512]); add_53 = None
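The four strided slices and the concat above (swin_transformer.py:351-356) are Swin's patch-merging downsample: the 2x2 neighborhoods of the 136x136 map are regrouped channel-wise into 4C = 1024 features, normalized, and reduced to 2C = 512 by mm_1. Note that mm_1 has no bias add after it, so the reduction Linear is bias-free. A sketch with the graph's shapes (the module instances here are illustrative stand-ins, not the model's own):

    import torch

    x = torch.randn(1, 136, 136, 256)
    x0 = x[:, 0::2, 0::2, :]                       # B H/2 W/2 C
    x1 = x[:, 1::2, 0::2, :]
    x2 = x[:, 0::2, 1::2, :]
    x3 = x[:, 1::2, 1::2, :]
    merged = torch.cat([x0, x1, x2, x3], -1).view(1, -1, 4 * 256)  # B H/2*W/2 4C
    merged = torch.nn.LayerNorm(1024)(merged)
    out = torch.nn.Linear(1024, 512, bias=False)(merged)           # like mm_1 above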
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_4: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_133, [0, 0, 0, 2, 0, 2], 0.0); view_133 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_134: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_4, [1, 10, 7, 10, 7, 512]); constant_pad_nd_4 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_53: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_134, [0, 1, 3, 2, 4, 5]); view_134 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_55: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_53, memory_format = torch.contiguous_format); permute_53 = None
view_135: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_55, [-1, 7, 7, 512]); clone_55 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_136: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_135, [-1, 49, 512]); view_135 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_143: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_136, torch.float16); view_136 = None
view_137: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_143, [4900, 512]); convert_element_type_143 = None
convert_element_type_142: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg73_1, torch.float16); arg73_1 = None
permute_54: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_142, [1, 0]); convert_element_type_142 = None

# No stacktrace found for following nodes
mm_default_71: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_137, permute_54); view_137 = permute_54 = None
add_tensor_71: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_71, convert_element_type_141); mm_default_71 = convert_element_type_141 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_138: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_71, [100, 49, 1536]); add_tensor_71 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_139: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_138, [100, 49, 3, 16, 32]); view_138 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_55: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_139, [2, 0, 3, 1, 4]); view_139 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_12: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 0)

# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_50: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_12, 0.1767766952966369); select_12 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_16: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_50, [100, 16, 49, 32]); mul_50 = None
clone_56: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_16, memory_format = torch.contiguous_format); expand_16 = None
view_140: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_56, [1600, 49, 32]); clone_56 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_13: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 1)

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_56: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_13, [0, 1, 3, 2]); select_13 = None
expand_17: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_56, [100, 16, 32, 49]); permute_56 = None
clone_57: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_17, memory_format = torch.contiguous_format); expand_17 = None
view_141: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_57, [1600, 32, 49]); clone_57 = None
bmm_8: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_140, view_141); view_140 = view_141 = None
view_142: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_8, [100, 16, 49, 49]); bmm_8 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_143: "i64[2401]" = torch.ops.aten.reshape.default(arg76_1, [-1]); arg76_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_12: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg75_1, [view_143]); arg75_1 = view_143 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_144: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_12, [49, 49, -1]); index_12 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_57: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_144, [2, 0, 1]); view_144 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_58: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_57, memory_format = torch.contiguous_format); permute_57 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_14: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_58, 0); clone_58 = None
add_54: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_142, unsqueeze_14); view_142 = unsqueeze_14 = None
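The index/view/permute chain above gathers the learned relative-position bias (swin_transformer.py:172-179): the 169x16 table (arg75_1) is indexed by the precomputed 49x49 relative_position_index (arg76_1), reshaped to (nH, 49, 49), and broadcast over the 100 windows. The q scale 0.1767766952966369 is 1/sqrt(32), the per-head dimension. A sketch with random stand-ins for the two buffers:

    import torch

    table = torch.randn(169, 16)                    # relative_position_bias_table
    index = torch.randint(0, 169, (49, 49))         # relative_position_index (precomputed)
    q = torch.randn(100, 16, 49, 32)
    k = torch.randn(100, 16, 49, 32)
    bias = table[index.view(-1)].view(49, 49, -1).permute(2, 0, 1).contiguous()
    attn = (q * 32 ** -0.5) @ k.transpose(-2, -1)   # scale = 0.1767766952966369
    attn = attn + bias.unsqueeze(0)                 # (100, 16, 49, 49)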
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_4: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_54, [-1], True)
sub_21: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_54, amax_4); add_54 = amax_4 = None
exp_4: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_21); sub_21 = None
sum_5: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_4, [-1], True)
div_10: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_4, sum_5); exp_4 = sum_5 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_149: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_10, torch.float16); div_10 = None
expand_18: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_149, [100, 16, 49, 49]); convert_element_type_149 = None
view_145: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_18, [1600, 49, 49]); expand_18 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_14: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 2); permute_55 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_19: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_14, [100, 16, 49, 32]); select_14 = None
clone_60: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_19, memory_format = torch.contiguous_format); expand_19 = None
view_146: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_60, [1600, 49, 32]); clone_60 = None
bmm_9: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_145, view_146); view_145 = view_146 = None
view_147: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_9, [100, 16, 49, 32]); bmm_9 = None
permute_58: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_147, [0, 2, 1, 3]); view_147 = None
clone_61: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_58, memory_format = torch.contiguous_format); permute_58 = None
view_148: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_61, [100, 49, 512]); clone_61 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_149: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_148, [4900, 512]); view_148 = None
convert_element_type_153: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg77_1, torch.float16); arg77_1 = None
permute_59: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_153, [1, 0]); convert_element_type_153 = None

# No stacktrace found for following nodes
mm_default_70: "f16[4900, 512]" = torch.ops.aten.mm.default(view_149, permute_59); view_149 = permute_59 = None
add_tensor_70: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_70, convert_element_type_152); mm_default_70 = convert_element_type_152 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_150: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_70, [100, 49, 512]); add_tensor_70 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_151: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_150, [-1, 7, 7, 512]); view_150 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_152: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_151, [1, 10, 10, 7, 7, -1]); view_151 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_60: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_152, [0, 1, 3, 2, 4, 5]); view_152 = None
clone_63: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_60, memory_format = torch.contiguous_format); permute_60 = None
view_153: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_63, [1, 70, 70, -1]); clone_63 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_455: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_153, 1, 0, 68); view_153 = None
slice_456: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_455, 2, 0, 68); slice_455 = None
clone_64: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_456, memory_format = torch.contiguous_format); slice_456 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_154: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_64, [1, 4624, 512]); clone_64 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_55: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(view_127, view_154); view_127 = view_154 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_157: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_55, torch.float32)
var_mean_14 = torch.ops.aten.var_mean.correction(convert_element_type_157, [2], correction = 0, keepdim = True)
getitem_28: "f32[1, 4624, 1]" = var_mean_14[0]
getitem_29: "f32[1, 4624, 1]" = var_mean_14[1]; var_mean_14 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_166: "f16[512]" = torch.ops.prims.convert_element_type.default(arg84_1, torch.float16); arg84_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_158: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg82_1, torch.float16); arg82_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_22: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_157, getitem_29); convert_element_type_157 = getitem_29 = None
add_56: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_28, 1e-05); getitem_28 = None
rsqrt_14: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_56); add_56 = None
mul_51: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_14); sub_22 = rsqrt_14 = None
mul_52: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_51, arg79_1); mul_51 = arg79_1 = None
add_57: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_52, arg80_1); mul_52 = arg80_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_160: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_57, torch.float16); add_57 = None
view_155: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_160, [4624, 512]); convert_element_type_160 = None
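Throughout this stage the activations stay in float16 while every F.layer_norm upcasts to float32, computes biased statistics (var_mean with correction = 0), and normalizes with rsqrt(var + 1e-05) before the affine step. Keeping the statistics in fp32 is the usual mixed-precision arrangement; that reading of the casts is an inference on our part, not something the graph states. A sketch of the decomposition:

    import torch

    x = torch.randn(1, 4624, 512, dtype=torch.float16)
    weight, bias = torch.ones(512), torch.zeros(512)
    xf = x.float()                                  # upcast as convert_element_type does
    var, mean = torch.var_mean(xf, dim=-1, correction=0, keepdim=True)
    out = (xf - mean) * torch.rsqrt(var + 1e-5) * weight + bias
    ref = torch.nn.functional.layer_norm(xf, (512,), weight, bias, eps=1e-5)
    assert torch.allclose(out, ref, atol=1e-5)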
convert_element_type_159: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg81_1, torch.float16); arg81_1 = None
permute_61: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_159, [1, 0]); convert_element_type_159 = None

# No stacktrace found for following nodes
mm_default_69: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_155, permute_61); view_155 = permute_61 = None
add_tensor_69: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_69, convert_element_type_158); mm_default_69 = convert_element_type_158 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_156: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_69, [1, 4624, 2048]); add_tensor_69 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_164: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_156, torch.float32); view_156 = None
mul_53: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_164, 0.5)
mul_54: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_164, 0.7071067811865476); convert_element_type_164 = None
erf_4: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_54); mul_54 = None
add_58: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_4, 1); erf_4 = None
mul_55: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_53, add_58); mul_53 = add_58 = None
convert_element_type_165: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_55, torch.float16); mul_55 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_157: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_165, [4624, 2048]); convert_element_type_165 = None
convert_element_type_167: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg83_1, torch.float16); arg83_1 = None
permute_62: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_167, [1, 0]); convert_element_type_167 = None

# No stacktrace found for following nodes
mm_default_68: "f16[4624, 512]" = torch.ops.aten.mm.default(view_157, permute_62); view_157 = permute_62 = None
add_tensor_68: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_68, convert_element_type_166); mm_default_68 = convert_element_type_166 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_158: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_68, [1, 4624, 512]); add_tensor_68 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_59: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_55, view_158); add_55 = view_158 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_171: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_59, torch.float32)
var_mean_15 = torch.ops.aten.var_mean.correction(convert_element_type_171, [2], correction = 0, keepdim = True)
getitem_30: "f32[1, 4624, 1]" = var_mean_15[0]
getitem_31: "f32[1, 4624, 1]" = var_mean_15[1]; var_mean_15 = None
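Every F.linear in the graph follows the same pattern, visible again above: the fp32 master weight (e.g. arg81_1) and bias (arg82_1) are cast to f16, the weight is transposed, the tokens are flattened to 2-D for a plain mm, and the bias lands in a separate add (the nodes under "No stacktrace found"). A sketch of the lowering, in fp32 for simplicity:

    import torch

    x = torch.randn(1, 4624, 512)
    w, b = torch.randn(2048, 512), torch.randn(2048)
    # flatten -> mm against transposed weight -> separate bias add -> reshape back
    out = (x.reshape(-1, 512) @ w.t() + b).reshape(1, 4624, 2048)
    assert torch.allclose(out, torch.nn.functional.linear(x, w, b), atol=1e-5)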
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_183: "f16[512]" = torch.ops.prims.convert_element_type.default(arg92_1, torch.float16); arg92_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_172: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg88_1, torch.float16); arg88_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_23: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_171, getitem_31); convert_element_type_171 = getitem_31 = None
add_60: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_30, 1e-05); getitem_30 = None
rsqrt_15: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_60); add_60 = None
mul_56: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_15); sub_23 = rsqrt_15 = None
mul_57: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_56, arg85_1); mul_56 = arg85_1 = None
add_61: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_57, arg86_1); mul_57 = arg86_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_159: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_61, [1, 68, 68, 512]); add_61 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_5: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_159, [0, 0, 0, 2, 0, 2], 0.0); view_159 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_8: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_62: "i64[70]" = torch.ops.aten.add.Tensor(iota_8, 3); iota_8 = None
fmod_8: "i64[70]" = torch.ops.aten.fmod.Scalar(add_62, 70); add_62 = None
index_13: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_5, [None, fmod_8]); constant_pad_nd_5 = fmod_8 = None
iota_9: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_63: "i64[70]" = torch.ops.aten.add.Tensor(iota_9, 3); iota_9 = None
fmod_9: "i64[70]" = torch.ops.aten.fmod.Scalar(add_63, 70); add_63 = None
index_14: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_13, [None, None, fmod_9]); index_13 = fmod_9 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_160: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_14, [1, 10, 7, 10, 7, 512]); index_14 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_63: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_160, [0, 1, 3, 2, 4, 5]); view_160 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_67: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_63, memory_format = torch.contiguous_format); permute_63 = None
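The view/permute/clone triple above is window_partition (swin_transformer.py:75-78): the padded 70x70 map is cut into a 10x10 grid of 7x7 windows. Reconstructed from the cited source lines:

    import torch

    def window_partition(x: torch.Tensor, window_size: int) -> torch.Tensor:
        B, H, W, C = x.shape
        x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
        return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)

    windows = window_partition(torch.randn(1, 70, 70, 512), 7)  # -> (100, 7, 7, 512)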
"f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_67, [-1, 7, 7, 512]); clone_67 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_162: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_161, [-1, 49, 512]); view_161 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_174: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_162, torch.float16); view_162 = None view_163: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_174, [4900, 512]); convert_element_type_174 = None convert_element_type_173: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg87_1, torch.float16); arg87_1 = None permute_64: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_173, [1, 0]); convert_element_type_173 = None # No stacktrace found for following nodes mm_default_67: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_163, permute_64); view_163 = permute_64 = None add_tensor_67: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_67, convert_element_type_172); mm_default_67 = convert_element_type_172 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_164: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_67, [100, 49, 1536]); add_tensor_67 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_165: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_164, [100, 49, 3, 16, 32]); view_164 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_65: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_165, [2, 0, 3, 1, 4]); view_165 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_15: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_58: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_15, 0.1767766952966369); select_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_20: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_58, [100, 16, 49, 32]); mul_58 = None clone_68: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_20, memory_format = torch.contiguous_format); expand_20 = None view_166: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_68, [1600, 49, 32]); clone_68 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_16: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_66: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_16, [0, 1, 3, 2]); select_16 = None expand_21: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_66, [100, 16, 32, 49]); permute_66 = None clone_69: "f16[100, 16, 32, 49]" = 
clone_69: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_21, memory_format = torch.contiguous_format); expand_21 = None
view_167: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_69, [1600, 32, 49]); clone_69 = None
bmm_10: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_166, view_167); view_166 = view_167 = None
view_168: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_10, [100, 16, 49, 49]); bmm_10 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_169: "i64[2401]" = torch.ops.aten.reshape.default(arg90_1, [-1]); arg90_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_15: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg89_1, [view_169]); arg89_1 = view_169 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_170: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_15, [49, 49, -1]); index_15 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_67: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_170, [2, 0, 1]); view_170 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_70: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_67, memory_format = torch.contiguous_format); permute_67 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_15: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_70, 0); clone_70 = None
add_64: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_168, unsqueeze_15); view_168 = unsqueeze_15 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_171: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_64, [1, 100, 16, 49, 49]); add_64 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1
full_2: "f32[1, 70, 70, 1]" = torch.ops.aten.full.default([1, 70, 70, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_328: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(full_2, 1, 0, -7)
slice_329: "f32[1, 63, 63, 1]" = torch.ops.aten.slice.Tensor(slice_328, 2, 0, -7); slice_328 = None
slice_324: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(full_2, 1, 0, -7)
slice_325: "f32[1, 63, 63, 1]" = torch.ops.aten.slice.Tensor(slice_324, 2, 0, -7); slice_324 = None
lift_fresh_copy_24: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant24); _tensor_constant24 = None
copy_18: "f32[1, 63, 63, 1]" = torch.ops.aten.copy.default(slice_325, lift_fresh_copy_24); slice_325 = lift_fresh_copy_24 = None

# No stacktrace found for following nodes
slice_tensor_18: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(full_2, 1, 0, -7)
slice_scatter_default_40: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_18, copy_18, 2, 0, -7); slice_tensor_18 = copy_18 = None
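The slice/slice_scatter cascade that starts here is the functionalized form of the in-place writes img_mask[:, h, w, :] = cnt (swin_transformer.py:453); the nine tensor constants are cnt = 0..8 for the 3x3 grid of shift regions, with boundaries at -window_size = -7 and -shift_size = -3, exactly as in the slice arguments. The eager-mode loop it corresponds to, reconstructed from the slice bounds and following the standard Swin mask construction:

    import torch

    Hp = Wp = 70
    window_size, shift_size = 7, 3
    img_mask = torch.zeros((1, Hp, Wp, 1))          # matches full_2 above
    regions = (slice(0, -window_size),
               slice(-window_size, -shift_size),
               slice(-shift_size, None))
    cnt = 0
    for h in regions:
        for w in regions:
            img_mask[:, h, w, :] = cnt              # one slice_scatter chain each
            cnt += 1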
slice_scatter_default_41: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(full_2, slice_scatter_default_40, 1, 0, -7); full_2 = slice_scatter_default_40 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_343: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_41, 1, 0, -7)
slice_344: "f32[1, 63, 4, 1]" = torch.ops.aten.slice.Tensor(slice_343, 2, -7, -3); slice_343 = None
slice_339: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_41, 1, 0, -7)
slice_340: "f32[1, 63, 4, 1]" = torch.ops.aten.slice.Tensor(slice_339, 2, -7, -3); slice_339 = None
lift_fresh_copy_25: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant25); _tensor_constant25 = None
copy_19: "f32[1, 63, 4, 1]" = torch.ops.aten.copy.default(slice_340, lift_fresh_copy_25); slice_340 = lift_fresh_copy_25 = None

# No stacktrace found for following nodes
slice_tensor_19: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_41, 1, 0, -7)
slice_scatter_default_42: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_19, copy_19, 2, -7, -3); slice_tensor_19 = copy_19 = None
slice_scatter_default_43: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_41, slice_scatter_default_42, 1, 0, -7); slice_scatter_default_41 = slice_scatter_default_42 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_358: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_43, 1, 0, -7)
slice_359: "f32[1, 63, 3, 1]" = torch.ops.aten.slice.Tensor(slice_358, 2, -3, 9223372036854775807); slice_358 = None
slice_354: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_43, 1, 0, -7)
slice_355: "f32[1, 63, 3, 1]" = torch.ops.aten.slice.Tensor(slice_354, 2, -3, 9223372036854775807); slice_354 = None
lift_fresh_copy_26: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant26); _tensor_constant26 = None
copy_20: "f32[1, 63, 3, 1]" = torch.ops.aten.copy.default(slice_355, lift_fresh_copy_26); slice_355 = lift_fresh_copy_26 = None

# No stacktrace found for following nodes
slice_tensor_20: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_43, 1, 0, -7)
slice_scatter_default_44: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_20, copy_20, 2, -3, 9223372036854775807); slice_tensor_20 = copy_20 = None
slice_scatter_default_45: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_43, slice_scatter_default_44, 1, 0, -7); slice_scatter_default_43 = slice_scatter_default_44 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
full_default_8: "f32[1, 4, 70, 1]" = torch.ops.aten.full.default([1, 4, 70, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
slice_374: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(full_default_8, 2, 0, -7)
slice_369: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_45, 1, -7, -3)
slice_370: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(slice_369, 2, 0, -7); slice_369 = None
lift_fresh_copy_27: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant27); _tensor_constant27 = None
copy_21: "f32[1, 4, 63, 1]" = torch.ops.aten.copy.default(slice_370, lift_fresh_copy_27); slice_370 = lift_fresh_copy_27 = None

# No stacktrace found for following nodes
slice_tensor_21: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(full_default_8, 2, 0, -7)
slice_scatter_default_46: "f32[1, 4, 63, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_21, copy_21, 3, 0, 9223372036854775807); slice_tensor_21 = copy_21 = None
slice_scatter_default_47: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(full_default_8, slice_scatter_default_46, 2, 0, -7); full_default_8 = slice_scatter_default_46 = None
slice_scatter_default_48: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_45, slice_scatter_default_47, 1, -7, -3); slice_scatter_default_45 = slice_scatter_default_47 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_388: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_48, 1, -7, -3)
slice_389: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_388, 2, -7, -3); slice_388 = None
slice_384: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_48, 1, -7, -3)
slice_385: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_384, 2, -7, -3); slice_384 = None
lift_fresh_copy_28: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant28); _tensor_constant28 = None
copy_22: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_385, lift_fresh_copy_28); slice_385 = lift_fresh_copy_28 = None

# No stacktrace found for following nodes
slice_tensor_22: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_48, 1, -7, -3)
slice_scatter_default_49: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_22, copy_22, 2, -7, -3); slice_tensor_22 = copy_22 = None
slice_scatter_default_50: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_48, slice_scatter_default_49, 1, -7, -3); slice_scatter_default_48 = slice_scatter_default_49 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_403: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_50, 1, -7, -3)
slice_404: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_403, 2, -3, 9223372036854775807); slice_403 = None
slice_399: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_50, 1, -7, -3)
slice_400: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_399, 2, -3, 9223372036854775807); slice_399 = None
lift_fresh_copy_29: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant29); _tensor_constant29 = None
copy_23: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_400, lift_fresh_copy_29); slice_400 = lift_fresh_copy_29 = None

# No stacktrace found for following nodes
slice_tensor_23: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_50, 1, -7, -3)
slice_scatter_default_51: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_23, copy_23, 2, -3, 9223372036854775807); slice_tensor_23 = copy_23 = None
slice_scatter_default_52: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_50, slice_scatter_default_51, 1, -7, -3); slice_scatter_default_50 = slice_scatter_default_51 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
full_default_9: "f32[1, 3, 70, 1]" = torch.ops.aten.full.default([1, 3, 70, 1], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
slice_419: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(full_default_9, 2, 0, -7)
slice_414: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_52, 1, -3, 9223372036854775807)
slice_415: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(slice_414, 2, 0, -7); slice_414 = None
lift_fresh_copy_30: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant30); _tensor_constant30 = None
copy_24: "f32[1, 3, 63, 1]" = torch.ops.aten.copy.default(slice_415, lift_fresh_copy_30); slice_415 = lift_fresh_copy_30 = None

# No stacktrace found for following nodes
slice_tensor_24: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(full_default_9, 2, 0, -7)
slice_scatter_default_53: "f32[1, 3, 63, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_24, copy_24, 3, 0, 9223372036854775807); slice_tensor_24 = copy_24 = None
slice_scatter_default_54: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(full_default_9, slice_scatter_default_53, 2, 0, -7); full_default_9 = slice_scatter_default_53 = None
slice_scatter_default_55: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_52, slice_scatter_default_54, 1, -3, 9223372036854775807); slice_scatter_default_52 = slice_scatter_default_54 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_433: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_55, 1, -3, 9223372036854775807)
slice_434: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_433, 2, -7, -3); slice_433 = None
slice_429: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_55, 1, -3, 9223372036854775807)
slice_430: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_429, 2, -7, -3); slice_429 = None
lift_fresh_copy_31: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant31); _tensor_constant31 = None
copy_25: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_430, lift_fresh_copy_31); slice_430 = lift_fresh_copy_31 = None

# No stacktrace found for following nodes
slice_tensor_25: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_55, 1, -3, 9223372036854775807)
slice_scatter_default_56: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_25, copy_25, 2, -7, -3); slice_tensor_25 = copy_25 = None
slice_scatter_default_57: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_55, slice_scatter_default_56, 1, -3, 9223372036854775807); slice_scatter_default_55 = slice_scatter_default_56 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt
slice_448: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_57, 1, -3, 9223372036854775807)
slice_449: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_448, 2, -3, 9223372036854775807); slice_448 = None
slice_444: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_57, 1, -3, 9223372036854775807)
slice_445: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_444, 2, -3, 9223372036854775807); slice_444 = None
lift_fresh_copy_32: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant32); _tensor_constant32 = None
copy_26: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_445, lift_fresh_copy_32); slice_445 = lift_fresh_copy_32 = None

# No stacktrace found for following nodes
slice_tensor_26: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_default_57, 1, -3, 9223372036854775807)
slice_scatter_default_58: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_tensor_26, copy_26, 2, -3, 9223372036854775807); slice_tensor_26 = copy_26 = None
slice_scatter_default_59: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_default_57, slice_scatter_default_58, 1, -3, 9223372036854775807); slice_scatter_default_57 = slice_scatter_default_58 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
view_130: "f32[1, 10, 7, 10, 7, 1]" = torch.ops.aten.reshape.default(slice_scatter_default_59, [1, 10, 7, 10, 7, 1]); slice_scatter_default_59 = None
permute_52: "f32[1, 10, 10, 7, 7, 1]" = torch.ops.aten.permute.default(view_130, [0, 1, 3, 2, 4, 5]); view_130 = None
clone_54: "f32[1, 10, 10, 7, 7, 1]" = torch.ops.aten.clone.default(permute_52, memory_format = torch.contiguous_format); permute_52 = None
view_131: "f32[100, 7, 7, 1]" = torch.ops.aten.reshape.default(clone_54, [-1, 7, 7, 1]); clone_54 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1,
view_132: "f32[100, 49]" = torch.ops.aten.reshape.default(view_131, [-1, 49]); view_131 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
unsqueeze_12: "f32[100, 1, 49]" = torch.ops.aten.unsqueeze.default(view_132, 1)
unsqueeze_13: "f32[100, 49, 1]" = torch.ops.aten.unsqueeze.default(view_132, 2); view_132 = None
sub_19: "f32[100, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze_12, unsqueeze_13); unsqueeze_12 = unsqueeze_13 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0))
eq_2: "b8[100, 49, 49]" = torch.ops.aten.eq.Scalar(sub_19, 0)

# File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill(
full_default_11: "f32[]" = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)

# File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0,
ne_2: "b8[100, 49, 49]" = torch.ops.aten.ne.Scalar(sub_19, 0)
full_default_10: "f32[]" = torch.ops.aten.full.default([], -100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False)
where_4: "f32[100, 49, 49]" = torch.ops.aten.where.self(ne_2, full_default_10, sub_19); ne_2 = full_default_10 = sub_19 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill(
where_5: "f32[100, 49, 49]" = torch.ops.aten.where.self(eq_2, full_default_11, where_4); eq_2 = full_default_11 = where_4 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_16: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_17: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_16, 0); unsqueeze_16 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_65: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_171, unsqueeze_17); view_171 = unsqueeze_17 = None
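The two masked_fill calls (swin_transformer.py:460-463) become the eq/ne/where nodes above: token pairs from different shift regions get -100, same-region pairs get 0, and the mask is added per window before the softmax, so cross-region attention weights collapse to roughly zero. A sketch following the cited source:

    import torch

    mask_windows = torch.randint(0, 9, (100, 49)).float()  # region ids per token
    attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
    attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
    # added to attn as mask.unsqueeze(1).unsqueeze(0): shape (1, 100, 1, 49, 49)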
# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_172: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_65, [-1, 16, 49, 49]); add_65 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_5: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_172, [-1], True)
sub_24: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_172, amax_5); view_172 = amax_5 = None
exp_5: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_24); sub_24 = None
sum_6: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_5, [-1], True)
div_11: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_5, sum_6); exp_5 = sum_6 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_180: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_11, torch.float16); div_11 = None
expand_22: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_180, [100, 16, 49, 49]); convert_element_type_180 = None
view_173: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_22, [1600, 49, 49]); expand_22 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_17: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 2); permute_65 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_23: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_17, [100, 16, 49, 32]); select_17 = None
clone_72: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_23, memory_format = torch.contiguous_format); expand_23 = None
view_174: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_72, [1600, 49, 32]); clone_72 = None
bmm_11: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_173, view_174); view_173 = view_174 = None
view_175: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_11, [100, 16, 49, 32]); bmm_11 = None
permute_68: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_175, [0, 2, 1, 3]); view_175 = None
clone_73: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_68, memory_format = torch.contiguous_format); permute_68 = None
view_176: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_73, [100, 49, 512]); clone_73 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_177: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_176, [4900, 512]); view_176 = None
convert_element_type_184: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg91_1, torch.float16); arg91_1 = None
permute_69: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_184, [1, 0]); convert_element_type_184 = None

# No stacktrace found for following nodes
mm_default_66: "f16[4900, 512]" = torch.ops.aten.mm.default(view_177, permute_69); view_177 = permute_69 = None
add_tensor_66: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_66, convert_element_type_183); mm_default_66 = convert_element_type_183 = None
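F.softmax appears as the numerically stable decomposition above: subtract the row max (amax_5), exponentiate, and normalize by the sum, all in float32 before the probabilities are cast back to f16 for the attn @ v bmm. A sketch of the identity:

    import torch

    attn = torch.randn(100, 16, 49, 49)
    m = attn.amax(dim=-1, keepdim=True)             # row max for stability
    probs = torch.exp(attn - m)
    probs = probs / probs.sum(dim=-1, keepdim=True)
    assert torch.allclose(probs, torch.softmax(attn, dim=-1), atol=1e-6)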
self.bias) view_178: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_66, [100, 49, 512]); add_tensor_66 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_179: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_178, [-1, 7, 7, 512]); view_178 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_180: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_179, [1, 10, 10, 7, 7, -1]); view_179 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_70: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_180, [0, 1, 3, 2, 4, 5]); view_180 = None clone_75: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_70, memory_format = torch.contiguous_format); permute_70 = None view_181: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_75, [1, 70, 70, -1]); clone_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_10: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_66: "i64[70]" = torch.ops.aten.add.Tensor(iota_10, 67); iota_10 = None fmod_10: "i64[70]" = torch.ops.aten.fmod.Scalar(add_66, 70); add_66 = None index_16: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_181, [None, fmod_10]); view_181 = fmod_10 = None iota_11: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_67: "i64[70]" = torch.ops.aten.add.Tensor(iota_11, 67); iota_11 = None fmod_11: "i64[70]" = torch.ops.aten.fmod.Scalar(add_67, 70); add_67 = None index_17: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_16, [None, None, fmod_11]); index_16 = fmod_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_465: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_17, 1, 0, 68); index_17 = None slice_466: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_465, 2, 0, 68); slice_465 = None clone_76: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_466, memory_format = torch.contiguous_format); slice_466 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_182: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_76, [1, 4624, 512]); clone_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_68: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_59, view_182); add_59 = view_182 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_188: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_68, torch.float32) var_mean_16 = torch.ops.aten.var_mean.correction(convert_element_type_188, [2], correction = 0, keepdim = True) getitem_32: "f32[1, 4624, 1]" = var_mean_16[0] getitem_33: "f32[1, 4624, 1]" = var_mean_16[1]; var_mean_16 = None # File: 
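# torch.roll is lowered to index arithmetic: for a shift s along a dim of
# size n, out[i] = x[(i - s) % n], which appears above as
# iota -> add (n - s) -> fmod n -> index. Here n = 70 and the graph adds 67,
# i.e. the reverse cyclic shift by +3 that undoes the shifted-window offset
# (the forward shift later in the graph adds 3 instead). Sketch, not part of
# the captured graph:
import torch

x = torch.randn(1, 70, 70, 512)
idx = (torch.arange(70) + 67) % 70   # (i + 67) % 70 == (i - 3) % 70
out = x[:, idx][:, :, idx]           # roll rows, then columns
assert torch.equal(out, torch.roll(x, shifts=(3, 3), dims=(1, 2)))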
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_197: "f16[512]" = torch.ops.prims.convert_element_type.default(arg98_1, torch.float16); arg98_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_189: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg96_1, torch.float16); arg96_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_25: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_188, getitem_33); convert_element_type_188 = getitem_33 = None add_69: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_32, 1e-05); getitem_32 = None rsqrt_16: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_69); add_69 = None mul_59: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_25, rsqrt_16); sub_25 = rsqrt_16 = None mul_60: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_59, arg93_1); mul_59 = arg93_1 = None add_70: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_60, arg94_1); mul_60 = arg94_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_191: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_70, torch.float16); add_70 = None view_183: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_191, [4624, 512]); convert_element_type_191 = None convert_element_type_190: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg95_1, torch.float16); arg95_1 = None permute_71: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_190, [1, 0]); convert_element_type_190 = None # No stacktrace found for following nodes mm_default_65: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_183, permute_71); view_183 = permute_71 = None add_tensor_65: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_65, convert_element_type_189); mm_default_65 = convert_element_type_189 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_184: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_65, [1, 4624, 2048]); add_tensor_65 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_195: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_184, torch.float32); view_184 = None mul_61: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_195, 0.5) mul_62: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_195, 0.7071067811865476); convert_element_type_195 = None erf_5: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_62); mul_62 = None add_71: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_5, 1); erf_5 = None mul_63: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_61, add_71); mul_61 = add_71 = None convert_element_type_196: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_63, torch.float16); mul_63 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_185: 
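# nn.GELU with approximate='none' decomposes into its exact erf form,
# 0.5 * x * (1 + erf(x / sqrt(2))); the 0.7071067811865476 constant above is
# 1/sqrt(2), and the computation is upcast to fp32 before casting back to
# fp16. Minimal sketch, not part of the captured graph:
import torch

x = torch.randn(1, 4624, 2048)
y = 0.5 * x * (1 + torch.erf(x * 0.7071067811865476))
assert torch.allclose(y, torch.nn.functional.gelu(x), atol=1e-6)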
"f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_196, [4624, 2048]); convert_element_type_196 = None convert_element_type_198: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg97_1, torch.float16); arg97_1 = None permute_72: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_198, [1, 0]); convert_element_type_198 = None # No stacktrace found for following nodes mm_default_64: "f16[4624, 512]" = torch.ops.aten.mm.default(view_185, permute_72); view_185 = permute_72 = None add_tensor_64: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_64, convert_element_type_197); mm_default_64 = convert_element_type_197 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_186: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_64, [1, 4624, 512]); add_tensor_64 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_72: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_68, view_186); add_68 = view_186 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_202: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_72, torch.float32) var_mean_17 = torch.ops.aten.var_mean.correction(convert_element_type_202, [2], correction = 0, keepdim = True) getitem_34: "f32[1, 4624, 1]" = var_mean_17[0] getitem_35: "f32[1, 4624, 1]" = var_mean_17[1]; var_mean_17 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_214: "f16[512]" = torch.ops.prims.convert_element_type.default(arg106_1, torch.float16); arg106_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_203: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg102_1, torch.float16); arg102_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_26: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_202, getitem_35); convert_element_type_202 = getitem_35 = None add_73: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_34, 1e-05); getitem_34 = None rsqrt_17: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_73); add_73 = None mul_64: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_26, rsqrt_17); sub_26 = rsqrt_17 = None mul_65: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_64, arg99_1); mul_64 = arg99_1 = None add_74: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_65, arg100_1); mul_65 = arg100_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_187: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_74, [1, 68, 68, 512]); add_74 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_6: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_187, [0, 0, 0, 2, 0, 2], 0.0); view_187 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // 
window_size, window_size, view_188: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_6, [1, 10, 7, 10, 7, 512]); constant_pad_nd_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_73: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_188, [0, 1, 3, 2, 4, 5]); view_188 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_79: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_73, memory_format = torch.contiguous_format); permute_73 = None view_189: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_79, [-1, 7, 7, 512]); clone_79 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_190: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_189, [-1, 49, 512]); view_189 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_205: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_190, torch.float16); view_190 = None view_191: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_205, [4900, 512]); convert_element_type_205 = None convert_element_type_204: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg101_1, torch.float16); arg101_1 = None permute_74: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_204, [1, 0]); convert_element_type_204 = None # No stacktrace found for following nodes mm_default_63: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_191, permute_74); view_191 = permute_74 = None add_tensor_63: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_63, convert_element_type_203); mm_default_63 = convert_element_type_203 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_192: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_63, [100, 49, 1536]); add_tensor_63 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_193: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_192, [100, 49, 3, 16, 32]); view_192 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_75: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_193, [2, 0, 3, 1, 4]); view_193 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_18: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_66: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_18, 0.1767766952966369); select_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_24: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_66, [100, 16, 49, 32]); mul_66 = None clone_80: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_24, memory_format = torch.contiguous_format); 
expand_24 = None view_194: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_80, [1600, 49, 32]); clone_80 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_19: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_76: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_19, [0, 1, 3, 2]); select_19 = None expand_25: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_76, [100, 16, 32, 49]); permute_76 = None clone_81: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_25, memory_format = torch.contiguous_format); expand_25 = None view_195: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_81, [1600, 32, 49]); clone_81 = None bmm_12: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_194, view_195); view_194 = view_195 = None view_196: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_12, [100, 16, 49, 49]); bmm_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_197: "i64[2401]" = torch.ops.aten.reshape.default(arg104_1, [-1]); arg104_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_18: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg103_1, [view_197]); arg103_1 = view_197 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_198: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_18, [49, 49, -1]); index_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_77: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_198, [2, 0, 1]); view_198 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_82: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_77, memory_format = torch.contiguous_format); permute_77 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_18: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_82, 0); clone_82 = None add_75: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_196, unsqueeze_18); view_196 = unsqueeze_18 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_6: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_75, [-1], True) sub_27: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_75, amax_6); add_75 = amax_6 = None exp_6: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_27); sub_27 = None sum_7: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_6, [-1], True) div_12: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_6, sum_7); exp_6 = sum_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_211: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_12, torch.float16); div_12 = None expand_26: "f16[100, 16, 49, 49]" = 
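# The relative position bias is a pure table lookup: a [169, 16] parameter
# ((2*7 - 1)**2 = 169 relative offsets for a 7x7 window, 16 heads) indexed by
# a precomputed [49, 49] int64 buffer, reshaped to [nH, 49, 49] and broadcast
# over all 100 windows. Sketch with random stand-ins for the arg103_1-style
# table and arg104_1-style index, not part of the captured graph:
import torch

table = torch.randn(169, 16)
rel_index = torch.randint(0, 169, (49, 49))
bias = table[rel_index.view(-1)].view(49, 49, -1).permute(2, 0, 1).contiguous()
attn = torch.randn(100, 16, 49, 49) + bias.unsqueeze(0)
assert attn.shape == (100, 16, 49, 49)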
torch.ops.aten.expand.default(convert_element_type_211, [100, 16, 49, 49]); convert_element_type_211 = None view_199: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_26, [1600, 49, 49]); expand_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_20: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 2); permute_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_27: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_20, [100, 16, 49, 32]); select_20 = None clone_84: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_27, memory_format = torch.contiguous_format); expand_27 = None view_200: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_84, [1600, 49, 32]); clone_84 = None bmm_13: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_199, view_200); view_199 = view_200 = None view_201: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_13, [100, 16, 49, 32]); bmm_13 = None permute_78: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_201, [0, 2, 1, 3]); view_201 = None clone_85: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_78, memory_format = torch.contiguous_format); permute_78 = None view_202: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_85, [100, 49, 512]); clone_85 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_203: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_202, [4900, 512]); view_202 = None convert_element_type_215: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg105_1, torch.float16); arg105_1 = None permute_79: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_215, [1, 0]); convert_element_type_215 = None # No stacktrace found for following nodes mm_default_62: "f16[4900, 512]" = torch.ops.aten.mm.default(view_203, permute_79); view_203 = permute_79 = None add_tensor_62: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_62, convert_element_type_214); mm_default_62 = convert_element_type_214 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_204: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_62, [100, 49, 512]); add_tensor_62 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_205: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_204, [-1, 7, 7, 512]); view_204 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_206: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_205, [1, 10, 10, 7, 7, -1]); view_205 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_80: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_206, [0, 1, 3, 2, 4, 5]); view_206 = None clone_87: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_80, memory_format = torch.contiguous_format); permute_80 = None view_207: "f16[1, 70, 70, 512]" = 
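# (attn @ v).transpose(1, 2).reshape(B_, N, C) is lowered to a single bmm by
# folding the batch and head dims into one batch axis (100 * 16 = 1600), then
# unfolding and re-laying-out the result. Equivalence sketch in fp32 (the
# graph runs this part in fp16), not part of the captured graph:
import torch

B_, H, N, D = 100, 16, 49, 32
attn = torch.randn(B_, H, N, N)
v = torch.randn(B_, H, N, D)

out = torch.bmm(attn.reshape(B_ * H, N, N), v.reshape(B_ * H, N, D))
out = out.reshape(B_, H, N, D).transpose(1, 2).reshape(B_, N, H * D)
ref = (attn @ v).transpose(1, 2).reshape(B_, N, H * D)
assert torch.allclose(out, ref, atol=1e-5)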
torch.ops.aten.reshape.default(clone_87, [1, 70, 70, -1]); clone_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_469: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_207, 1, 0, 68); view_207 = None slice_470: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_469, 2, 0, 68); slice_469 = None clone_88: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_470, memory_format = torch.contiguous_format); slice_470 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_208: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_88, [1, 4624, 512]); clone_88 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_76: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_72, view_208); add_72 = view_208 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_219: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_76, torch.float32) var_mean_18 = torch.ops.aten.var_mean.correction(convert_element_type_219, [2], correction = 0, keepdim = True) getitem_36: "f32[1, 4624, 1]" = var_mean_18[0] getitem_37: "f32[1, 4624, 1]" = var_mean_18[1]; var_mean_18 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_228: "f16[512]" = torch.ops.prims.convert_element_type.default(arg112_1, torch.float16); arg112_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_220: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg110_1, torch.float16); arg110_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_28: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_219, getitem_37); convert_element_type_219 = getitem_37 = None add_77: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_36, 1e-05); getitem_36 = None rsqrt_18: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_77); add_77 = None mul_67: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_28, rsqrt_18); sub_28 = rsqrt_18 = None mul_68: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_67, arg107_1); mul_67 = arg107_1 = None add_78: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_68, arg108_1); mul_68 = arg108_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_222: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_78, torch.float16); add_78 = None view_209: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_222, [4624, 512]); convert_element_type_222 = None convert_element_type_221: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg109_1, torch.float16); arg109_1 = None permute_81: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_221, [1, 0]); convert_element_type_221 = None # No stacktrace found for following nodes mm_default_61: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_209, permute_81); view_209 = permute_81 = None add_tensor_61: "f16[4624, 
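# Each LayerNorm is decomposed into var_mean (biased, correction=0), rsqrt,
# and the affine transform, with the fp16 activations upcast to fp32 first
# (eps = 1e-05, C = 512). Minimal sketch, not part of the captured graph:
import torch

x = torch.randn(1, 4624, 512, dtype=torch.float16)
weight, bias = torch.randn(512), torch.randn(512)

x32 = x.float()
var, mean = torch.var_mean(x32, -1, correction=0, keepdim=True)
y = (x32 - mean) * torch.rsqrt(var + 1e-5) * weight + bias
ref = torch.nn.functional.layer_norm(x32, (512,), weight, bias, eps=1e-5)
assert torch.allclose(y, ref, atol=1e-5)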
2048]" = torch.ops.aten.add.Tensor(mm_default_61, convert_element_type_220); mm_default_61 = convert_element_type_220 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_210: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_61, [1, 4624, 2048]); add_tensor_61 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_226: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_210, torch.float32); view_210 = None mul_69: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_226, 0.5) mul_70: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_226, 0.7071067811865476); convert_element_type_226 = None erf_6: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_70); mul_70 = None add_79: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_6, 1); erf_6 = None mul_71: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_69, add_79); mul_69 = add_79 = None convert_element_type_227: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_71, torch.float16); mul_71 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_211: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_227, [4624, 2048]); convert_element_type_227 = None convert_element_type_229: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg111_1, torch.float16); arg111_1 = None permute_82: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_229, [1, 0]); convert_element_type_229 = None # No stacktrace found for following nodes mm_default_60: "f16[4624, 512]" = torch.ops.aten.mm.default(view_211, permute_82); view_211 = permute_82 = None add_tensor_60: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_60, convert_element_type_228); mm_default_60 = convert_element_type_228 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_212: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_60, [1, 4624, 512]); add_tensor_60 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_80: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_76, view_212); add_76 = view_212 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_233: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_80, torch.float32) var_mean_19 = torch.ops.aten.var_mean.correction(convert_element_type_233, [2], correction = 0, keepdim = True) getitem_38: "f32[1, 4624, 1]" = var_mean_19[0] getitem_39: "f32[1, 4624, 1]" = var_mean_19[1]; var_mean_19 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_245: "f16[512]" = torch.ops.prims.convert_element_type.default(arg120_1, torch.float16); arg120_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_234: "f16[1536]" = 
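# Every nn.Linear in this graph is lowered to the same pattern: cast the
# fp32 master weight/bias to fp16, transpose the weight, flatten tokens,
# mm, add the bias, and reshape back to [B, tokens, features]. Structural
# sketch in fp32 (only the dtype casts are omitted), not part of the
# captured graph:
import torch

x = torch.randn(1, 4624, 512)
w = torch.randn(2048, 512)   # fc1-style weight, [out_features, in_features]
b = torch.randn(2048)

y = (x.reshape(-1, 512) @ w.t() + b).reshape(1, 4624, 2048)
ref = torch.nn.functional.linear(x, w, b)
assert torch.allclose(y, ref, atol=1e-4)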
torch.ops.prims.convert_element_type.default(arg116_1, torch.float16); arg116_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_29: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_233, getitem_39); convert_element_type_233 = getitem_39 = None add_81: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_38, 1e-05); getitem_38 = None rsqrt_19: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_81); add_81 = None mul_72: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_29, rsqrt_19); sub_29 = rsqrt_19 = None mul_73: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_72, arg113_1); mul_72 = arg113_1 = None add_82: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_73, arg114_1); mul_73 = arg114_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_213: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_82, [1, 68, 68, 512]); add_82 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_7: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_213, [0, 0, 0, 2, 0, 2], 0.0); view_213 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_12: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_83: "i64[70]" = torch.ops.aten.add.Tensor(iota_12, 3); iota_12 = None fmod_12: "i64[70]" = torch.ops.aten.fmod.Scalar(add_83, 70); add_83 = None index_19: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_7, [None, fmod_12]); constant_pad_nd_7 = fmod_12 = None iota_13: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_84: "i64[70]" = torch.ops.aten.add.Tensor(iota_13, 3); iota_13 = None fmod_13: "i64[70]" = torch.ops.aten.fmod.Scalar(add_84, 70); add_84 = None index_20: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_19, [None, None, fmod_13]); index_19 = fmod_13 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_214: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_20, [1, 10, 7, 10, 7, 512]); index_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_83: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_214, [0, 1, 3, 2, 4, 5]); view_214 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_91: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_83, memory_format = torch.contiguous_format); permute_83 = None view_215: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_91, [-1, 7, 7, 512]); clone_91 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_216: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_215, [-1, 49, 512]); view_215 = None # File: 
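# H = W = 68 is not a multiple of the window size 7, so each block pads the
# feature map to 70 x 70 before window partition and slices back to
# x[:, :H, :W, :] afterwards. The pad spec [0, 0, 0, 2, 0, 2] reads
# right-to-left over dims: C by (0, 0), W by (0, 2), H by (0, 2). Sketch,
# not part of the captured graph:
import torch
import torch.nn.functional as F

x = torch.randn(1, 68, 68, 512)
padded = F.pad(x, [0, 0, 0, 2, 0, 2])         # -> [1, 70, 70, 512]
assert padded.shape == (1, 70, 70, 512)
assert torch.equal(padded[:, :68, :68, :], x)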
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_236: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_216, torch.float16); view_216 = None view_217: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_236, [4900, 512]); convert_element_type_236 = None convert_element_type_235: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg115_1, torch.float16); arg115_1 = None permute_84: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_235, [1, 0]); convert_element_type_235 = None # No stacktrace found for following nodes mm_default_59: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_217, permute_84); view_217 = permute_84 = None add_tensor_59: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_59, convert_element_type_234); mm_default_59 = convert_element_type_234 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_218: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_59, [100, 49, 1536]); add_tensor_59 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_219: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_218, [100, 49, 3, 16, 32]); view_218 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_85: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_219, [2, 0, 3, 1, 4]); view_219 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_21: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_74: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_21, 0.1767766952966369); select_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_28: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_74, [100, 16, 49, 32]); mul_74 = None clone_92: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_28, memory_format = torch.contiguous_format); expand_28 = None view_220: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_92, [1600, 49, 32]); clone_92 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_22: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_86: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_22, [0, 1, 3, 2]); select_22 = None expand_29: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_86, [100, 16, 32, 49]); permute_86 = None clone_93: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_29, memory_format = torch.contiguous_format); expand_29 = None view_221: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_93, [1600, 32, 49]); clone_93 = None bmm_14: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_220, view_221); view_220 = view_221 = None view_222: "f16[100, 16, 49, 49]" = 
torch.ops.aten.reshape.default(bmm_14, [100, 16, 49, 49]); bmm_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_223: "i64[2401]" = torch.ops.aten.reshape.default(arg118_1, [-1]); arg118_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_21: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg117_1, [view_223]); arg117_1 = view_223 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_224: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_21, [49, 49, -1]); index_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_87: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_224, [2, 0, 1]); view_224 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_94: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_87, memory_format = torch.contiguous_format); permute_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_19: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_94, 0); clone_94 = None add_85: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_222, unsqueeze_19); view_222 = unsqueeze_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_225: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_85, [1, 100, 16, 49, 49]); add_85 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_20: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_21: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_20, 0); unsqueeze_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_86: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_225, unsqueeze_21); view_225 = unsqueeze_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_226: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_86, [-1, 16, 49, 49]); add_86 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_7: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_226, [-1], True) sub_30: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_226, amax_7); view_226 = amax_7 = None exp_7: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_30); sub_30 = None sum_8: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_7, [-1], True) div_13: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_7, sum_8); exp_7 = sum_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_242: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_13, torch.float16); div_13 = None expand_30: "f16[100, 16, 
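# In shifted (SW-MSA) blocks, the per-window attention mask (where_5 above;
# zero where attention is allowed, a large negative value where it is not)
# is added by viewing the logits as [B, nW, nH, N, N], unsqueezing the mask
# to [1, nW, 1, N, N], and flattening back before softmax. Sketch with a
# random stand-in mask, not part of the captured graph:
import torch

B, nW, nH, N = 1, 100, 16, 49
attn = torch.randn(B * nW, nH, N, N)
mask = torch.randn(nW, N, N)
attn = attn.view(B, nW, nH, N, N) + mask.unsqueeze(1).unsqueeze(0)
attn = attn.view(-1, nH, N, N)
assert attn.shape == (B * nW, nH, N, N)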
49, 49]" = torch.ops.aten.expand.default(convert_element_type_242, [100, 16, 49, 49]); convert_element_type_242 = None view_227: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_30, [1600, 49, 49]); expand_30 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_23: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 2); permute_85 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_31: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_23, [100, 16, 49, 32]); select_23 = None clone_96: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_31, memory_format = torch.contiguous_format); expand_31 = None view_228: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_96, [1600, 49, 32]); clone_96 = None bmm_15: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_227, view_228); view_227 = view_228 = None view_229: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_15, [100, 16, 49, 32]); bmm_15 = None permute_88: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_229, [0, 2, 1, 3]); view_229 = None clone_97: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_88, memory_format = torch.contiguous_format); permute_88 = None view_230: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_97, [100, 49, 512]); clone_97 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_231: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_230, [4900, 512]); view_230 = None convert_element_type_246: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg119_1, torch.float16); arg119_1 = None permute_89: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_246, [1, 0]); convert_element_type_246 = None # No stacktrace found for following nodes mm_default_58: "f16[4900, 512]" = torch.ops.aten.mm.default(view_231, permute_89); view_231 = permute_89 = None add_tensor_58: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_58, convert_element_type_245); mm_default_58 = convert_element_type_245 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_232: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_58, [100, 49, 512]); add_tensor_58 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_233: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_232, [-1, 7, 7, 512]); view_232 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_234: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_233, [1, 10, 10, 7, 7, -1]); view_233 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_90: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_234, [0, 1, 3, 2, 4, 5]); view_234 = None clone_99: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_90, memory_format = torch.contiguous_format); permute_90 = None view_235: "f16[1, 70, 70, 512]" = 
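# The view/permute/contiguous/view chains above are the standard Swin
# window_partition / window_reverse helpers (window_size = 7, padded
# H = W = 70, giving 10 x 10 = 100 windows). Round-trip sketch, not part of
# the captured graph:
import torch

def window_partition(x: torch.Tensor, ws: int) -> torch.Tensor:
    B, H, W, C = x.shape
    x = x.view(B, H // ws, ws, W // ws, ws, C)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, ws, ws, C)

def window_reverse(windows: torch.Tensor, ws: int, H: int, W: int) -> torch.Tensor:
    B = windows.shape[0] // ((H // ws) * (W // ws))
    x = windows.view(B, H // ws, W // ws, ws, ws, -1)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)

x = torch.randn(1, 70, 70, 512)
assert torch.equal(window_reverse(window_partition(x, 7), 7, 70, 70), x)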
torch.ops.aten.reshape.default(clone_99, [1, 70, 70, -1]); clone_99 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_14: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_87: "i64[70]" = torch.ops.aten.add.Tensor(iota_14, 67); iota_14 = None fmod_14: "i64[70]" = torch.ops.aten.fmod.Scalar(add_87, 70); add_87 = None index_22: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_235, [None, fmod_14]); view_235 = fmod_14 = None iota_15: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_88: "i64[70]" = torch.ops.aten.add.Tensor(iota_15, 67); iota_15 = None fmod_15: "i64[70]" = torch.ops.aten.fmod.Scalar(add_88, 70); add_88 = None index_23: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_22, [None, None, fmod_15]); index_22 = fmod_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_479: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_23, 1, 0, 68); index_23 = None slice_480: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_479, 2, 0, 68); slice_479 = None clone_100: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_480, memory_format = torch.contiguous_format); slice_480 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_236: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_100, [1, 4624, 512]); clone_100 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_89: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_80, view_236); add_80 = view_236 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_250: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_89, torch.float32) var_mean_20 = torch.ops.aten.var_mean.correction(convert_element_type_250, [2], correction = 0, keepdim = True) getitem_40: "f32[1, 4624, 1]" = var_mean_20[0] getitem_41: "f32[1, 4624, 1]" = var_mean_20[1]; var_mean_20 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_259: "f16[512]" = torch.ops.prims.convert_element_type.default(arg126_1, torch.float16); arg126_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_251: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg124_1, torch.float16); arg124_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_31: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_250, getitem_41); convert_element_type_250 = getitem_41 = None add_90: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_40, 1e-05); getitem_40 = None rsqrt_20: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_90); add_90 = None mul_75: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_31, rsqrt_20); sub_31 = rsqrt_20 = None mul_76: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_75, arg121_1); mul_75 
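# This is an inference graph, so drop_path compiles to the identity and both
# residuals reduce to plain adds (add_89 around attention above, add_93
# around the MLP). A hedged sketch of the block skeleton those adds come
# from, with hypothetical callables standing in for the submodules:
import torch

def swin_block(x, attn_fn, mlp, norm1, norm2):
    x = x + attn_fn(norm1(x))   # shortcut + drop_path(attn), drop_path == identity
    x = x + mlp(norm2(x))       # x + drop_path(mlp(norm2(x)))
    return x

x = torch.randn(1, 4624, 512)
ident = torch.nn.Identity()
assert torch.equal(swin_block(x, ident, ident, ident, ident), 4 * x)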
= arg121_1 = None add_91: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_76, arg122_1); mul_76 = arg122_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_253: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_91, torch.float16); add_91 = None view_237: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_253, [4624, 512]); convert_element_type_253 = None convert_element_type_252: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg123_1, torch.float16); arg123_1 = None permute_91: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_252, [1, 0]); convert_element_type_252 = None # No stacktrace found for following nodes mm_default_57: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_237, permute_91); view_237 = permute_91 = None add_tensor_57: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_57, convert_element_type_251); mm_default_57 = convert_element_type_251 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_238: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_57, [1, 4624, 2048]); add_tensor_57 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_257: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_238, torch.float32); view_238 = None mul_77: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_257, 0.5) mul_78: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_257, 0.7071067811865476); convert_element_type_257 = None erf_7: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_78); mul_78 = None add_92: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_7, 1); erf_7 = None mul_79: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_77, add_92); mul_77 = add_92 = None convert_element_type_258: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_79, torch.float16); mul_79 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_239: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_258, [4624, 2048]); convert_element_type_258 = None convert_element_type_260: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg125_1, torch.float16); arg125_1 = None permute_92: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_260, [1, 0]); convert_element_type_260 = None # No stacktrace found for following nodes mm_default_56: "f16[4624, 512]" = torch.ops.aten.mm.default(view_239, permute_92); view_239 = permute_92 = None add_tensor_56: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_56, convert_element_type_259); mm_default_56 = convert_element_type_259 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_240: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_56, [1, 4624, 512]); add_tensor_56 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_93: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_89, 
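# Between the two residuals each block runs the standard transformer MLP:
# fc1 (512 -> 2048), exact GELU, fc2 (2048 -> 512), i.e. the mm / gelu / mm
# sequence above with an expansion ratio of 4. Sketch, not part of the
# captured graph:
import torch

mlp = torch.nn.Sequential(
    torch.nn.Linear(512, 2048),
    torch.nn.GELU(),
    torch.nn.Linear(2048, 512),
)
assert mlp(torch.randn(1, 4624, 512)).shape == (1, 4624, 512)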
view_240); add_89 = view_240 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_264: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_93, torch.float32) var_mean_21 = torch.ops.aten.var_mean.correction(convert_element_type_264, [2], correction = 0, keepdim = True) getitem_42: "f32[1, 4624, 1]" = var_mean_21[0] getitem_43: "f32[1, 4624, 1]" = var_mean_21[1]; var_mean_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_276: "f16[512]" = torch.ops.prims.convert_element_type.default(arg134_1, torch.float16); arg134_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_265: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg130_1, torch.float16); arg130_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_32: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_264, getitem_43); convert_element_type_264 = getitem_43 = None add_94: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None rsqrt_21: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_94); add_94 = None mul_80: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_32, rsqrt_21); sub_32 = rsqrt_21 = None mul_81: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_80, arg127_1); mul_80 = arg127_1 = None add_95: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_81, arg128_1); mul_81 = arg128_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_241: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_95, [1, 68, 68, 512]); add_95 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_8: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_241, [0, 0, 0, 2, 0, 2], 0.0); view_241 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_242: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_8, [1, 10, 7, 10, 7, 512]); constant_pad_nd_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_93: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_242, [0, 1, 3, 2, 4, 5]); view_242 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_103: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_93, memory_format = torch.contiguous_format); permute_93 = None view_243: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_103, [-1, 7, 7, 512]); clone_103 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_244: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_243, [-1, 49, 512]); view_243 = None # File: 
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_267: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_244, torch.float16); view_244 = None view_245: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_267, [4900, 512]); convert_element_type_267 = None convert_element_type_266: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg129_1, torch.float16); arg129_1 = None permute_94: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_266, [1, 0]); convert_element_type_266 = None # No stacktrace found for following nodes mm_default_55: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_245, permute_94); view_245 = permute_94 = None add_tensor_55: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_55, convert_element_type_265); mm_default_55 = convert_element_type_265 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_246: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_55, [100, 49, 1536]); add_tensor_55 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_247: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_246, [100, 49, 3, 16, 32]); view_246 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_95: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_247, [2, 0, 3, 1, 4]); view_247 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_24: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_82: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_24, 0.1767766952966369); select_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_32: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_82, [100, 16, 49, 32]); mul_82 = None clone_104: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_32, memory_format = torch.contiguous_format); expand_32 = None view_248: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_104, [1600, 49, 32]); clone_104 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_25: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_96: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_25, [0, 1, 3, 2]); select_25 = None expand_33: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_96, [100, 16, 32, 49]); permute_96 = None clone_105: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_33, memory_format = torch.contiguous_format); expand_33 = None view_249: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_105, [1600, 32, 49]); clone_105 = None bmm_16: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_248, view_249); view_248 = view_249 = None view_250: "f16[100, 16, 49, 49]" = 
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_251: "i64[2401]" = torch.ops.aten.reshape.default(arg132_1, [-1]); arg132_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_24: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg131_1, [view_251]); arg131_1 = view_251 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_252: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_24, [49, 49, -1]); index_24 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_97: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_252, [2, 0, 1]); view_252 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_106: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_97, memory_format = torch.contiguous_format); permute_97 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_22: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_106, 0); clone_106 = None
add_96: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_250, unsqueeze_22); view_250 = unsqueeze_22 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_8: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_96, [-1], True)
sub_33: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_96, amax_8); add_96 = amax_8 = None
exp_8: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_33); sub_33 = None
sum_9: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_8, [-1], True)
div_14: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_8, sum_9); exp_8 = sum_9 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_273: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_14, torch.float16); div_14 = None
expand_34: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_273, [100, 16, 49, 49]); convert_element_type_273 = None
view_253: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_34, [1600, 49, 49]); expand_34 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_26: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 2); permute_95 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_35: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_26, [100, 16, 49, 32]); select_26 = None
clone_108: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_35, memory_format = torch.contiguous_format); expand_35 = None
view_254: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_108, [1600, 49, 32]); clone_108 = None
bmm_17: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_253, view_254); view_253 = view_254 = None
view_255: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_17, [100, 16, 49, 32]); bmm_17 = None
permute_98: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_255, [0, 2, 1, 3]); view_255 = None
clone_109: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_98, memory_format = torch.contiguous_format); permute_98 = None
view_256: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_109, [100, 49, 512]); clone_109 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_257: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_256, [4900, 512]); view_256 = None
convert_element_type_277: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg133_1, torch.float16); arg133_1 = None
permute_99: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_277, [1, 0]); convert_element_type_277 = None

# No stacktrace found for following nodes
mm_default_54: "f16[4900, 512]" = torch.ops.aten.mm.default(view_257, permute_99); view_257 = permute_99 = None
add_tensor_54: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_54, convert_element_type_276); mm_default_54 = convert_element_type_276 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_258: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_54, [100, 49, 512]); add_tensor_54 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_259: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_258, [-1, 7, 7, 512]); view_258 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_260: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_259, [1, 10, 10, 7, 7, -1]); view_259 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_100: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_260, [0, 1, 3, 2, 4, 5]); view_260 = None
clone_111: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_100, memory_format = torch.contiguous_format); permute_100 = None
view_261: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_111, [1, 70, 70, -1]); clone_111 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_483: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_261, 1, 0, 68); view_261 = None
slice_484: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_483, 2, 0, 68); slice_483 = None
clone_112: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_484, memory_format = torch.contiguous_format); slice_484 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_262: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_112, [1, 4624, 512]); clone_112 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_97: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_93, view_262); add_93 = view_262 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_281: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_97, torch.float32)
var_mean_22 = torch.ops.aten.var_mean.correction(convert_element_type_281, [2], correction = 0, keepdim = True)
getitem_44: "f32[1, 4624, 1]" = var_mean_22[0]
getitem_45: "f32[1, 4624, 1]" = var_mean_22[1]; var_mean_22 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_290: "f16[512]" = torch.ops.prims.convert_element_type.default(arg140_1, torch.float16); arg140_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_282: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg138_1, torch.float16); arg138_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_34: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_281, getitem_45); convert_element_type_281 = getitem_45 = None
add_98: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None
rsqrt_22: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_98); add_98 = None
mul_83: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_34, rsqrt_22); sub_34 = rsqrt_22 = None
mul_84: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_83, arg135_1); mul_83 = arg135_1 = None
add_99: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_84, arg136_1); mul_84 = arg136_1 = None
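# NOTE (added sketch, not part of the captured graph): var_mean_22/rsqrt_22
# above are F.layer_norm decomposed into primitives. The fp16 activation is
# upcast to fp32 before the reduction, and eps = 1e-05 matches nn.LayerNorm's
# default. Equivalent sketch (illustrative names):
def layer_norm_decomposed(x_f16, weight, bias, eps=1e-05):
    x = x_f16.float()                                     # fp32 for the reduction
    var, mean = torch.var_mean(x, dim=-1, correction=0, keepdim=True)
    return (x - mean) * torch.rsqrt(var + eps) * weight + bias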
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_284: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_99, torch.float16); add_99 = None
view_263: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_284, [4624, 512]); convert_element_type_284 = None
convert_element_type_283: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg137_1, torch.float16); arg137_1 = None
permute_101: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_283, [1, 0]); convert_element_type_283 = None

# No stacktrace found for following nodes
mm_default_53: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_263, permute_101); view_263 = permute_101 = None
add_tensor_53: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_53, convert_element_type_282); mm_default_53 = convert_element_type_282 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_264: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_53, [1, 4624, 2048]); add_tensor_53 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_288: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_264, torch.float32); view_264 = None
mul_85: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_288, 0.5)
mul_86: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_288, 0.7071067811865476); convert_element_type_288 = None
erf_8: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_86); mul_86 = None
add_100: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_8, 1); erf_8 = None
mul_87: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_85, add_100); mul_85 = add_100 = None
convert_element_type_289: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_87, torch.float16); mul_87 = None
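# NOTE (added sketch, not part of the captured graph): mul_85/mul_86/erf_8
# above are the exact (erf-based) GELU that F.gelu(approximate="none") lowers
# to, evaluated in fp32 and cast back:
#     gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
def gelu_decomposed(x_f16):
    x = x_f16.float()
    return (0.5 * x * (1.0 + torch.erf(x * 0.7071067811865476))).half()  # 1/sqrt(2)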
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_265: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_289, [4624, 2048]); convert_element_type_289 = None
convert_element_type_291: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg139_1, torch.float16); arg139_1 = None
permute_102: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_291, [1, 0]); convert_element_type_291 = None

# No stacktrace found for following nodes
mm_default_52: "f16[4624, 512]" = torch.ops.aten.mm.default(view_265, permute_102); view_265 = permute_102 = None
add_tensor_52: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_52, convert_element_type_290); mm_default_52 = convert_element_type_290 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_266: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_52, [1, 4624, 512]); add_tensor_52 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_101: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_97, view_266); add_97 = view_266 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_295: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_101, torch.float32)
var_mean_23 = torch.ops.aten.var_mean.correction(convert_element_type_295, [2], correction = 0, keepdim = True)
getitem_46: "f32[1, 4624, 1]" = var_mean_23[0]
getitem_47: "f32[1, 4624, 1]" = var_mean_23[1]; var_mean_23 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_307: "f16[512]" = torch.ops.prims.convert_element_type.default(arg148_1, torch.float16); arg148_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_296: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg144_1, torch.float16); arg144_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_35: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_295, getitem_47); convert_element_type_295 = getitem_47 = None
add_102: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_46, 1e-05); getitem_46 = None
rsqrt_23: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_102); add_102 = None
mul_88: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_35, rsqrt_23); sub_35 = rsqrt_23 = None
mul_89: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_88, arg141_1); mul_88 = arg141_1 = None
add_103: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_89, arg142_1); mul_89 = arg142_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_267: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_103, [1, 68, 68, 512]); add_103 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_9: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_267, [0, 0, 0, 2, 0, 2], 0.0); view_267 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_16: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_104: "i64[70]" = torch.ops.aten.add.Tensor(iota_16, 3); iota_16 = None
fmod_16: "i64[70]" = torch.ops.aten.fmod.Scalar(add_104, 70); add_104 = None
index_25: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_9, [None, fmod_16]); constant_pad_nd_9 = fmod_16 = None
iota_17: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_105: "i64[70]" = torch.ops.aten.add.Tensor(iota_17, 3); iota_17 = None
fmod_17: "i64[70]" = torch.ops.aten.fmod.Scalar(add_105, 70); add_105 = None
index_26: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_25, [None, None, fmod_17]); index_25 = fmod_17 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_268: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_26, [1, 10, 7, 10, 7, 512]); index_26 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_103: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_268, [0, 1, 3, 2, 4, 5]); view_268 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_115: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_103, memory_format = torch.contiguous_format); permute_103 = None
view_269: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_115, [-1, 7, 7, 512]); clone_115 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_270: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_269, [-1, 49, 512]); view_269 = None
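# NOTE (added sketch, not part of the captured graph): iota_16/add_104/fmod_16/
# index_25 above are torch.roll lowered to explicit index arithmetic. The source
# line (swin_transformer.py:280) rolls by -shift_size = -3 on both spatial dims
# before windowing; the inverse roll after attention (line 306, further below)
# adds 67 = 70 - 3 instead. Single-dim sketch; torch.roll(x, shift) reads
# source index (i - shift) % size:
def roll_decomposed(x, shift, dim):
    size = x.shape[dim]
    idx = (torch.arange(size, device=x.device) - shift) % size
    return x.index_select(dim, idx)   # == torch.roll(x, shifts=shift, dims=dim)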
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_298: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_270, torch.float16); view_270 = None
view_271: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_298, [4900, 512]); convert_element_type_298 = None
convert_element_type_297: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg143_1, torch.float16); arg143_1 = None
permute_104: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_297, [1, 0]); convert_element_type_297 = None

# No stacktrace found for following nodes
mm_default_51: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_271, permute_104); view_271 = permute_104 = None
add_tensor_51: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_51, convert_element_type_296); mm_default_51 = convert_element_type_296 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_272: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_51, [100, 49, 1536]); add_tensor_51 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_273: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_272, [100, 49, 3, 16, 32]); view_272 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_105: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_273, [2, 0, 3, 1, 4]); view_273 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_27: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 0)

# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_90: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_27, 0.1767766952966369); select_27 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_36: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_90, [100, 16, 49, 32]); mul_90 = None
clone_116: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_36, memory_format = torch.contiguous_format); expand_36 = None
view_274: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_116, [1600, 49, 32]); clone_116 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_28: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 1)

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_106: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_28, [0, 1, 3, 2]); select_28 = None
expand_37: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_106, [100, 16, 32, 49]); permute_106 = None
clone_117: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_37, memory_format = torch.contiguous_format); expand_37 = None
view_275: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_117, [1600, 32, 49]); clone_117 = None
bmm_18: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_274, view_275); view_274 = view_275 = None
view_276: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_18, [100, 16, 49, 49]); bmm_18 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_277: "i64[2401]" = torch.ops.aten.reshape.default(arg146_1, [-1]); arg146_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_27: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg145_1, [view_277]); arg145_1 = view_277 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_278: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_27, [49, 49, -1]); index_27 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_107: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_278, [2, 0, 1]); view_278 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_118: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_107, memory_format = torch.contiguous_format); permute_107 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_23: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_118, 0); clone_118 = None
add_106: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_276, unsqueeze_23); view_276 = unsqueeze_23 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_279: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_106, [1, 100, 16, 49, 49]); add_106 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_24: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_25: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_24, 0); unsqueeze_24 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_107: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_279, unsqueeze_25); view_279 = unsqueeze_25 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_280: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_107, [-1, 16, 49, 49]); add_107 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_9: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_280, [-1], True)
sub_36: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_280, amax_9); view_280 = amax_9 = None
exp_9: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_36); sub_36 = None
sum_10: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_9, [-1], True)
div_15: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_9, sum_10); exp_9 = sum_10 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_304: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_15, torch.float16); div_15 = None
expand_38: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_304, [100, 16, 49, 49]); convert_element_type_304 = None
view_281: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_38, [1600, 49, 49]); expand_38 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_29: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 2); permute_105 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_39: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_29, [100, 16, 49, 32]); select_29 = None
clone_120: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_39, memory_format = torch.contiguous_format); expand_39 = None
view_282: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_120, [1600, 49, 32]); clone_120 = None
bmm_19: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_281, view_282); view_281 = view_282 = None
view_283: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_19, [100, 16, 49, 32]); bmm_19 = None
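# NOTE (added sketch, not part of the captured graph): clone_118/add_106 above
# add the relative-position bias (index_27 gathers the f32[169, 16] table
# through the precomputed i64[49, 49] index), add_107 adds the shifted-window
# mask (where_5, an (nW, 49, 49) tensor of 0 / -100 entries computed earlier in
# the graph), and amax/sub/exp/sum/div is the numerically stable softmax that
# F.softmax lowers to, kept in fp32 and cast back to fp16 for attn @ v.
# Eager-mode sketch (illustrative names; the unshifted blocks skip the mask):
def finish_attention(attn, bias_table, rel_index, mask, num_heads=16):
    N = rel_index.shape[0]                                 # 49 tokens per window
    bias = bias_table[rel_index.view(-1)].view(N, N, -1)   # (49, 49, heads)
    attn = attn.float() + bias.permute(2, 0, 1).contiguous().unsqueeze(0)
    nW = mask.shape[0]
    attn = attn.view(-1, nW, num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
    attn = attn.view(-1, num_heads, N, N)
    attn = attn - attn.amax(dim=-1, keepdim=True)          # stable softmax
    attn = attn.exp()
    return (attn / attn.sum(dim=-1, keepdim=True)).half()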
permute_108: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_283, [0, 2, 1, 3]); view_283 = None
clone_121: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_108, memory_format = torch.contiguous_format); permute_108 = None
view_284: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_121, [100, 49, 512]); clone_121 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_285: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_284, [4900, 512]); view_284 = None
convert_element_type_308: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg147_1, torch.float16); arg147_1 = None
permute_109: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_308, [1, 0]); convert_element_type_308 = None

# No stacktrace found for following nodes
mm_default_50: "f16[4900, 512]" = torch.ops.aten.mm.default(view_285, permute_109); view_285 = permute_109 = None
add_tensor_50: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_50, convert_element_type_307); mm_default_50 = convert_element_type_307 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_286: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_50, [100, 49, 512]); add_tensor_50 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_287: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_286, [-1, 7, 7, 512]); view_286 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_288: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_287, [1, 10, 10, 7, 7, -1]); view_287 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_110: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_288, [0, 1, 3, 2, 4, 5]); view_288 = None
clone_123: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_110, memory_format = torch.contiguous_format); permute_110 = None
view_289: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_123, [1, 70, 70, -1]); clone_123 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_18: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_108: "i64[70]" = torch.ops.aten.add.Tensor(iota_18, 67); iota_18 = None
fmod_18: "i64[70]" = torch.ops.aten.fmod.Scalar(add_108, 70); add_108 = None
index_28: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_289, [None, fmod_18]); view_289 = fmod_18 = None
iota_19: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_109: "i64[70]" = torch.ops.aten.add.Tensor(iota_19, 67); iota_19 = None
fmod_19: "i64[70]" = torch.ops.aten.fmod.Scalar(add_109, 70); add_109 = None
index_29: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_28, [None, None, fmod_19]); index_28 = fmod_19 = None
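# NOTE (added sketch, not part of the captured graph): view_288/permute_110/
# view_289 above implement the window_reverse helper referenced in the stack
# trace (swin_transformer.py:93-95), stitching the 100 attention windows back
# into the padded 70x70 feature map; the +67 = 70 - 3 roll then undoes the
# earlier shift. Sketch of the helper under the shapes seen here:
def window_reverse(windows, window_size, H, W):
    # (num_windows*B, ws, ws, C) -> (B, H, W, C)
    B = windows.shape[0] // ((H // window_size) * (W // window_size))
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)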
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_493: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_29, 1, 0, 68); index_29 = None
slice_494: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_493, 2, 0, 68); slice_493 = None
clone_124: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_494, memory_format = torch.contiguous_format); slice_494 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_290: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_124, [1, 4624, 512]); clone_124 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_110: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_101, view_290); add_101 = view_290 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_312: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_110, torch.float32)
var_mean_24 = torch.ops.aten.var_mean.correction(convert_element_type_312, [2], correction = 0, keepdim = True)
getitem_48: "f32[1, 4624, 1]" = var_mean_24[0]
getitem_49: "f32[1, 4624, 1]" = var_mean_24[1]; var_mean_24 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_321: "f16[512]" = torch.ops.prims.convert_element_type.default(arg154_1, torch.float16); arg154_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_313: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg152_1, torch.float16); arg152_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_37: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_312, getitem_49); convert_element_type_312 = getitem_49 = None
add_111: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_48, 1e-05); getitem_48 = None
rsqrt_24: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_111); add_111 = None
mul_91: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_37, rsqrt_24); sub_37 = rsqrt_24 = None
mul_92: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_91, arg149_1); mul_91 = arg149_1 = None
add_112: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_92, arg150_1); mul_92 = arg150_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_315: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_112, torch.float16); add_112 = None
view_291: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_315, [4624, 512]); convert_element_type_315 = None
convert_element_type_314: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg151_1, torch.float16); arg151_1 = None
permute_111: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_314, [1, 0]); convert_element_type_314 = None

# No stacktrace found for following nodes
mm_default_49: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_291, permute_111); view_291 = permute_111 = None
add_tensor_49: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_49, convert_element_type_313); mm_default_49 = convert_element_type_313 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_292: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_49, [1, 4624, 2048]); add_tensor_49 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_319: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_292, torch.float32); view_292 = None
mul_93: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_319, 0.5)
mul_94: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_319, 0.7071067811865476); convert_element_type_319 = None
erf_9: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_94); mul_94 = None
add_113: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_9, 1); erf_9 = None
mul_95: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_93, add_113); mul_93 = add_113 = None
convert_element_type_320: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_95, torch.float16); mul_95 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_293: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_320, [4624, 2048]); convert_element_type_320 = None
convert_element_type_322: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg153_1, torch.float16); arg153_1 = None
permute_112: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_322, [1, 0]); convert_element_type_322 = None

# No stacktrace found for following nodes
mm_default_48: "f16[4624, 512]" = torch.ops.aten.mm.default(view_293, permute_112); view_293 = permute_112 = None
add_tensor_48: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_48, convert_element_type_321); mm_default_48 = convert_element_type_321 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_294: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_48, [1, 4624, 512]); add_tensor_48 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_114: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_110, view_294); add_110 = view_294 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_326: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_114, torch.float32)
var_mean_25 = torch.ops.aten.var_mean.correction(convert_element_type_326, [2], correction = 0, keepdim = True)
getitem_50: "f32[1, 4624, 1]" = var_mean_25[0]
getitem_51: "f32[1, 4624, 1]" = var_mean_25[1]; var_mean_25 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_338: "f16[512]" = torch.ops.prims.convert_element_type.default(arg162_1, torch.float16); arg162_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_327: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg158_1, torch.float16); arg158_1 = None
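# NOTE (added, not part of the captured graph): a recurring pattern throughout
# this listing is that weights and activations are cast to fp16
# (convert_element_type) immediately before every mm/bmm, while the
# precision-sensitive reductions -- softmax, layer_norm, GELU -- are upcast to
# fp32 and cast back afterwards. This is the usual mixed-precision split
# (half-precision matmuls, full-precision reductions), comparable to running
# the model under torch.autocast in eager mode (model and inp below are
# illustrative names, not from this graph):
with torch.autocast("cuda", dtype=torch.float16):
    out = model(inp)   # matmul-heavy ops in fp16; sensitive reductions in fp32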
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_38: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_326, getitem_51); convert_element_type_326 = getitem_51 = None
add_115: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_50, 1e-05); getitem_50 = None
rsqrt_25: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_115); add_115 = None
mul_96: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_38, rsqrt_25); sub_38 = rsqrt_25 = None
mul_97: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_96, arg155_1); mul_96 = arg155_1 = None
add_116: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_97, arg156_1); mul_97 = arg156_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_295: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_116, [1, 68, 68, 512]); add_116 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_10: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_295, [0, 0, 0, 2, 0, 2], 0.0); view_295 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_296: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_10, [1, 10, 7, 10, 7, 512]); constant_pad_nd_10 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_113: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_296, [0, 1, 3, 2, 4, 5]); view_296 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_127: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_113, memory_format = torch.contiguous_format); permute_113 = None
view_297: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_127, [-1, 7, 7, 512]); clone_127 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_298: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_297, [-1, 49, 512]); view_297 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_329: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_298, torch.float16); view_298 = None
view_299: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_329, [4900, 512]); convert_element_type_329 = None
convert_element_type_328: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg157_1, torch.float16); arg157_1 = None
permute_114: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_328, [1, 0]); convert_element_type_328 = None

# No stacktrace found for following nodes
mm_default_47: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_299, permute_114); view_299 = permute_114 = None
add_tensor_47: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_47, convert_element_type_327); mm_default_47 = convert_element_type_327 = None
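# NOTE (added sketch, not part of the captured graph): constant_pad_nd_10 above
# is F.pad(x, (0, 0, 0, 2, 0, 2)), padding H and W from 68 to 70 (the next
# multiple of window_size = 7); since this block has no shift, padding is
# followed directly by the window_partition helper from the stack trace
# (swin_transformer.py:75-78). Sketch of that helper:
def window_partition(x, window_size=7):
    # (B, H, W, C) -> (num_windows*B, window_size, window_size, C)
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)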
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_300: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_47, [100, 49, 1536]); add_tensor_47 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_301: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_300, [100, 49, 3, 16, 32]); view_300 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_115: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_301, [2, 0, 3, 1, 4]); view_301 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_30: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 0)

# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_98: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_30, 0.1767766952966369); select_30 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_40: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_98, [100, 16, 49, 32]); mul_98 = None
clone_128: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_40, memory_format = torch.contiguous_format); expand_40 = None
view_302: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_128, [1600, 49, 32]); clone_128 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_31: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 1)

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_116: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_31, [0, 1, 3, 2]); select_31 = None
expand_41: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_116, [100, 16, 32, 49]); permute_116 = None
clone_129: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_41, memory_format = torch.contiguous_format); expand_41 = None
view_303: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_129, [1600, 32, 49]); clone_129 = None
bmm_20: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_302, view_303); view_302 = view_303 = None
view_304: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_20, [100, 16, 49, 49]); bmm_20 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_305: "i64[2401]" = torch.ops.aten.reshape.default(arg160_1, [-1]); arg160_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_30: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg159_1, [view_305]); arg159_1 = view_305 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_306: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_30, [49, 49, -1]); index_30 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_117: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_306, [2, 0, 1]); view_306 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_130: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_117, memory_format = torch.contiguous_format); permute_117 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_26: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_130, 0); clone_130 = None
add_117: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_304, unsqueeze_26); view_304 = unsqueeze_26 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_10: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_117, [-1], True)
sub_39: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_117, amax_10); add_117 = amax_10 = None
exp_10: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_39); sub_39 = None
sum_11: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_10, [-1], True)
div_16: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_10, sum_11); exp_10 = sum_11 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_335: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_16, torch.float16); div_16 = None
expand_42: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_335, [100, 16, 49, 49]); convert_element_type_335 = None
view_307: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_42, [1600, 49, 49]); expand_42 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_32: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 2); permute_115 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_43: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_32, [100, 16, 49, 32]); select_32 = None
clone_132: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_43, memory_format = torch.contiguous_format); expand_43 = None
view_308: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_132, [1600, 49, 32]); clone_132 = None
bmm_21: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_307, view_308); view_307 = view_308 = None
view_309: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_21, [100, 16, 49, 32]); bmm_21 = None
permute_118: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_309, [0, 2, 1, 3]); view_309 = None
clone_133: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_118, memory_format = torch.contiguous_format); permute_118 = None
view_310: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_133, [100, 49, 512]); clone_133 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_311: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_310, [4900, 512]); view_310 = None
convert_element_type_339: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg161_1, torch.float16); arg161_1 = None
permute_119: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_339, [1, 0]); convert_element_type_339 = None
# No stacktrace found for following nodes
mm_default_46: "f16[4900, 512]" = torch.ops.aten.mm.default(view_311, permute_119); view_311 = permute_119 = None
add_tensor_46: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_46, convert_element_type_338); mm_default_46 = convert_element_type_338 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_312: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_46, [100, 49, 512]); add_tensor_46 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_313: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_312, [-1, 7, 7, 512]); view_312 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_314: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_313, [1, 10, 10, 7, 7, -1]); view_313 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_120: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_314, [0, 1, 3, 2, 4, 5]); view_314 = None
clone_135: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_120, memory_format = torch.contiguous_format); permute_120 = None
view_315: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_135, [1, 70, 70, -1]); clone_135 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_497: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_315, 1, 0, 68); view_315 = None
slice_498: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_497, 2, 0, 68); slice_497 = None
clone_136: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_498, memory_format = torch.contiguous_format); slice_498 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_316: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_136, [1, 4624, 512]); clone_136 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_118: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_114, view_316); add_114 = view_316 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_343: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_118, torch.float32)
var_mean_26 = torch.ops.aten.var_mean.correction(convert_element_type_343, [2], correction = 0, keepdim = True)
getitem_52: "f32[1, 4624, 1]" = var_mean_26[0]
getitem_53: "f32[1, 4624, 1]" = var_mean_26[1]; var_mean_26 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_352: "f16[512]" = torch.ops.prims.convert_element_type.default(arg168_1, torch.float16); arg168_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_344: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg166_1, torch.float16); arg166_1 = None
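# NOTE (added sketch, not part of the captured graph): each repetition of the
# pattern above is one Swin block of this 512-channel, 16-head stage (68x68
# tokens, window 7, alternating shift 0 / 3). Per the stack traces at
# swin_transformer.py:318-319, the residual skeleton is (schematic; drop_path
# is the identity in this inference graph):
def swin_block(x, norm1, window_attn, norm2, mlp, drop_path=lambda t: t):
    shortcut = x
    x = window_attn(norm1(x))                 # pad/shift/partition happen inside
    x = shortcut + drop_path(x)               # line 318
    x = x + drop_path(mlp(norm2(x)))          # line 319
    return x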
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_40: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_343, getitem_53); convert_element_type_343 = getitem_53 = None
add_119: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_52, 1e-05); getitem_52 = None
rsqrt_26: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_119); add_119 = None
mul_99: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_40, rsqrt_26); sub_40 = rsqrt_26 = None
mul_100: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_99, arg163_1); mul_99 = arg163_1 = None
add_120: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_100, arg164_1); mul_100 = arg164_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_346: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_120, torch.float16); add_120 = None
view_317: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_346, [4624, 512]); convert_element_type_346 = None
convert_element_type_345: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg165_1, torch.float16); arg165_1 = None
permute_121: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_345, [1, 0]); convert_element_type_345 = None

# No stacktrace found for following nodes
mm_default_45: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_317, permute_121); view_317 = permute_121 = None
add_tensor_45: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_45, convert_element_type_344); mm_default_45 = convert_element_type_344 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_318: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_45, [1, 4624, 2048]); add_tensor_45 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_350: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_318, torch.float32); view_318 = None
mul_101: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_350, 0.5)
mul_102: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_350, 0.7071067811865476); convert_element_type_350 = None
erf_10: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_102); mul_102 = None
add_121: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_10, 1); erf_10 = None
mul_103: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_101, add_121); mul_101 = add_121 = None
convert_element_type_351: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_103, torch.float16); mul_103 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_319: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_351, [4624, 2048]); convert_element_type_351 = None
convert_element_type_353: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg167_1, torch.float16); arg167_1 = None
permute_122: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_353, [1, 0]); convert_element_type_353 = None

# No stacktrace found for following nodes
mm_default_44: "f16[4624, 512]" = torch.ops.aten.mm.default(view_319, permute_122); view_319 = permute_122 = None
add_tensor_44: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_44, convert_element_type_352); mm_default_44 = convert_element_type_352 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_320: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_44, [1, 4624, 512]); add_tensor_44 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_122: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_118, view_320); add_118 = view_320 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_357: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_122, torch.float32)
var_mean_27 = torch.ops.aten.var_mean.correction(convert_element_type_357, [2], correction = 0, keepdim = True)
getitem_54: "f32[1, 4624, 1]" = var_mean_27[0]
getitem_55: "f32[1, 4624, 1]" = var_mean_27[1]; var_mean_27 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_369: "f16[512]" = torch.ops.prims.convert_element_type.default(arg176_1, torch.float16); arg176_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_358: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg172_1, torch.float16); arg172_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_41: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_357, getitem_55); convert_element_type_357 = getitem_55 = None
add_123: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_54, 1e-05); getitem_54 = None
rsqrt_27: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_123); add_123 = None
mul_104: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_41, rsqrt_27); sub_41 = rsqrt_27 = None
mul_105: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_104, arg169_1); mul_104 = arg169_1 = None
add_124: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_105, arg170_1); mul_105 = arg170_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_321: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_124, [1, 68, 68, 512]); add_124 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_11: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_321, [0, 0, 0, 2, 0, 2], 0.0); view_321 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_20: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_125: "i64[70]" = torch.ops.aten.add.Tensor(iota_20, 3); iota_20 = None
fmod_20: "i64[70]" = torch.ops.aten.fmod.Scalar(add_125, 70); add_125 = None
index_31: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_11, [None, fmod_20]); constant_pad_nd_11 = fmod_20 = None
iota_21: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_126: "i64[70]" = torch.ops.aten.add.Tensor(iota_21, 3); iota_21 = None
fmod_21: "i64[70]" = torch.ops.aten.fmod.Scalar(add_126, 70); add_126 = None
index_32: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_31, [None, None, fmod_21]); index_31 = fmod_21 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_322: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_32, [1, 10, 7, 10, 7, 512]); index_32 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_123: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_322, [0, 1, 3, 2, 4, 5]); view_322 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_139: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_123, memory_format = torch.contiguous_format); permute_123 = None
view_323: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_139, [-1, 7, 7, 512]); clone_139 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_324: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_323, [-1, 49, 512]); view_323 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_360: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_324, torch.float16); view_324 = None
view_325: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_360, [4900, 512]); convert_element_type_360 = None
convert_element_type_359: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg171_1, torch.float16); arg171_1 = None
permute_124: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_359, [1, 0]); convert_element_type_359 = None

# No stacktrace found for following nodes
mm_default_43: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_325, permute_124); view_325 = permute_124 = None
add_tensor_43: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_43, convert_element_type_358); mm_default_43 = convert_element_type_358 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_326: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_43, [100, 49, 1536]); add_tensor_43 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_327: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_326, [100, 49, 3, 16, 32]); view_326 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_125: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_327, [2, 0, 3, 1, 4]); view_327 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_33: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_106: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_33, 0.1767766952966369); select_33 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_44: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_106, [100, 16, 49, 32]); mul_106 = None
clone_140: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_44, memory_format = torch.contiguous_format); expand_44 = None
view_328: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_140, [1600, 49, 32]); clone_140 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_34: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 1)

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_126: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_34, [0, 1, 3, 2]); select_34 = None
expand_45: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_126, [100, 16, 32, 49]); permute_126 = None
clone_141: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_45, memory_format = torch.contiguous_format); expand_45 = None
view_329: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_141, [1600, 32, 49]); clone_141 = None
bmm_22: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_328, view_329); view_328 = view_329 = None
view_330: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_22, [100, 16, 49, 49]); bmm_22 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_331: "i64[2401]" = torch.ops.aten.reshape.default(arg174_1, [-1]); arg174_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_33: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg173_1, [view_331]); arg173_1 = view_331 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_332: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_33, [49, 49, -1]); index_33 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_127: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_332, [2, 0, 1]); view_332 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_142: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_127, memory_format = torch.contiguous_format); permute_127 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_27: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_142, 0); clone_142 = None
add_127: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_330, unsqueeze_27); view_330 = unsqueeze_27 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_333: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_127, [1, 100, 16, 49, 49]); add_127 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_333: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_127, [1, 100, 16, 49, 49]); add_127 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_28: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_29: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_28, 0); unsqueeze_28 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_128: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_333, unsqueeze_29); view_333 = unsqueeze_29 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_334: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_128, [-1, 16, 49, 49]); add_128 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_11: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_334, [-1], True)
sub_42: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_334, amax_11); view_334 = amax_11 = None
exp_11: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_42); sub_42 = None
sum_12: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_11, [-1], True)
div_17: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_11, sum_12); exp_11 = sum_12 = None
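# The amax/sub/exp/sum/div sequence above is the standard numerically stable
# softmax decomposition: subtracting the row max before exp does not change the
# result but keeps exp() from overflowing. Sketch of the equivalent computation:
def _sketch_softmax_lastdim(attn):
    m = attn.amax(dim=-1, keepdim=True)
    e = (attn - m).exp()
    return e / e.sum(dim=-1, keepdim=True)  # == torch.softmax(attn, dim=-1)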
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_366: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_17, torch.float16); div_17 = None
expand_46: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_366, [100, 16, 49, 49]); convert_element_type_366 = None
view_335: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_46, [1600, 49, 49]); expand_46 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_35: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 2); permute_125 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_47: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_35, [100, 16, 49, 32]); select_35 = None
clone_144: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_47, memory_format = torch.contiguous_format); expand_47 = None
view_336: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_144, [1600, 49, 32]); clone_144 = None
bmm_23: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_335, view_336); view_335 = view_336 = None
view_337: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_23, [100, 16, 49, 32]); bmm_23 = None
permute_128: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_337, [0, 2, 1, 3]); view_337 = None
clone_145: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_128, memory_format = torch.contiguous_format); permute_128 = None
view_338: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_145, [100, 49, 512]); clone_145 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_339: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_338, [4900, 512]); view_338 = None
convert_element_type_370: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg175_1, torch.float16); arg175_1 = None
permute_129: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_370, [1, 0]); convert_element_type_370 = None
# No stacktrace found for following nodes
mm_default_42: "f16[4900, 512]" = torch.ops.aten.mm.default(view_339, permute_129); view_339 = permute_129 = None
add_tensor_42: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_42, convert_element_type_369); mm_default_42 = convert_element_type_369 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_340: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_42, [100, 49, 512]); add_tensor_42 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_341: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_340, [-1, 7, 7, 512]); view_340 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_342: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_341, [1, 10, 10, 7, 7, -1]); view_341 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_130: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_342, [0, 1, 3, 2, 4, 5]); view_342 = None
clone_147: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_130, memory_format = torch.contiguous_format); permute_130 = None
view_343: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_147, [1, 70, 70, -1]); clone_147 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_22: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_129: "i64[70]" = torch.ops.aten.add.Tensor(iota_22, 67); iota_22 = None
fmod_22: "i64[70]" = torch.ops.aten.fmod.Scalar(add_129, 70); add_129 = None
index_34: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_343, [None, fmod_22]); view_343 = fmod_22 = None
iota_23: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_130: "i64[70]" = torch.ops.aten.add.Tensor(iota_23, 67); iota_23 = None
fmod_23: "i64[70]" = torch.ops.aten.fmod.Scalar(add_130, 70); add_130 = None
index_35: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_34, [None, None, fmod_23]); index_34 = fmod_23 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_507: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_35, 1, 0, 68); index_35 = None
slice_508: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_507, 2, 0, 68); slice_507 = None
clone_148: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_508, memory_format = torch.contiguous_format); slice_508 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_344: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_148, [1, 4624, 512]); clone_148 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_131: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_122, view_344); add_122 = view_344 = None
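# view_342/permute_130/view_343 above invert window_partition, and the add-67 fmod
# indexing is torch.roll with shift +3 (67 == -3 mod 70), undoing the earlier cyclic
# shift before the padding is cropped back to 68x68. Illustrative eager-mode sketch:
def _sketch_window_reverse(windows, window_size, H, W):
    # (num_windows * B, ws, ws, C) -> (B, H, W, C)
    B = windows.shape[0] * window_size * window_size // (H * W)
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)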
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_374: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_131, torch.float32)
var_mean_28 = torch.ops.aten.var_mean.correction(convert_element_type_374, [2], correction = 0, keepdim = True)
getitem_56: "f32[1, 4624, 1]" = var_mean_28[0]
getitem_57: "f32[1, 4624, 1]" = var_mean_28[1]; var_mean_28 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_383: "f16[512]" = torch.ops.prims.convert_element_type.default(arg182_1, torch.float16); arg182_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_375: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg180_1, torch.float16); arg180_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_43: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_374, getitem_57); convert_element_type_374 = getitem_57 = None
add_132: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_56, 1e-05); getitem_56 = None
rsqrt_28: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_132); add_132 = None
mul_107: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_43, rsqrt_28); sub_43 = rsqrt_28 = None
mul_108: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_107, arg177_1); mul_107 = arg177_1 = None
add_133: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_108, arg178_1); mul_108 = arg178_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_377: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_133, torch.float16); add_133 = None
view_345: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_377, [4624, 512]); convert_element_type_377 = None
convert_element_type_376: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg179_1, torch.float16); arg179_1 = None
permute_131: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_376, [1, 0]); convert_element_type_376 = None
# No stacktrace found for following nodes
mm_default_41: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_345, permute_131); view_345 = permute_131 = None
add_tensor_41: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_41, convert_element_type_375); mm_default_41 = convert_element_type_375 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_346: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_41, [1, 4624, 2048]); add_tensor_41 = None
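# The var_mean/rsqrt/mul/add group above is F.layer_norm decomposed: statistics are
# taken over the channel dim in fp32 (correction=0 gives the biased variance), then
# the affine weight and bias are applied. Sketch of the same computation:
def _sketch_layer_norm(x, weight, bias, eps=1e-05):
    var, mean = torch.var_mean(x.float(), dim=-1, correction=0, keepdim=True)
    return (x.float() - mean) * torch.rsqrt(var + eps) * weight + bias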
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_381: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_346, torch.float32); view_346 = None
mul_109: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_381, 0.5)
mul_110: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_381, 0.7071067811865476); convert_element_type_381 = None
erf_11: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_110); mul_110 = None
add_134: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_11, 1); erf_11 = None
mul_111: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_109, add_134); mul_109 = add_134 = None
convert_element_type_382: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_111, torch.float16); mul_111 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_347: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_382, [4624, 2048]); convert_element_type_382 = None
convert_element_type_384: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg181_1, torch.float16); arg181_1 = None
permute_132: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_384, [1, 0]); convert_element_type_384 = None
# No stacktrace found for following nodes
mm_default_40: "f16[4624, 512]" = torch.ops.aten.mm.default(view_347, permute_132); view_347 = permute_132 = None
add_tensor_40: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_40, convert_element_type_383); mm_default_40 = convert_element_type_383 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_348: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_40, [1, 4624, 512]); add_tensor_40 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_135: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_131, view_348); add_131 = view_348 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_388: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_135, torch.float32)
var_mean_29 = torch.ops.aten.var_mean.correction(convert_element_type_388, [2], correction = 0, keepdim = True)
getitem_58: "f32[1, 4624, 1]" = var_mean_29[0]
getitem_59: "f32[1, 4624, 1]" = var_mean_29[1]; var_mean_29 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_400: "f16[512]" = torch.ops.prims.convert_element_type.default(arg190_1, torch.float16); arg190_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_389: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg186_1, torch.float16); arg186_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_44: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_388, getitem_59); convert_element_type_388 = getitem_59 = None
add_136: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_58, 1e-05); getitem_58 = None
rsqrt_29: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_136); add_136 = None
mul_112: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_44, rsqrt_29); sub_44 = rsqrt_29 = None
mul_113: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_112, arg183_1); mul_112 = arg183_1 = None
add_137: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_113, arg184_1); mul_113 = arg184_1 = None
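# Every F.linear in this dump appears as the same four ops: cast input and weight
# to fp16 (autocast), transpose the weight, mm, then add the fp16 bias.
# Illustrative sketch of that decomposition (names are not part of this graph):
def _sketch_linear_fp16(x2d, weight, bias):
    # x2d: (rows, in_features); weight: (out_features, in_features)
    return torch.mm(x2d.half(), weight.half().t()) + bias.half()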
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_349: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_137, [1, 68, 68, 512]); add_137 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_12: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_349, [0, 0, 0, 2, 0, 2], 0.0); view_349 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_350: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_12, [1, 10, 7, 10, 7, 512]); constant_pad_nd_12 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_133: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_350, [0, 1, 3, 2, 4, 5]); view_350 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_151: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_133, memory_format = torch.contiguous_format); permute_133 = None
view_351: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_151, [-1, 7, 7, 512]); clone_151 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_352: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_351, [-1, 49, 512]); view_351 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_391: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_352, torch.float16); view_352 = None
view_353: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_391, [4900, 512]); convert_element_type_391 = None
convert_element_type_390: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg185_1, torch.float16); arg185_1 = None
permute_134: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_390, [1, 0]); convert_element_type_390 = None
# No stacktrace found for following nodes
mm_default_39: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_353, permute_134); view_353 = permute_134 = None
add_tensor_39: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_39, convert_element_type_389); mm_default_39 = convert_element_type_389 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_354: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_39, [100, 49, 1536]); add_tensor_39 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_355: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_354, [100, 49, 3, 16, 32]); view_354 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_135: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_355, [2, 0, 3, 1, 4]); view_355 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_36: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 0)
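# constant_pad_nd with pad = [0, 0, 0, 2, 0, 2] above is F.pad on the NHWC map:
# C padded by (0, 0), W by (0, 2), H by (0, 2), growing 68x68 to 70x70 so it tiles
# exactly into 7x7 windows. Generic sketch of the same padding rule:
def _sketch_pad_to_window_multiple(x, window_size=7):
    B, H, W, C = x.shape
    pad_h = (window_size - H % window_size) % window_size
    pad_w = (window_size - W % window_size) % window_size
    return torch.nn.functional.pad(x, (0, 0, 0, pad_w, 0, pad_h))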
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_114: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_36, 0.1767766952966369); select_36 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_48: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_114, [100, 16, 49, 32]); mul_114 = None
clone_152: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_48, memory_format = torch.contiguous_format); expand_48 = None
view_356: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_152, [1600, 49, 32]); clone_152 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_37: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_136: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_37, [0, 1, 3, 2]); select_37 = None
expand_49: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_136, [100, 16, 32, 49]); permute_136 = None
clone_153: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_49, memory_format = torch.contiguous_format); expand_49 = None
view_357: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_153, [1600, 32, 49]); clone_153 = None
bmm_24: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_356, view_357); view_356 = view_357 = None
view_358: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_24, [100, 16, 49, 49]); bmm_24 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_359: "i64[2401]" = torch.ops.aten.reshape.default(arg188_1, [-1]); arg188_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_36: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg187_1, [view_359]); arg187_1 = view_359 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_360: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_36, [49, 49, -1]); index_36 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_137: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_360, [2, 0, 1]); view_360 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_154: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_137, memory_format = torch.contiguous_format); permute_137 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_30: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_154, 0); clone_154 = None
add_138: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_358, unsqueeze_30); view_358 = unsqueeze_30 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_12: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_138, [-1], True)
sub_45: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_138, amax_12); add_138 = amax_12 = None
exp_12: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_45); sub_45 = None
sum_13: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_12, [-1], True)
div_18: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_12, sum_13); exp_12 = sum_13 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_397: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_18, torch.float16); div_18 = None
expand_50: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_397, [100, 16, 49, 49]); convert_element_type_397 = None
view_361: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_50, [1600, 49, 49]); expand_50 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_38: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 2); permute_135 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_51: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_38, [100, 16, 49, 32]); select_38 = None
clone_156: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_51, memory_format = torch.contiguous_format); expand_51 = None
view_362: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_156, [1600, 49, 32]); clone_156 = None
bmm_25: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_361, view_362); view_361 = view_362 = None
view_363: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_25, [100, 16, 49, 32]); bmm_25 = None
permute_138: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_363, [0, 2, 1, 3]); view_363 = None
clone_157: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_138, memory_format = torch.contiguous_format); permute_138 = None
view_364: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_157, [100, 49, 512]); clone_157 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_365: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_364, [4900, 512]); view_364 = None
convert_element_type_401: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg189_1, torch.float16); arg189_1 = None
permute_139: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_401, [1, 0]); convert_element_type_401 = None
# No stacktrace found for following nodes
mm_default_38: "f16[4900, 512]" = torch.ops.aten.mm.default(view_365, permute_139); view_365 = permute_139 = None
add_tensor_38: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_38, convert_element_type_400); mm_default_38 = convert_element_type_400 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_366: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_38, [100, 49, 512]); add_tensor_38 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_367: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_366, [-1, 7, 7, 512]); view_366 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_368: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_367, [1, 10, 10, 7, 7, -1]); view_367 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_140: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_368, [0, 1, 3, 2, 4, 5]); view_368 = None
clone_159: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_140, memory_format = torch.contiguous_format); permute_140 = None
view_369: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_159, [1, 70, 70, -1]); clone_159 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_511: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_369, 1, 0, 68); view_369 = None
slice_512: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_511, 2, 0, 68); slice_511 = None
clone_160: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_512, memory_format = torch.contiguous_format); slice_512 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_370: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_160, [1, 4624, 512]); clone_160 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_139: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_135, view_370); add_135 = view_370 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_405: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_139, torch.float32)
var_mean_30 = torch.ops.aten.var_mean.correction(convert_element_type_405, [2], correction = 0, keepdim = True)
getitem_60: "f32[1, 4624, 1]" = var_mean_30[0]
getitem_61: "f32[1, 4624, 1]" = var_mean_30[1]; var_mean_30 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_414: "f16[512]" = torch.ops.prims.convert_element_type.default(arg196_1, torch.float16); arg196_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_406: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg194_1, torch.float16); arg194_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_46: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_405, getitem_61); convert_element_type_405 = getitem_61 = None
add_140: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_60, 1e-05); getitem_60 = None
rsqrt_30: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_140); add_140 = None
mul_115: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_46, rsqrt_30); sub_46 = rsqrt_30 = None
mul_116: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_115, arg191_1); mul_115 = arg191_1 = None
add_141: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_116, arg192_1); mul_116 = arg192_1 = None
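# The pattern repeating through this whole region is one SwinTransformerBlock:
# pre-norm window attention with a residual, then a pre-norm MLP with a residual
# (drop_path is the identity at inference, so it never appears in the graph).
# Schematic sketch, with the submodules passed in as callables:
def _sketch_swin_block(x, norm1, window_attn, norm2, mlp):
    x = x + window_attn(norm1(x))  # W-MSA / SW-MSA over 7x7 windows
    return x + mlp(norm2(x))       # Linear(512->2048) -> GELU -> Linear(2048->512)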
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_408: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_141, torch.float16); add_141 = None
view_371: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_408, [4624, 512]); convert_element_type_408 = None
convert_element_type_407: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg193_1, torch.float16); arg193_1 = None
permute_141: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_407, [1, 0]); convert_element_type_407 = None
# No stacktrace found for following nodes
mm_default_37: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_371, permute_141); view_371 = permute_141 = None
add_tensor_37: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_37, convert_element_type_406); mm_default_37 = convert_element_type_406 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_372: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_37, [1, 4624, 2048]); add_tensor_37 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_412: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_372, torch.float32); view_372 = None
mul_117: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_412, 0.5)
mul_118: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_412, 0.7071067811865476); convert_element_type_412 = None
erf_12: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_118); mul_118 = None
add_142: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_12, 1); erf_12 = None
mul_119: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_117, add_142); mul_117 = add_142 = None
convert_element_type_413: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_119, torch.float16); mul_119 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_373: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_413, [4624, 2048]); convert_element_type_413 = None
convert_element_type_415: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg195_1, torch.float16); arg195_1 = None
permute_142: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_415, [1, 0]); convert_element_type_415 = None
# No stacktrace found for following nodes
mm_default_36: "f16[4624, 512]" = torch.ops.aten.mm.default(view_373, permute_142); view_373 = permute_142 = None
add_tensor_36: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_36, convert_element_type_414); mm_default_36 = convert_element_type_414 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_374: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_36, [1, 4624, 512]); add_tensor_36 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_143: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_139, view_374); add_139 = view_374 = None
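# The mul-by-0.5 / erf(x / sqrt(2)) group above is exact (non-tanh) GELU computed
# in fp32 and cast back to fp16. Sketch of the same formula:
def _sketch_gelu_exact(x):
    x32 = x.float()
    return (0.5 * x32 * (1.0 + torch.erf(x32 * 0.7071067811865476))).to(x.dtype)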
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_419: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_143, torch.float32)
var_mean_31 = torch.ops.aten.var_mean.correction(convert_element_type_419, [2], correction = 0, keepdim = True)
getitem_62: "f32[1, 4624, 1]" = var_mean_31[0]
getitem_63: "f32[1, 4624, 1]" = var_mean_31[1]; var_mean_31 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_431: "f16[512]" = torch.ops.prims.convert_element_type.default(arg204_1, torch.float16); arg204_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_420: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg200_1, torch.float16); arg200_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_47: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_419, getitem_63); convert_element_type_419 = getitem_63 = None
add_144: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_62, 1e-05); getitem_62 = None
rsqrt_31: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_144); add_144 = None
mul_120: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_47, rsqrt_31); sub_47 = rsqrt_31 = None
mul_121: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_120, arg197_1); mul_120 = arg197_1 = None
add_145: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_121, arg198_1); mul_121 = arg198_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_375: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_145, [1, 68, 68, 512]); add_145 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_13: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_375, [0, 0, 0, 2, 0, 2], 0.0); view_375 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_24: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_146: "i64[70]" = torch.ops.aten.add.Tensor(iota_24, 3); iota_24 = None
fmod_24: "i64[70]" = torch.ops.aten.fmod.Scalar(add_146, 70); add_146 = None
index_37: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_13, [None, fmod_24]); constant_pad_nd_13 = fmod_24 = None
iota_25: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_147: "i64[70]" = torch.ops.aten.add.Tensor(iota_25, 3); iota_25 = None
fmod_25: "i64[70]" = torch.ops.aten.fmod.Scalar(add_147, 70); add_147 = None
index_38: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_37, [None, None, fmod_25]); index_37 = fmod_25 = None
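# The iota/add/fmod/index group above is Inductor's lowering of torch.roll: row i
# of the output reads row (i + 3) % 70 of the input, i.e. torch.roll(x, shifts=(-3, -3),
# dims=(1, 2)) -- the cyclic shift that turns W-MSA into SW-MSA. The add-67 variant
# seen elsewhere undoes it (67 == -3 mod 70). One-dimension sketch:
def _sketch_roll_one_dim(x, shift, dim):
    n = x.size(dim)
    idx = (torch.arange(n, device=x.device) + (-shift) % n) % n
    return x.index_select(dim, idx)  # == torch.roll(x, shifts=shift, dims=dim)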
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_376: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_38, [1, 10, 7, 10, 7, 512]); index_38 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_143: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_376, [0, 1, 3, 2, 4, 5]); view_376 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_163: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_143, memory_format = torch.contiguous_format); permute_143 = None
view_377: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_163, [-1, 7, 7, 512]); clone_163 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_378: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_377, [-1, 49, 512]); view_377 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_422: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_378, torch.float16); view_378 = None
view_379: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_422, [4900, 512]); convert_element_type_422 = None
convert_element_type_421: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg199_1, torch.float16); arg199_1 = None
permute_144: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_421, [1, 0]); convert_element_type_421 = None
# No stacktrace found for following nodes
mm_default_35: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_379, permute_144); view_379 = permute_144 = None
add_tensor_35: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_35, convert_element_type_420); mm_default_35 = convert_element_type_420 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_380: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_35, [100, 49, 1536]); add_tensor_35 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_381: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_380, [100, 49, 3, 16, 32]); view_380 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_145: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_381, [2, 0, 3, 1, 4]); view_381 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_39: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_122: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_39, 0.1767766952966369); select_39 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_52: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_122, [100, 16, 49, 32]); mul_122 = None
clone_164: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_52, memory_format = torch.contiguous_format); expand_52 = None
view_382: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_164, [1600, 49, 32]); clone_164 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_40: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_146: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_40, [0, 1, 3, 2]); select_40 = None
expand_53: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_146, [100, 16, 32, 49]); permute_146 = None
clone_165: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_53, memory_format = torch.contiguous_format); expand_53 = None
view_383: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_165, [1600, 32, 49]); clone_165 = None
bmm_26: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_382, view_383); view_382 = view_383 = None
view_384: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_26, [100, 16, 49, 49]); bmm_26 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_385: "i64[2401]" = torch.ops.aten.reshape.default(arg202_1, [-1]); arg202_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_39: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg201_1, [view_385]); arg201_1 = view_385 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_386: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_39, [49, 49, -1]); index_39 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_147: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_386, [2, 0, 1]); view_386 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_166: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_147, memory_format = torch.contiguous_format); permute_147 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_31: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_166, 0); clone_166 = None
add_148: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_384, unsqueeze_31); view_384 = unsqueeze_31 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_387: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_148, [1, 100, 16, 49, 49]); add_148 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_32: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_33: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_32, 0); unsqueeze_32 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_149: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_387, unsqueeze_33); view_387 = unsqueeze_33 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_388: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_149, [-1, 16, 49, 49]); add_149 = None
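# where_5 above is the cached attention mask for the shifted windows (built once per
# resolution: zero where two tokens share a window region, a large negative value
# across region boundaries), broadcast over batch and heads. Illustrative sketch:
def _sketch_add_window_mask(attn, mask, num_heads=16, N=49):
    # attn: (B_ * nW, nH, N, N); mask: (nW, N, N)
    nW = mask.shape[0]
    attn = attn.view(-1, nW, num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
    return attn.view(-1, num_heads, N, N)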
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_13: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_388, [-1], True)
sub_48: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_388, amax_13); view_388 = amax_13 = None
exp_13: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_48); sub_48 = None
sum_14: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_13, [-1], True)
div_19: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_13, sum_14); exp_13 = sum_14 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_428: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_19, torch.float16); div_19 = None
expand_54: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_428, [100, 16, 49, 49]); convert_element_type_428 = None
view_389: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_54, [1600, 49, 49]); expand_54 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_41: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 2); permute_145 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_55: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_41, [100, 16, 49, 32]); select_41 = None
clone_168: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_55, memory_format = torch.contiguous_format); expand_55 = None
view_390: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_168, [1600, 49, 32]); clone_168 = None
bmm_27: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_389, view_390); view_389 = view_390 = None
view_391: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_27, [100, 16, 49, 32]); bmm_27 = None
permute_148: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_391, [0, 2, 1, 3]); view_391 = None
clone_169: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_148, memory_format = torch.contiguous_format); permute_148 = None
view_392: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_169, [100, 49, 512]); clone_169 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_393: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_392, [4900, 512]); view_392 = None
convert_element_type_432: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg203_1, torch.float16); arg203_1 = None
permute_149: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_432, [1, 0]); convert_element_type_432 = None
# No stacktrace found for following nodes
mm_default_34: "f16[4900, 512]" = torch.ops.aten.mm.default(view_393, permute_149); view_393 = permute_149 = None
add_tensor_34: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_34, convert_element_type_431); mm_default_34 = convert_element_type_431 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_394: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_34, [100, 49, 512]); add_tensor_34 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_395: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_394, [-1, 7, 7, 512]); view_394 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_396: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_395, [1, 10, 10, 7, 7, -1]); view_395 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_150: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_396, [0, 1, 3, 2, 4, 5]); view_396 = None
clone_171: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_150, memory_format = torch.contiguous_format); permute_150 = None
view_397: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_171, [1, 70, 70, -1]); clone_171 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_26: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_150: "i64[70]" = torch.ops.aten.add.Tensor(iota_26, 67); iota_26 = None
fmod_26: "i64[70]" = torch.ops.aten.fmod.Scalar(add_150, 70); add_150 = None
index_40: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_397, [None, fmod_26]); view_397 = fmod_26 = None
iota_27: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_151: "i64[70]" = torch.ops.aten.add.Tensor(iota_27, 67); iota_27 = None
fmod_27: "i64[70]" = torch.ops.aten.fmod.Scalar(add_151, 70); add_151 = None
index_41: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_40, [None, None, fmod_27]); index_40 = fmod_27 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_521: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_41, 1, 0, 68); index_41 = None
slice_522: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_521, 2, 0, 68); slice_521 = None
clone_172: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_522, memory_format = torch.contiguous_format); slice_522 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_398: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_172, [1, 4624, 512]); clone_172 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_152: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_143, view_398); add_143 = view_398 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_436: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_152, torch.float32)
var_mean_32 = torch.ops.aten.var_mean.correction(convert_element_type_436, [2], correction = 0, keepdim = True)
getitem_64: "f32[1, 4624, 1]" = var_mean_32[0]
getitem_65: "f32[1, 4624, 1]" = var_mean_32[1]; var_mean_32 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_445: "f16[512]" = torch.ops.prims.convert_element_type.default(arg210_1, torch.float16); arg210_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_437: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg208_1, torch.float16); arg208_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_49: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_436, getitem_65); convert_element_type_436 = getitem_65 = None
add_153: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None
rsqrt_32: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_153); add_153 = None
mul_123: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_49, rsqrt_32); sub_49 = rsqrt_32 = None
mul_124: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_123, arg205_1); mul_123 = arg205_1 = None
add_154: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_124, arg206_1); mul_124 = arg206_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_439: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_154, torch.float16); add_154 = None
view_399: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_439, [4624, 512]); convert_element_type_439 = None
convert_element_type_438: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg207_1, torch.float16); arg207_1 = None
permute_151: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_438, [1, 0]); convert_element_type_438 = None
# No stacktrace found for following nodes
mm_default_33: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_399, permute_151); view_399 = permute_151 = None
add_tensor_33: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_33, convert_element_type_437); mm_default_33 = convert_element_type_437 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_400: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_33, [1, 4624, 2048]); add_tensor_33 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_443: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_400, torch.float32); view_400 = None
mul_125: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_443, 0.5)
mul_126: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_443, 0.7071067811865476); convert_element_type_443 = None
erf_13: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_126); mul_126 = None
add_155: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_13, 1); erf_13 = None
mul_127: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_125, add_155); mul_125 = add_155 = None
convert_element_type_444: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_127, torch.float16); mul_127 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_401: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_444, [4624, 2048]); convert_element_type_444 = None
convert_element_type_446: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg209_1, torch.float16); arg209_1 = None
permute_152: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_446, [1, 0]); convert_element_type_446 = None
# No stacktrace found for following nodes
mm_default_32: "f16[4624, 512]" = torch.ops.aten.mm.default(view_401, permute_152); view_401 = permute_152 = None
add_tensor_32: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_32, convert_element_type_445); mm_default_32 = convert_element_type_445 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_402: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_32, [1, 4624, 512]); add_tensor_32 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_156: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_152, view_402); add_152 = view_402 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_450: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_156, torch.float32)
var_mean_33 = torch.ops.aten.var_mean.correction(convert_element_type_450, [2], correction = 0, keepdim = True)
getitem_66: "f32[1, 4624, 1]" = var_mean_33[0]
getitem_67: "f32[1, 4624, 1]" = var_mean_33[1]; var_mean_33 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_462: "f16[512]" = torch.ops.prims.convert_element_type.default(arg218_1, torch.float16); arg218_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_451: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg214_1, torch.float16); arg214_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_50: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_450, getitem_67); convert_element_type_450 = getitem_67 = None
add_157: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None
rsqrt_33: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_157); add_157 = None
mul_128: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_50, rsqrt_33); sub_50 = rsqrt_33 = None
mul_129: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_128, arg211_1); mul_128 = arg211_1 = None
add_158: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_129, arg212_1); mul_129 = arg212_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_403: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_158, [1, 68, 68, 512]); add_158 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_14: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_403, [0, 0, 0, 2, 0, 2], 0.0); view_403 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_404: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_14, [1, 10, 7, 10, 7, 512]); constant_pad_nd_14 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_153: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_404, [0, 1, 3, 2, 4, 5]); view_404 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_175: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_153, memory_format = torch.contiguous_format); permute_153 = None
view_405: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_175, [-1, 7, 7, 512]); clone_175 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_406: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_405, [-1, 49, 512]); view_405 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_453: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_406, torch.float16); view_406 = None
view_407: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_453, [4900, 512]); convert_element_type_453 = None
convert_element_type_452: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg213_1, torch.float16); arg213_1 = None
permute_154: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_452, [1, 0]); convert_element_type_452 = None
# No stacktrace found for following nodes
mm_default_31: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_407, permute_154); view_407 = permute_154 = None
add_tensor_31: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_31, convert_element_type_451); mm_default_31 = convert_element_type_451 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_408: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_31, [100, 49, 1536]); add_tensor_31 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_409: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_408, [100, 49, 3, 16, 32]); view_408 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_155: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_409, [2, 0, 3, 1, 4]); view_409 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_42: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_130: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_42, 0.1767766952966369); select_42 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_56: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_130, [100, 16, 49, 32]); mul_130 = None
clone_176: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_56, memory_format = torch.contiguous_format); expand_56 = None
view_410: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_176, [1600, 49, 32]); clone_176 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_43: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_156: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_43, [0, 1, 3, 2]); select_43 = None
expand_57: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_156, [100, 16, 32, 49]); permute_156 = None
clone_177: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_57, memory_format = torch.contiguous_format); expand_57 = None
view_411: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_177, [1600, 32, 49]); clone_177 = None
bmm_28: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_410, view_411); view_410 = view_411 = None
view_412: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_28, [100, 16, 49, 49]); bmm_28 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_413: "i64[2401]" = torch.ops.aten.reshape.default(arg216_1, [-1]); arg216_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_42: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg215_1, [view_413]); arg215_1 = view_413 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_414: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_42, [49, 49, -1]); index_42 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_157: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_414, [2, 0, 1]); view_414 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_178: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_157, memory_format = torch.contiguous_format); permute_157 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_34: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_178, 0); clone_178 = None
add_159: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_412, unsqueeze_34); view_412 = unsqueeze_34 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_14: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_159, [-1], True)
sub_51: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_159, amax_14); add_159 = amax_14 = None
exp_14: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_51); sub_51 = None
sum_15: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_14, [-1], True)
div_20: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_14, sum_15); exp_14 = sum_15 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_459: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_20, torch.float16); div_20 = None
expand_58: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_459, [100, 16, 49, 49]); convert_element_type_459 = None
view_415: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_58, [1600, 49, 49]); expand_58 = None
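
# [Editor's note] amax_14/sub_51/exp_14/sum_15/div_20 are the numerically stable softmax
# expansion, kept in f32 even though the matmuls run in f16: softmax(a) = exp(a - max(a)) /
# sum(exp(a - max(a))). The index/permute just before it gathers rows of the (2401, 16)
# relative_position_bias_table (arg215_1) by the flattened relative_position_index (arg216_1)
# into an (nH, 49, 49) bias added to the logits. Sketch with random stand-ins for the params:
import torch

def softmax_decomposed(attn: torch.Tensor) -> torch.Tensor:   # amax / sub / exp / sum / div
    a = attn - attn.amax(dim=-1, keepdim=True)
    e = a.exp()
    return e / e.sum(dim=-1, keepdim=True)

table = torch.randn(2401, 16)                # stands in for arg215_1
index = torch.randint(0, 2401, (49, 49))     # stands in for arg216_1
bias = table[index.view(-1)].view(49, 49, -1).permute(2, 0, 1).contiguous()  # (nH, 49, 49)
attn = torch.randn(100, 16, 49, 49) + bias.unsqueeze(0)       # broadcasts over the 100 windows
probs = softmax_decomposed(attn)             # matches torch.softmax(attn, dim=-1)
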
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_44: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 2); permute_155 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_59: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_44, [100, 16, 49, 32]); select_44 = None
clone_180: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_59, memory_format = torch.contiguous_format); expand_59 = None
view_416: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_180, [1600, 49, 32]); clone_180 = None
bmm_29: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_415, view_416); view_415 = view_416 = None
view_417: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_29, [100, 16, 49, 32]); bmm_29 = None
permute_158: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_417, [0, 2, 1, 3]); view_417 = None
clone_181: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_158, memory_format = torch.contiguous_format); permute_158 = None
view_418: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_181, [100, 49, 512]); clone_181 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_419: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_418, [4900, 512]); view_418 = None
convert_element_type_463: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg217_1, torch.float16); arg217_1 = None
permute_159: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_463, [1, 0]); convert_element_type_463 = None
# No stacktrace found for following nodes
mm_default_30: "f16[4900, 512]" = torch.ops.aten.mm.default(view_419, permute_159); view_419 = permute_159 = None
add_tensor_30: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_30, convert_element_type_462); mm_default_30 = convert_element_type_462 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_420: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_30, [100, 49, 512]); add_tensor_30 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_421: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_420, [-1, 7, 7, 512]); view_420 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_422: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_421, [1, 10, 10, 7, 7, -1]); view_421 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_160: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_422, [0, 1, 3, 2, 4, 5]); view_422 = None
clone_183: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_160, memory_format = torch.contiguous_format); permute_160 = None
view_423: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_183, [1, 70, 70, -1]); clone_183 = None
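
# [Editor's note] view_422/permute_160/view_423 are window_reverse, the inverse of the
# window_partition earlier: the (num_windows*B, 7, 7, C) tiles go back to the padded
# (B, 70, 70, C) feature map, and the slice ops just below crop the 2-pixel pad off to
# H = W = 68. Sketch following the source cited above (lines 93-95); names illustrative:
import torch

def window_reverse(windows: torch.Tensor, window_size: int, H: int, W: int) -> torch.Tensor:
    B = windows.shape[0] // ((H // window_size) * (W // window_size))
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)

x = window_reverse(torch.randn(100, 7, 7, 512), 7, 70, 70)     # (1, 70, 70, 512)
x = x[:, :68, :68, :].contiguous()                             # the slice_525/slice_526 crop
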
68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_525, 2, 0, 68); slice_525 = None clone_184: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_526, memory_format = torch.contiguous_format); slice_526 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_424: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_184, [1, 4624, 512]); clone_184 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_160: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_156, view_424); add_156 = view_424 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_467: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_160, torch.float32) var_mean_34 = torch.ops.aten.var_mean.correction(convert_element_type_467, [2], correction = 0, keepdim = True) getitem_68: "f32[1, 4624, 1]" = var_mean_34[0] getitem_69: "f32[1, 4624, 1]" = var_mean_34[1]; var_mean_34 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_476: "f16[512]" = torch.ops.prims.convert_element_type.default(arg224_1, torch.float16); arg224_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_468: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg222_1, torch.float16); arg222_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_52: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_467, getitem_69); convert_element_type_467 = getitem_69 = None add_161: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_68, 1e-05); getitem_68 = None rsqrt_34: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_161); add_161 = None mul_131: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_52, rsqrt_34); sub_52 = rsqrt_34 = None mul_132: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_131, arg219_1); mul_131 = arg219_1 = None add_162: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_132, arg220_1); mul_132 = arg220_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_470: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_162, torch.float16); add_162 = None view_425: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_470, [4624, 512]); convert_element_type_470 = None convert_element_type_469: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg221_1, torch.float16); arg221_1 = None permute_161: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_469, [1, 0]); convert_element_type_469 = None # No stacktrace found for following nodes mm_default_29: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_425, permute_161); view_425 = permute_161 = None add_tensor_29: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_29, convert_element_type_468); mm_default_29 = convert_element_type_468 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_426: "f16[1, 
4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_29, [1, 4624, 2048]); add_tensor_29 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_474: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_426, torch.float32); view_426 = None mul_133: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_474, 0.5) mul_134: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_474, 0.7071067811865476); convert_element_type_474 = None erf_14: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_134); mul_134 = None add_163: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_14, 1); erf_14 = None mul_135: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_133, add_163); mul_133 = add_163 = None convert_element_type_475: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_135, torch.float16); mul_135 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_427: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_475, [4624, 2048]); convert_element_type_475 = None convert_element_type_477: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg223_1, torch.float16); arg223_1 = None permute_162: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_477, [1, 0]); convert_element_type_477 = None # No stacktrace found for following nodes mm_default_28: "f16[4624, 512]" = torch.ops.aten.mm.default(view_427, permute_162); view_427 = permute_162 = None add_tensor_28: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_28, convert_element_type_476); mm_default_28 = convert_element_type_476 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_428: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_28, [1, 4624, 512]); add_tensor_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_164: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_160, view_428); add_160 = view_428 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_481: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_164, torch.float32) var_mean_35 = torch.ops.aten.var_mean.correction(convert_element_type_481, [2], correction = 0, keepdim = True) getitem_70: "f32[1, 4624, 1]" = var_mean_35[0] getitem_71: "f32[1, 4624, 1]" = var_mean_35[1]; var_mean_35 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_493: "f16[512]" = torch.ops.prims.convert_element_type.default(arg232_1, torch.float16); arg232_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_482: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg228_1, torch.float16); arg228_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_53: "f32[1, 4624, 512]" = 
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_53: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_481, getitem_71); convert_element_type_481 = getitem_71 = None
add_165: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_70, 1e-05); getitem_70 = None
rsqrt_35: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_165); add_165 = None
mul_136: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_53, rsqrt_35); sub_53 = rsqrt_35 = None
mul_137: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_136, arg225_1); mul_136 = arg225_1 = None
add_166: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_137, arg226_1); mul_137 = arg226_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_429: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_166, [1, 68, 68, 512]); add_166 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_15: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_429, [0, 0, 0, 2, 0, 2], 0.0); view_429 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_28: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_167: "i64[70]" = torch.ops.aten.add.Tensor(iota_28, 3); iota_28 = None
fmod_28: "i64[70]" = torch.ops.aten.fmod.Scalar(add_167, 70); add_167 = None
index_43: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_15, [None, fmod_28]); constant_pad_nd_15 = fmod_28 = None
iota_29: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_168: "i64[70]" = torch.ops.aten.add.Tensor(iota_29, 3); iota_29 = None
fmod_29: "i64[70]" = torch.ops.aten.fmod.Scalar(add_168, 70); add_168 = None
index_44: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_43, [None, None, fmod_29]); index_43 = fmod_29 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_430: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_44, [1, 10, 7, 10, 7, 512]); index_44 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_163: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_430, [0, 1, 3, 2, 4, 5]); view_430 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_187: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_163, memory_format = torch.contiguous_format); permute_163 = None
view_431: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_187, [-1, 7, 7, 512]); clone_187 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_432: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_431, [-1, 49, 512]); view_431 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_484: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_432, torch.float16); view_432 = None
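
# [Editor's note] iota_28/add_167/fmod_28/index_43 (and the matching trio on the width axis)
# are how torch.roll(x, shifts=(-3, -3), dims=(1, 2)) is decomposed for this shifted block:
# build indices (arange(70) + 3) % 70 and gather, so out[:, j] = x[:, (j + 3) % 70]. The
# later iota/add 67/fmod pairs are the inverse roll, since 67 == -3 (mod 70). Sketch:
import torch
x = torch.randn(1, 70, 70, 512)
idx = (torch.arange(70) + 3) % 70        # iota / add.Tensor / fmod.Scalar
shifted = x[:, idx][:, :, idx]           # index.Tensor on dim 1, then dim 2
assert torch.equal(shifted, torch.roll(x, shifts=(-3, -3), dims=(1, 2)))
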
view_433: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_484, [4900, 512]); convert_element_type_484 = None
convert_element_type_483: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg227_1, torch.float16); arg227_1 = None
permute_164: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_483, [1, 0]); convert_element_type_483 = None
# No stacktrace found for following nodes
mm_default_27: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_433, permute_164); view_433 = permute_164 = None
add_tensor_27: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_27, convert_element_type_482); mm_default_27 = convert_element_type_482 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_434: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_27, [100, 49, 1536]); add_tensor_27 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_435: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_434, [100, 49, 3, 16, 32]); view_434 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_165: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_435, [2, 0, 3, 1, 4]); view_435 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_45: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_138: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_45, 0.1767766952966369); select_45 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_60: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_138, [100, 16, 49, 32]); mul_138 = None
clone_188: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_60, memory_format = torch.contiguous_format); expand_60 = None
view_436: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_188, [1600, 49, 32]); clone_188 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_46: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_166: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_46, [0, 1, 3, 2]); select_46 = None
expand_61: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_166, [100, 16, 32, 49]); permute_166 = None
clone_189: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_61, memory_format = torch.contiguous_format); expand_61 = None
view_437: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_189, [1600, 32, 49]); clone_189 = None
bmm_30: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_436, view_437); view_436 = view_437 = None
view_438: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_30, [100, 16, 49, 49]); bmm_30 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_439: "i64[2401]" = torch.ops.aten.reshape.default(arg230_1, [-1]); arg230_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_45: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg229_1, [view_439]); arg229_1 = view_439 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_440: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_45, [49, 49, -1]); index_45 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_167: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_440, [2, 0, 1]); view_440 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_190: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_167, memory_format = torch.contiguous_format); permute_167 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_35: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_190, 0); clone_190 = None
add_169: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_438, unsqueeze_35); view_438 = unsqueeze_35 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_441: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_169, [1, 100, 16, 49, 49]); add_169 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_36: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_37: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_36, 0); unsqueeze_36 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_170: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_441, unsqueeze_37); view_441 = unsqueeze_37 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_442: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_170, [-1, 16, 49, 49]); add_170 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_15: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_442, [-1], True)
sub_54: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_442, amax_15); view_442 = amax_15 = None
exp_15: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_54); sub_54 = None
sum_16: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_15, [-1], True)
div_21: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_15, sum_16); exp_15 = sum_16 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_490: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_21, torch.float16); div_21 = None
expand_62: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_490, [100, 16, 49, 49]); convert_element_type_490 = None
view_443: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_62, [1600, 49, 49]); expand_62 = None
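
# [Editor's note] view_441/unsqueeze_36/unsqueeze_37/add_170 apply the shifted-window
# attention mask that distinguishes this SW-MSA block from the unshifted one above: logits
# are viewed as (B_//nW, nW, num_heads, N, N) so a per-window (nW, N, N) mask (where_5,
# built earlier in the graph; by the usual Swin recipe 0 keeps a pair and -100.0 blocks
# pairs that came from different image regions) broadcasts across heads, then the view is
# flattened back before softmax. Sketch with a zero stand-in mask:
import torch
nW, num_heads, N = 100, 16, 49                 # windows per image, heads, tokens per window
attn = torch.randn(nW, num_heads, N, N)        # B_ = B * nW with B = 1, as in view_441
mask = torch.zeros(nW, N, N)                   # where_5-style mask
attn = attn.view(1, nW, num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
attn = attn.view(-1, num_heads, N, N)          # back to (nW, num_heads, N, N) before softmax
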
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_47: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 2); permute_165 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_63: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_47, [100, 16, 49, 32]); select_47 = None
clone_192: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_63, memory_format = torch.contiguous_format); expand_63 = None
view_444: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_192, [1600, 49, 32]); clone_192 = None
bmm_31: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_443, view_444); view_443 = view_444 = None
view_445: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_31, [100, 16, 49, 32]); bmm_31 = None
permute_168: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_445, [0, 2, 1, 3]); view_445 = None
clone_193: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_168, memory_format = torch.contiguous_format); permute_168 = None
view_446: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_193, [100, 49, 512]); clone_193 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_447: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_446, [4900, 512]); view_446 = None
convert_element_type_494: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg231_1, torch.float16); arg231_1 = None
permute_169: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_494, [1, 0]); convert_element_type_494 = None
# No stacktrace found for following nodes
mm_default_26: "f16[4900, 512]" = torch.ops.aten.mm.default(view_447, permute_169); view_447 = permute_169 = None
add_tensor_26: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_26, convert_element_type_493); mm_default_26 = convert_element_type_493 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_448: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_26, [100, 49, 512]); add_tensor_26 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_449: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_448, [-1, 7, 7, 512]); view_448 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_450: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_449, [1, 10, 10, 7, 7, -1]); view_449 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_170: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_450, [0, 1, 3, 2, 4, 5]); view_450 = None
clone_195: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_170, memory_format = torch.contiguous_format); permute_170 = None
view_451: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_195, [1, 70, 70, -1]); clone_195 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_30: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_171: "i64[70]" = torch.ops.aten.add.Tensor(iota_30, 67); iota_30 = None
fmod_30: "i64[70]" = torch.ops.aten.fmod.Scalar(add_171, 70); add_171 = None
index_46: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_451, [None, fmod_30]); view_451 = fmod_30 = None
iota_31: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_172: "i64[70]" = torch.ops.aten.add.Tensor(iota_31, 67); iota_31 = None
fmod_31: "i64[70]" = torch.ops.aten.fmod.Scalar(add_172, 70); add_172 = None
index_47: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_46, [None, None, fmod_31]); index_46 = fmod_31 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_535: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_47, 1, 0, 68); index_47 = None
slice_536: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_535, 2, 0, 68); slice_535 = None
clone_196: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_536, memory_format = torch.contiguous_format); slice_536 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_452: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_196, [1, 4624, 512]); clone_196 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_173: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_164, view_452); add_164 = view_452 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_498: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_173, torch.float32)
var_mean_36 = torch.ops.aten.var_mean.correction(convert_element_type_498, [2], correction = 0, keepdim = True)
getitem_72: "f32[1, 4624, 1]" = var_mean_36[0]
getitem_73: "f32[1, 4624, 1]" = var_mean_36[1]; var_mean_36 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_507: "f16[512]" = torch.ops.prims.convert_element_type.default(arg238_1, torch.float16); arg238_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_499: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg236_1, torch.float16); arg236_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_55: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_498, getitem_73); convert_element_type_498 = getitem_73 = None
add_174: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_72, 1e-05); getitem_72 = None
rsqrt_36: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_174); add_174 = None
mul_139: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_55, rsqrt_36); sub_55 = rsqrt_36 = None
mul_140: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_139, arg233_1); mul_139 = arg233_1 = None
add_175: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_140, arg234_1); mul_140 = arg234_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_501: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_175, torch.float16); add_175 = None
view_453: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_501, [4624, 512]); convert_element_type_501 = None
convert_element_type_500: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg235_1, torch.float16); arg235_1 = None
permute_171: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_500, [1, 0]); convert_element_type_500 = None
# No stacktrace found for following nodes
mm_default_25: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_453, permute_171); view_453 = permute_171 = None
add_tensor_25: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_25, convert_element_type_499); mm_default_25 = convert_element_type_499 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_454: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_25, [1, 4624, 2048]); add_tensor_25 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_505: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_454, torch.float32); view_454 = None
mul_141: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_505, 0.5)
mul_142: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_505, 0.7071067811865476); convert_element_type_505 = None
erf_15: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_142); mul_142 = None
add_176: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_15, 1); erf_15 = None
mul_143: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_141, add_176); mul_141 = add_176 = None
convert_element_type_506: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_143, torch.float16); mul_143 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_455: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_506, [4624, 2048]); convert_element_type_506 = None
convert_element_type_508: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg237_1, torch.float16); arg237_1 = None
permute_172: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_508, [1, 0]); convert_element_type_508 = None
# No stacktrace found for following nodes
mm_default_24: "f16[4624, 512]" = torch.ops.aten.mm.default(view_455, permute_172); view_455 = permute_172 = None
add_tensor_24: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_24, convert_element_type_507); mm_default_24 = convert_element_type_507 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_456: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_24, [1, 4624, 512]); add_tensor_24 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_177: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_173, view_456); add_173 = view_456 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_512: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_177, torch.float32)
var_mean_37 = torch.ops.aten.var_mean.correction(convert_element_type_512, [2], correction = 0, keepdim = True)
getitem_74: "f32[1, 4624, 1]" = var_mean_37[0]
getitem_75: "f32[1, 4624, 1]" = var_mean_37[1]; var_mean_37 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_524: "f16[512]" = torch.ops.prims.convert_element_type.default(arg246_1, torch.float16); arg246_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_513: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg242_1, torch.float16); arg242_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_56: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_512, getitem_75); convert_element_type_512 = getitem_75 = None
add_178: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_74, 1e-05); getitem_74 = None
rsqrt_37: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_178); add_178 = None
mul_144: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_56, rsqrt_37); sub_56 = rsqrt_37 = None
mul_145: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_144, arg239_1); mul_144 = arg239_1 = None
add_179: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_145, arg240_1); mul_145 = arg240_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_457: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_179, [1, 68, 68, 512]); add_179 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_16: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_457, [0, 0, 0, 2, 0, 2], 0.0); view_457 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_458: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_16, [1, 10, 7, 10, 7, 512]); constant_pad_nd_16 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_173: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_458, [0, 1, 3, 2, 4, 5]); view_458 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_199: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_173, memory_format = torch.contiguous_format); permute_173 = None
view_459: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_199, [-1, 7, 7, 512]); clone_199 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_460: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_459, [-1, 49, 512]); view_459 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_515: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_460, torch.float16); view_460 = None
view_461: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_515, [4900, 512]); convert_element_type_515 = None
convert_element_type_514: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg241_1, torch.float16); arg241_1 = None
permute_174: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_514, [1, 0]); convert_element_type_514 = None
# No stacktrace found for following nodes
mm_default_23: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_461, permute_174); view_461 = permute_174 = None
add_tensor_23: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_23, convert_element_type_513); mm_default_23 = convert_element_type_513 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_462: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_23, [100, 49, 1536]); add_tensor_23 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_463: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_462, [100, 49, 3, 16, 32]); view_462 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_175: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_463, [2, 0, 3, 1, 4]); view_463 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_48: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_146: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_48, 0.1767766952966369); select_48 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_64: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_146, [100, 16, 49, 32]); mul_146 = None
clone_200: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_64, memory_format = torch.contiguous_format); expand_64 = None
view_464: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_200, [1600, 49, 32]); clone_200 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_49: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_176: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_49, [0, 1, 3, 2]); select_49 = None
expand_65: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_176, [100, 16, 32, 49]); permute_176 = None
clone_201: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_65, memory_format = torch.contiguous_format); expand_65 = None
view_465: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_201, [1600, 32, 49]); clone_201 = None
bmm_32: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_464, view_465); view_464 = view_465 = None
view_466: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_32, [100, 16, 49, 49]); bmm_32 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_467: "i64[2401]" = torch.ops.aten.reshape.default(arg244_1, [-1]); arg244_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_48: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg243_1, [view_467]); arg243_1 = view_467 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_468: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_48, [49, 49, -1]); index_48 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_177: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_468, [2, 0, 1]); view_468 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_202: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_177, memory_format = torch.contiguous_format); permute_177 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_38: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_202, 0); clone_202 = None
add_180: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_466, unsqueeze_38); view_466 = unsqueeze_38 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_16: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_180, [-1], True)
sub_57: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_180, amax_16); add_180 = amax_16 = None
exp_16: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_57); sub_57 = None
sum_17: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_16, [-1], True)
div_22: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_16, sum_17); exp_16 = sum_17 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_521: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_22, torch.float16); div_22 = None
expand_66: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_521, [100, 16, 49, 49]); convert_element_type_521 = None
view_469: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_66, [1600, 49, 49]); expand_66 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_50: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 2); permute_175 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_67: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_50, [100, 16, 49, 32]); select_50 = None
clone_204: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_67, memory_format = torch.contiguous_format); expand_67 = None
view_470: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_204, [1600, 49, 32]); clone_204 = None
bmm_33: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_469, view_470); view_469 = view_470 = None
view_471: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_33, [100, 16, 49, 32]); bmm_33 = None
permute_178: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_471, [0, 2, 1, 3]); view_471 = None
clone_205: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_178, memory_format = torch.contiguous_format); permute_178 = None
view_472: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_205, [100, 49, 512]); clone_205 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_473: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_472, [4900, 512]); view_472 = None
convert_element_type_525: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg245_1, torch.float16); arg245_1 = None
permute_179: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_525, [1, 0]); convert_element_type_525 = None
# No stacktrace found for following nodes
mm_default_22: "f16[4900, 512]" = torch.ops.aten.mm.default(view_473, permute_179); view_473 = permute_179 = None
add_tensor_22: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_22, convert_element_type_524); mm_default_22 = convert_element_type_524 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_474: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_22, [100, 49, 512]); add_tensor_22 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_475: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_474, [-1, 7, 7, 512]); view_474 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_476: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_475, [1, 10, 10, 7, 7, -1]); view_475 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_180: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_476, [0, 1, 3, 2, 4, 5]); view_476 = None
clone_207: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_180, memory_format = torch.contiguous_format); permute_180 = None
view_477: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_207, [1, 70, 70, -1]); clone_207 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_539: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_477, 1, 0, 68); view_477 = None
slice_540: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_539, 2, 0, 68); slice_539 = None
clone_208: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_540, memory_format = torch.contiguous_format); slice_540 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_478: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_208, [1, 4624, 512]); clone_208 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_181: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_177, view_478); add_177 = view_478 = None
"f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_181, torch.float32) var_mean_38 = torch.ops.aten.var_mean.correction(convert_element_type_529, [2], correction = 0, keepdim = True) getitem_76: "f32[1, 4624, 1]" = var_mean_38[0] getitem_77: "f32[1, 4624, 1]" = var_mean_38[1]; var_mean_38 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_538: "f16[512]" = torch.ops.prims.convert_element_type.default(arg252_1, torch.float16); arg252_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_530: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg250_1, torch.float16); arg250_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_58: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_529, getitem_77); convert_element_type_529 = getitem_77 = None add_182: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_76, 1e-05); getitem_76 = None rsqrt_38: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_182); add_182 = None mul_147: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_58, rsqrt_38); sub_58 = rsqrt_38 = None mul_148: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_147, arg247_1); mul_147 = arg247_1 = None add_183: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_148, arg248_1); mul_148 = arg248_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_532: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_183, torch.float16); add_183 = None view_479: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_532, [4624, 512]); convert_element_type_532 = None convert_element_type_531: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg249_1, torch.float16); arg249_1 = None permute_181: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_531, [1, 0]); convert_element_type_531 = None # No stacktrace found for following nodes mm_default_21: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_479, permute_181); view_479 = permute_181 = None add_tensor_21: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_21, convert_element_type_530); mm_default_21 = convert_element_type_530 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_480: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_21, [1, 4624, 2048]); add_tensor_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_536: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_480, torch.float32); view_480 = None mul_149: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_536, 0.5) mul_150: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_536, 0.7071067811865476); convert_element_type_536 = None erf_16: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_150); mul_150 = None add_184: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_16, 1); erf_16 = None mul_151: "f32[1, 4624, 
2048]" = torch.ops.aten.mul.Tensor(mul_149, add_184); mul_149 = add_184 = None convert_element_type_537: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_151, torch.float16); mul_151 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_481: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_537, [4624, 2048]); convert_element_type_537 = None convert_element_type_539: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg251_1, torch.float16); arg251_1 = None permute_182: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_539, [1, 0]); convert_element_type_539 = None # No stacktrace found for following nodes mm_default_20: "f16[4624, 512]" = torch.ops.aten.mm.default(view_481, permute_182); view_481 = permute_182 = None add_tensor_20: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_20, convert_element_type_538); mm_default_20 = convert_element_type_538 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_482: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_20, [1, 4624, 512]); add_tensor_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_185: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_181, view_482); add_181 = view_482 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_543: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_185, torch.float32) var_mean_39 = torch.ops.aten.var_mean.correction(convert_element_type_543, [2], correction = 0, keepdim = True) getitem_78: "f32[1, 4624, 1]" = var_mean_39[0] getitem_79: "f32[1, 4624, 1]" = var_mean_39[1]; var_mean_39 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_555: "f16[512]" = torch.ops.prims.convert_element_type.default(arg260_1, torch.float16); arg260_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_544: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg256_1, torch.float16); arg256_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_59: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_543, getitem_79); convert_element_type_543 = getitem_79 = None add_186: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_78, 1e-05); getitem_78 = None rsqrt_39: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_186); add_186 = None mul_152: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_59, rsqrt_39); sub_59 = rsqrt_39 = None mul_153: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_152, arg253_1); mul_152 = arg253_1 = None add_187: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_153, arg254_1); mul_153 = arg254_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_483: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_187, [1, 68, 68, 512]); add_187 = None # File: 
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_17: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_483, [0, 0, 0, 2, 0, 2], 0.0); view_483 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_32: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_188: "i64[70]" = torch.ops.aten.add.Tensor(iota_32, 3); iota_32 = None
fmod_32: "i64[70]" = torch.ops.aten.fmod.Scalar(add_188, 70); add_188 = None
index_49: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_17, [None, fmod_32]); constant_pad_nd_17 = fmod_32 = None
iota_33: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_189: "i64[70]" = torch.ops.aten.add.Tensor(iota_33, 3); iota_33 = None
fmod_33: "i64[70]" = torch.ops.aten.fmod.Scalar(add_189, 70); add_189 = None
index_50: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_49, [None, None, fmod_33]); index_49 = fmod_33 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_484: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_50, [1, 10, 7, 10, 7, 512]); index_50 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_183: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_484, [0, 1, 3, 2, 4, 5]); view_484 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_211: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_183, memory_format = torch.contiguous_format); permute_183 = None
view_485: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_211, [-1, 7, 7, 512]); clone_211 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_486: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_485, [-1, 49, 512]); view_485 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_546: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_486, torch.float16); view_486 = None
view_487: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_546, [4900, 512]); convert_element_type_546 = None
convert_element_type_545: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg255_1, torch.float16); arg255_1 = None
permute_184: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_545, [1, 0]); convert_element_type_545 = None
# No stacktrace found for following nodes
mm_default_19: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_487, permute_184); view_487 = permute_184 = None
add_tensor_19: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_19, convert_element_type_544); mm_default_19 = convert_element_type_544 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_546: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_486, torch.float16); view_486 = None
view_487: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_546, [4900, 512]); convert_element_type_546 = None
convert_element_type_545: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg255_1, torch.float16); arg255_1 = None
permute_184: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_545, [1, 0]); convert_element_type_545 = None

# No stacktrace found for following nodes
mm_default_19: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_487, permute_184); view_487 = permute_184 = None
add_tensor_19: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_19, convert_element_type_544); mm_default_19 = convert_element_type_544 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_488: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_19, [100, 49, 1536]); add_tensor_19 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_489: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_488, [100, 49, 3, 16, 32]); view_488 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_185: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_489, [2, 0, 3, 1, 4]); view_489 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_51: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 0)

# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_154: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_51, 0.1767766952966369); select_51 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_68: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_154, [100, 16, 49, 32]); mul_154 = None
clone_212: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_68, memory_format = torch.contiguous_format); expand_68 = None
view_490: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_212, [1600, 49, 32]); clone_212 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_52: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 1)

# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_186: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_52, [0, 1, 3, 2]); select_52 = None
expand_69: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_186, [100, 16, 32, 49]); permute_186 = None
clone_213: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_69, memory_format = torch.contiguous_format); expand_69 = None
view_491: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_213, [1600, 32, 49]); clone_213 = None
bmm_34: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_490, view_491); view_490 = view_491 = None
view_492: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_34, [100, 16, 49, 49]); bmm_34 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_493: "i64[2401]" = torch.ops.aten.reshape.default(arg258_1, [-1]); arg258_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_51: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg257_1, [view_493]); arg257_1 = view_493 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_494: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_51, [49, 49, -1]); index_51 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_187: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_494, [2, 0, 1]); view_494 = None
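# NOTE (annotation, not a traced node): view_493/index_51/view_494/permute_187
# above, together with the contiguous/unsqueeze nodes just below, gather the
# learned relative position bias: a (169, 16) table indexed by a precomputed
# (49, 49) index map, one bias per head and query/key offset. Sketch (names
# illustrative):
def _relative_position_bias(table, index, num_heads=16, N=49):
    bias = table[index.view(-1)].view(N, N, num_heads)  # (49, 49, 16)
    # -> (1, num_heads, N, N), broadcast-added to the (100, 16, 49, 49) scores
    return bias.permute(2, 0, 1).contiguous().unsqueeze(0)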
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_214: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_187, memory_format = torch.contiguous_format); permute_187 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_39: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_214, 0); clone_214 = None
add_190: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_492, unsqueeze_39); view_492 = unsqueeze_39 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_495: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_190, [1, 100, 16, 49, 49]); add_190 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_40: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1)
unsqueeze_41: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_40, 0); unsqueeze_40 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_191: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_495, unsqueeze_41); view_495 = unsqueeze_41 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_496: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_191, [-1, 16, 49, 49]); add_191 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_17: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_496, [-1], True)
sub_60: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_496, amax_17); view_496 = amax_17 = None
exp_17: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_60); sub_60 = None
sum_18: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_17, [-1], True)
div_23: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_17, sum_18); exp_17 = sum_18 = None
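# NOTE (annotation, not a traced node): amax_17/sub_60/exp_17/sum_18/div_23
# above are the numerically stable softmax over the last dim, kept in fp32;
# subtracting the row max first keeps exp() from overflowing. Equivalent sketch:
def _softmax_fp32(attn):
    a = attn.float()
    a = a - a.amax(dim=-1, keepdim=True)
    e = a.exp()
    return e / e.sum(dim=-1, keepdim=True)  # == torch.softmax(attn.float(), dim=-1)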
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_552: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_23, torch.float16); div_23 = None
expand_70: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_552, [100, 16, 49, 49]); convert_element_type_552 = None
view_497: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_70, [1600, 49, 49]); expand_70 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_53: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 2); permute_185 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_71: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_53, [100, 16, 49, 32]); select_53 = None
clone_216: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_71, memory_format = torch.contiguous_format); expand_71 = None
view_498: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_216, [1600, 49, 32]); clone_216 = None
bmm_35: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_497, view_498); view_497 = view_498 = None
view_499: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_35, [100, 16, 49, 32]); bmm_35 = None
permute_188: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_499, [0, 2, 1, 3]); view_499 = None
clone_217: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_188, memory_format = torch.contiguous_format); permute_188 = None
view_500: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_217, [100, 49, 512]); clone_217 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_501: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_500, [4900, 512]); view_500 = None
convert_element_type_556: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg259_1, torch.float16); arg259_1 = None
permute_189: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_556, [1, 0]); convert_element_type_556 = None

# No stacktrace found for following nodes
mm_default_18: "f16[4900, 512]" = torch.ops.aten.mm.default(view_501, permute_189); view_501 = permute_189 = None
add_tensor_18: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_18, convert_element_type_555); mm_default_18 = convert_element_type_555 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_502: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_18, [100, 49, 512]); add_tensor_18 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_503: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_502, [-1, 7, 7, 512]); view_502 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_504: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_503, [1, 10, 10, 7, 7, -1]); view_503 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_190: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_504, [0, 1, 3, 2, 4, 5]); view_504 = None
clone_219: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_190, memory_format = torch.contiguous_format); permute_190 = None
view_505: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_219, [1, 70, 70, -1]); clone_219 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_34: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_192: "i64[70]" = torch.ops.aten.add.Tensor(iota_34, 67); iota_34 = None
fmod_34: "i64[70]" = torch.ops.aten.fmod.Scalar(add_192, 70); add_192 = None
index_52: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_505, [None, fmod_34]); view_505 = fmod_34 = None
iota_35: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_193: "i64[70]" = torch.ops.aten.add.Tensor(iota_35, 67); iota_35 = None
fmod_35: "i64[70]" = torch.ops.aten.fmod.Scalar(add_193, 70); add_193 = None
index_53: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_52, [None, None, fmod_35]); index_52 = fmod_35 = None
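# NOTE (annotation, not a traced node): the iota/add/fmod/index chains are how
# torch.roll is lowered to a gather: adding 3 before the attention rolled the
# map by -3, and adding 67 (== -3 mod 70) here rolls it by +3, undoing the
# cyclic shift. A minimal sketch of the pattern for a (B, H, W, C) tensor:
def _roll2d(x, shift):
    import torch
    H, W = x.shape[1], x.shape[2]
    rows = (torch.arange(H, device=x.device) - shift) % H
    cols = (torch.arange(W, device=x.device) - shift) % W
    return x[:, rows][:, :, cols]  # == torch.roll(x, (shift, shift), dims=(1, 2))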
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_549: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_53, 1, 0, 68); index_53 = None
slice_550: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_549, 2, 0, 68); slice_549 = None
clone_220: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_550, memory_format = torch.contiguous_format); slice_550 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_506: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_220, [1, 4624, 512]); clone_220 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_194: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_185, view_506); add_185 = view_506 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_560: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_194, torch.float32)
var_mean_40 = torch.ops.aten.var_mean.correction(convert_element_type_560, [2], correction = 0, keepdim = True)
getitem_80: "f32[1, 4624, 1]" = var_mean_40[0]
getitem_81: "f32[1, 4624, 1]" = var_mean_40[1]; var_mean_40 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_569: "f16[512]" = torch.ops.prims.convert_element_type.default(arg266_1, torch.float16); arg266_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_561: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg264_1, torch.float16); arg264_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_61: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_560, getitem_81); convert_element_type_560 = getitem_81 = None
add_195: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_80, 1e-05); getitem_80 = None
rsqrt_40: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_195); add_195 = None
mul_155: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_61, rsqrt_40); sub_61 = rsqrt_40 = None
mul_156: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_155, arg261_1); mul_155 = arg261_1 = None
add_196: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_156, arg262_1); mul_156 = arg262_1 = None
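# NOTE (annotation, not a traced node): the next few nodes are the recurring
# mixed-precision linear pattern seen throughout this graph: input, weight and
# bias are cast to fp16, the weight is transposed, and F.linear becomes an
# explicit mm plus bias add. Sketch of what one such group computes (names
# illustrative):
def _linear_fp16(x2d, weight, bias):
    import torch
    return torch.mm(x2d.half(), weight.half().t()) + bias.half()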
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_563: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_196, torch.float16); add_196 = None
view_507: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_563, [4624, 512]); convert_element_type_563 = None
convert_element_type_562: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg263_1, torch.float16); arg263_1 = None
permute_191: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_562, [1, 0]); convert_element_type_562 = None

# No stacktrace found for following nodes
mm_default_17: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_507, permute_191); view_507 = permute_191 = None
add_tensor_17: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_17, convert_element_type_561); mm_default_17 = convert_element_type_561 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_508: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_17, [1, 4624, 2048]); add_tensor_17 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_567: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_508, torch.float32); view_508 = None
mul_157: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_567, 0.5)
mul_158: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_567, 0.7071067811865476); convert_element_type_567 = None
erf_17: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_158); mul_158 = None
add_197: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_17, 1); erf_17 = None
mul_159: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_157, add_197); mul_157 = add_197 = None
convert_element_type_568: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_159, torch.float16); mul_159 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_509: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_568, [4624, 2048]); convert_element_type_568 = None
convert_element_type_570: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg265_1, torch.float16); arg265_1 = None
permute_192: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_570, [1, 0]); convert_element_type_570 = None

# No stacktrace found for following nodes
mm_default_16: "f16[4624, 512]" = torch.ops.aten.mm.default(view_509, permute_192); view_509 = permute_192 = None
add_tensor_16: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_16, convert_element_type_569); mm_default_16 = convert_element_type_569 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_510: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_16, [1, 4624, 512]); add_tensor_16 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_198: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_194, view_510); add_194 = view_510 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_574: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_198, torch.float32)
var_mean_41 = torch.ops.aten.var_mean.correction(convert_element_type_574, [2], correction = 0, keepdim = True)
getitem_82: "f32[1, 4624, 1]" = var_mean_41[0]
getitem_83: "f32[1, 4624, 1]" = var_mean_41[1]; var_mean_41 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_586: "f16[512]" = torch.ops.prims.convert_element_type.default(arg274_1, torch.float16); arg274_1 = None
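# NOTE (annotation, not a traced node): the mul 0.5 / erf / add 1 / mul chain
# a few nodes above is the exact (erf-based) GELU, evaluated in fp32:
# gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))), with 0.7071067811865476 = 1/sqrt(2).
# Minimal sketch of what those nodes compute:
def _gelu_erf_fp32(x):
    import torch
    x32 = x.float()
    return 0.5 * x32 * (1.0 + torch.erf(x32 * 0.7071067811865476))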
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_575: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg270_1, torch.float16); arg270_1 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_62: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_574, getitem_83); convert_element_type_574 = getitem_83 = None
add_199: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_82, 1e-05); getitem_82 = None
rsqrt_41: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_199); add_199 = None
mul_160: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_62, rsqrt_41); sub_62 = rsqrt_41 = None
mul_161: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_160, arg267_1); mul_160 = arg267_1 = None
add_200: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_161, arg268_1); mul_161 = arg268_1 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_511: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_200, [1, 68, 68, 512]); add_200 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_18: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_511, [0, 0, 0, 2, 0, 2], 0.0); view_511 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_512: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_18, [1, 10, 7, 10, 7, 512]); constant_pad_nd_18 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_193: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_512, [0, 1, 3, 2, 4, 5]); view_512 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_223: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_193, memory_format = torch.contiguous_format); permute_193 = None
view_513: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_223, [-1, 7, 7, 512]); clone_223 = None

# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_514: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_513, [-1, 49, 512]); view_513 = None

# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_577: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_514, torch.float16); view_514 = None
view_515: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_577, [4900, 512]); convert_element_type_577 = None
convert_element_type_576: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg269_1, torch.float16); arg269_1 = None
permute_194: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_576, [1, 0]); convert_element_type_576 = None

# No stacktrace found for following nodes
mm_default_15: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_515, permute_194); view_515 = permute_194 = None
add_tensor_15: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_15, convert_element_type_575); mm_default_15 = convert_element_type_575 = None
# File:
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_516: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_15, [100, 49, 1536]); add_tensor_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_517: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_516, [100, 49, 3, 16, 32]); view_516 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_195: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_517, [2, 0, 3, 1, 4]); view_517 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_54: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 0) # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_162: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_54, 0.1767766952966369); select_54 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_72: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_162, [100, 16, 49, 32]); mul_162 = None clone_224: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_72, memory_format = torch.contiguous_format); expand_72 = None view_518: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_224, [1600, 49, 32]); clone_224 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_55: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_196: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_55, [0, 1, 3, 2]); select_55 = None expand_73: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_196, [100, 16, 32, 49]); permute_196 = None clone_225: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_73, memory_format = torch.contiguous_format); expand_73 = None view_519: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_225, [1600, 32, 49]); clone_225 = None bmm_36: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_518, view_519); view_518 = view_519 = None view_520: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_36, [100, 16, 49, 49]); bmm_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_521: "i64[2401]" = torch.ops.aten.reshape.default(arg272_1, [-1]); arg272_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_54: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg271_1, [view_521]); arg271_1 = view_521 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_522: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_54, [49, 49, -1]); index_54 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_197: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_522, [2, 0, 1]); 
view_522 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_226: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_197, memory_format = torch.contiguous_format); permute_197 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_42: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_226, 0); clone_226 = None add_201: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_520, unsqueeze_42); view_520 = unsqueeze_42 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_18: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_201, [-1], True) sub_63: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_201, amax_18); add_201 = amax_18 = None exp_18: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_63); sub_63 = None sum_19: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_18, [-1], True) div_24: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_18, sum_19); exp_18 = sum_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_583: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_24, torch.float16); div_24 = None expand_74: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_583, [100, 16, 49, 49]); convert_element_type_583 = None view_523: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_74, [1600, 49, 49]); expand_74 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_56: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 2); permute_195 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_75: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_56, [100, 16, 49, 32]); select_56 = None clone_228: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_75, memory_format = torch.contiguous_format); expand_75 = None view_524: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_228, [1600, 49, 32]); clone_228 = None bmm_37: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_523, view_524); view_523 = view_524 = None view_525: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_37, [100, 16, 49, 32]); bmm_37 = None permute_198: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_525, [0, 2, 1, 3]); view_525 = None clone_229: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_198, memory_format = torch.contiguous_format); permute_198 = None view_526: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_229, [100, 49, 512]); clone_229 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_527: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_526, [4900, 512]); view_526 = None convert_element_type_587: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg273_1, torch.float16); arg273_1 = None permute_199: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_587, [1, 0]); convert_element_type_587 = None # No stacktrace found for 
following nodes mm_default_14: "f16[4900, 512]" = torch.ops.aten.mm.default(view_527, permute_199); view_527 = permute_199 = None add_tensor_14: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_14, convert_element_type_586); mm_default_14 = convert_element_type_586 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_528: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_14, [100, 49, 512]); add_tensor_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_529: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_528, [-1, 7, 7, 512]); view_528 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_530: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_529, [1, 10, 10, 7, 7, -1]); view_529 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_200: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_530, [0, 1, 3, 2, 4, 5]); view_530 = None clone_231: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_200, memory_format = torch.contiguous_format); permute_200 = None view_531: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_231, [1, 70, 70, -1]); clone_231 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_553: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_531, 1, 0, 68); view_531 = None slice_554: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_553, 2, 0, 68); slice_553 = None clone_232: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_554, memory_format = torch.contiguous_format); slice_554 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_532: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_232, [1, 4624, 512]); clone_232 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_202: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_198, view_532); add_198 = view_532 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_591: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_202, torch.float32) var_mean_42 = torch.ops.aten.var_mean.correction(convert_element_type_591, [2], correction = 0, keepdim = True) getitem_84: "f32[1, 4624, 1]" = var_mean_42[0] getitem_85: "f32[1, 4624, 1]" = var_mean_42[1]; var_mean_42 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_600: "f16[512]" = torch.ops.prims.convert_element_type.default(arg280_1, torch.float16); arg280_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_592: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg278_1, torch.float16); arg278_1 = None # File: 
/opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_64: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_591, getitem_85); convert_element_type_591 = getitem_85 = None add_203: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_84, 1e-05); getitem_84 = None rsqrt_42: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_203); add_203 = None mul_163: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_64, rsqrt_42); sub_64 = rsqrt_42 = None mul_164: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_163, arg275_1); mul_163 = arg275_1 = None add_204: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_164, arg276_1); mul_164 = arg276_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_594: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_204, torch.float16); add_204 = None view_533: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_594, [4624, 512]); convert_element_type_594 = None convert_element_type_593: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg277_1, torch.float16); arg277_1 = None permute_201: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_593, [1, 0]); convert_element_type_593 = None # No stacktrace found for following nodes mm_default_13: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_533, permute_201); view_533 = permute_201 = None add_tensor_13: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_13, convert_element_type_592); mm_default_13 = convert_element_type_592 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_534: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_13, [1, 4624, 2048]); add_tensor_13 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_598: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_534, torch.float32); view_534 = None mul_165: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_598, 0.5) mul_166: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_598, 0.7071067811865476); convert_element_type_598 = None erf_18: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_166); mul_166 = None add_205: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_18, 1); erf_18 = None mul_167: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_165, add_205); mul_165 = add_205 = None convert_element_type_599: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_167, torch.float16); mul_167 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_535: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_599, [4624, 2048]); convert_element_type_599 = None convert_element_type_601: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg279_1, torch.float16); arg279_1 = None permute_202: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_601, [1, 0]); convert_element_type_601 = None # No stacktrace found for following nodes mm_default_12: "f16[4624, 512]" = torch.ops.aten.mm.default(view_535, 
permute_202); view_535 = permute_202 = None add_tensor_12: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_12, convert_element_type_600); mm_default_12 = convert_element_type_600 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_536: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_12, [1, 4624, 512]); add_tensor_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_206: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_202, view_536); add_202 = view_536 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_605: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_206, torch.float32) var_mean_43 = torch.ops.aten.var_mean.correction(convert_element_type_605, [2], correction = 0, keepdim = True) getitem_86: "f32[1, 4624, 1]" = var_mean_43[0] getitem_87: "f32[1, 4624, 1]" = var_mean_43[1]; var_mean_43 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_617: "f16[512]" = torch.ops.prims.convert_element_type.default(arg288_1, torch.float16); arg288_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_606: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg284_1, torch.float16); arg284_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_65: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_605, getitem_87); convert_element_type_605 = getitem_87 = None add_207: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None rsqrt_43: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_207); add_207 = None mul_168: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_65, rsqrt_43); sub_65 = rsqrt_43 = None mul_169: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_168, arg281_1); mul_168 = arg281_1 = None add_208: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_169, arg282_1); mul_169 = arg282_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_537: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_208, [1, 68, 68, 512]); add_208 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_19: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_537, [0, 0, 0, 2, 0, 2], 0.0); view_537 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_36: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_209: "i64[70]" = torch.ops.aten.add.Tensor(iota_36, 3); iota_36 = None fmod_36: "i64[70]" = torch.ops.aten.fmod.Scalar(add_209, 70); add_209 = None index_55: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_19, [None, fmod_36]); constant_pad_nd_19 = fmod_36 = None iota_37: "i64[70]" = torch.ops.prims.iota.default(70, start = 
0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_210: "i64[70]" = torch.ops.aten.add.Tensor(iota_37, 3); iota_37 = None fmod_37: "i64[70]" = torch.ops.aten.fmod.Scalar(add_210, 70); add_210 = None index_56: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_55, [None, None, fmod_37]); index_55 = fmod_37 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_538: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_56, [1, 10, 7, 10, 7, 512]); index_56 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_203: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_538, [0, 1, 3, 2, 4, 5]); view_538 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_235: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_203, memory_format = torch.contiguous_format); permute_203 = None view_539: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_235, [-1, 7, 7, 512]); clone_235 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_540: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_539, [-1, 49, 512]); view_539 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_608: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_540, torch.float16); view_540 = None view_541: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_608, [4900, 512]); convert_element_type_608 = None convert_element_type_607: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg283_1, torch.float16); arg283_1 = None permute_204: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_607, [1, 0]); convert_element_type_607 = None # No stacktrace found for following nodes mm_default_11: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_541, permute_204); view_541 = permute_204 = None add_tensor_11: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_11, convert_element_type_606); mm_default_11 = convert_element_type_606 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_542: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_11, [100, 49, 1536]); add_tensor_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_543: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_542, [100, 49, 3, 16, 32]); view_542 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_205: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_543, [2, 0, 3, 1, 4]); view_543 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_57: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 0) # File: 
/workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_170: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_57, 0.1767766952966369); select_57 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) expand_76: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_170, [100, 16, 49, 32]); mul_170 = None clone_236: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_76, memory_format = torch.contiguous_format); expand_76 = None view_544: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_236, [1600, 49, 32]); clone_236 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_58: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 1) # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_206: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_58, [0, 1, 3, 2]); select_58 = None expand_77: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_206, [100, 16, 32, 49]); permute_206 = None clone_237: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_77, memory_format = torch.contiguous_format); expand_77 = None view_545: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_237, [1600, 32, 49]); clone_237 = None bmm_38: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_544, view_545); view_544 = view_545 = None view_546: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_38, [100, 16, 49, 49]); bmm_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_547: "i64[2401]" = torch.ops.aten.reshape.default(arg286_1, [-1]); arg286_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_57: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg285_1, [view_547]); arg285_1 = view_547 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_548: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_57, [49, 49, -1]); index_57 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_207: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_548, [2, 0, 1]); view_548 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_238: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_207, memory_format = torch.contiguous_format); permute_207 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_43: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_238, 0); clone_238 = None add_211: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_546, unsqueeze_43); view_546 = unsqueeze_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_549: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_211, [1, 100, 16, 49, 49]); add_211 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, 
code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_44: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_45: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_44, 0); unsqueeze_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_212: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_549, unsqueeze_45); view_549 = unsqueeze_45 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_550: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_212, [-1, 16, 49, 49]); add_212 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_19: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_550, [-1], True) sub_66: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_550, amax_19); view_550 = amax_19 = None exp_19: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_66); sub_66 = None sum_20: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_19, [-1], True) div_25: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_19, sum_20); exp_19 = sum_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_614: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_25, torch.float16); div_25 = None expand_78: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_614, [100, 16, 49, 49]); convert_element_type_614 = None view_551: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_78, [1600, 49, 49]); expand_78 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_59: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 2); permute_205 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) expand_79: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_59, [100, 16, 49, 32]); select_59 = None clone_240: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_79, memory_format = torch.contiguous_format); expand_79 = None view_552: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_240, [1600, 49, 32]); clone_240 = None bmm_39: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_551, view_552); view_551 = view_552 = None view_553: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_39, [100, 16, 49, 32]); bmm_39 = None permute_208: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_553, [0, 2, 1, 3]); view_553 = None clone_241: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_208, memory_format = torch.contiguous_format); permute_208 = None view_554: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_241, [100, 49, 512]); clone_241 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_555: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_554, [4900, 512]); view_554 = None convert_element_type_618: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg287_1, torch.float16); arg287_1 = None permute_209: "f16[512, 512]" = 
torch.ops.aten.permute.default(convert_element_type_618, [1, 0]); convert_element_type_618 = None # No stacktrace found for following nodes mm_default_10: "f16[4900, 512]" = torch.ops.aten.mm.default(view_555, permute_209); view_555 = permute_209 = None add_tensor_10: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_10, convert_element_type_617); mm_default_10 = convert_element_type_617 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_556: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_10, [100, 49, 512]); add_tensor_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_557: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_556, [-1, 7, 7, 512]); view_556 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_558: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_557, [1, 10, 10, 7, 7, -1]); view_557 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_210: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_558, [0, 1, 3, 2, 4, 5]); view_558 = None clone_243: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_210, memory_format = torch.contiguous_format); permute_210 = None view_559: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_243, [1, 70, 70, -1]); clone_243 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_38: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_213: "i64[70]" = torch.ops.aten.add.Tensor(iota_38, 67); iota_38 = None fmod_38: "i64[70]" = torch.ops.aten.fmod.Scalar(add_213, 70); add_213 = None index_58: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_559, [None, fmod_38]); view_559 = fmod_38 = None iota_39: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_214: "i64[70]" = torch.ops.aten.add.Tensor(iota_39, 67); iota_39 = None fmod_39: "i64[70]" = torch.ops.aten.fmod.Scalar(add_214, 70); add_214 = None index_59: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_58, [None, None, fmod_39]); index_58 = fmod_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_563: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_59, 1, 0, 68); index_59 = None slice_564: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_563, 2, 0, 68); slice_563 = None clone_244: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_564, memory_format = torch.contiguous_format); slice_564 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_560: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_244, [1, 4624, 512]); clone_244 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_215: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_206, view_560); 
add_206 = view_560 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_622: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_215, torch.float32) var_mean_44 = torch.ops.aten.var_mean.correction(convert_element_type_622, [2], correction = 0, keepdim = True) getitem_88: "f32[1, 4624, 1]" = var_mean_44[0] getitem_89: "f32[1, 4624, 1]" = var_mean_44[1]; var_mean_44 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_631: "f16[512]" = torch.ops.prims.convert_element_type.default(arg294_1, torch.float16); arg294_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_623: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg292_1, torch.float16); arg292_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_67: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_622, getitem_89); convert_element_type_622 = getitem_89 = None add_216: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None rsqrt_44: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_216); add_216 = None mul_171: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_67, rsqrt_44); sub_67 = rsqrt_44 = None mul_172: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_171, arg289_1); mul_171 = arg289_1 = None add_217: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_172, arg290_1); mul_172 = arg290_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_625: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_217, torch.float16); add_217 = None view_561: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_625, [4624, 512]); convert_element_type_625 = None convert_element_type_624: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg291_1, torch.float16); arg291_1 = None permute_211: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_624, [1, 0]); convert_element_type_624 = None # No stacktrace found for following nodes mm_default_9: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_561, permute_211); view_561 = permute_211 = None add_tensor_9: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_9, convert_element_type_623); mm_default_9 = convert_element_type_623 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_562: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_9, [1, 4624, 2048]); add_tensor_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_629: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_562, torch.float32); view_562 = None mul_173: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_629, 0.5) mul_174: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_629, 0.7071067811865476); convert_element_type_629 = None erf_19: "f32[1, 4624, 
2048]" = torch.ops.aten.erf.default(mul_174); mul_174 = None add_218: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_19, 1); erf_19 = None mul_175: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_173, add_218); mul_173 = add_218 = None convert_element_type_630: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_175, torch.float16); mul_175 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_563: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_630, [4624, 2048]); convert_element_type_630 = None convert_element_type_632: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg293_1, torch.float16); arg293_1 = None permute_212: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_632, [1, 0]); convert_element_type_632 = None # No stacktrace found for following nodes mm_default_8: "f16[4624, 512]" = torch.ops.aten.mm.default(view_563, permute_212); view_563 = permute_212 = None add_tensor_8: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_8, convert_element_type_631); mm_default_8 = convert_element_type_631 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) view_564: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_8, [1, 4624, 512]); add_tensor_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_219: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_215, view_564); add_215 = view_564 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_636: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_219, torch.float32) var_mean_45 = torch.ops.aten.var_mean.correction(convert_element_type_636, [2], correction = 0, keepdim = True) getitem_90: "f32[1, 4624, 1]" = var_mean_45[0] getitem_91: "f32[1, 4624, 1]" = var_mean_45[1]; var_mean_45 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_648: "f16[512]" = torch.ops.prims.convert_element_type.default(arg302_1, torch.float16); arg302_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_637: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg298_1, torch.float16); arg298_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_68: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_636, getitem_91); convert_element_type_636 = getitem_91 = None add_220: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_90, 1e-05); getitem_90 = None rsqrt_45: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_220); add_220 = None mul_176: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_68, rsqrt_45); sub_68 = rsqrt_45 = None mul_177: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_176, arg295_1); mul_176 = arg295_1 = None add_221: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_177, arg296_1); mul_177 = arg296_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, 
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_565: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_221, [1, 68, 68, 512]); add_221 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_20: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_565, [0, 0, 0, 2, 0, 2], 0.0); view_565 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_566: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(constant_pad_nd_20, [1, 10, 7, 10, 7, 512]); constant_pad_nd_20 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_213: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_566, [0, 1, 3, 2, 4, 5]); view_566 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_247: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_213, memory_format = torch.contiguous_format); permute_213 = None
view_567: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_247, [-1, 7, 7, 512]); clone_247 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_568: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_567, [-1, 49, 512]); view_567 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_639: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_568, torch.float16); view_568 = None
view_569: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_639, [4900, 512]); convert_element_type_639 = None
convert_element_type_638: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg297_1, torch.float16); arg297_1 = None
permute_214: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_638, [1, 0]); convert_element_type_638 = None
# No stacktrace found for following nodes
mm_default_7: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_569, permute_214); view_569 = permute_214 = None
add_tensor_7: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_7, convert_element_type_637); mm_default_7 = convert_element_type_637 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_570: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_7, [100, 49, 1536]); add_tensor_7 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_571: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_570, [100, 49, 3, 16, 32]); view_570 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_215: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_571, [2, 0, 3, 1, 4]); view_571 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_60: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 0)
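# NOTE: the pad -> view -> permute -> view chain above is window_partition from the
# traced swin_transformer.py: the 68x68 map is zero-padded to 70x70 so it tiles into
# 10x10 = 100 windows of 7x7 = 49 tokens, and the fused qkv output is reshaped to
# (3, B_, num_heads, N, head_dim) = (3, 100, 16, 49, 32). Reconstructed from the
# inline "code:" comments, so approximate:
import torch

def window_partition(x, window_size=7):
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)

def split_qkv(qkv, num_heads=16):
    B_, N, three_c = qkv.shape
    qkv = qkv.reshape(B_, N, 3, num_heads, three_c // 3 // num_heads).permute(2, 0, 3, 1, 4)
    return qkv[0], qkv[1], qkv[2]   # the select_60/61/62 nodes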
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_178: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_60, 0.1767766952966369); select_60 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_80: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_178, [100, 16, 49, 32]); mul_178 = None
clone_248: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_80, memory_format = torch.contiguous_format); expand_80 = None
view_572: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_248, [1600, 49, 32]); clone_248 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_61: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_216: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_61, [0, 1, 3, 2]); select_61 = None
expand_81: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_216, [100, 16, 32, 49]); permute_216 = None
clone_249: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_81, memory_format = torch.contiguous_format); expand_81 = None
view_573: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_249, [1600, 32, 49]); clone_249 = None
bmm_40: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_572, view_573); view_572 = view_573 = None
view_574: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_40, [100, 16, 49, 49]); bmm_40 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_575: "i64[2401]" = torch.ops.aten.reshape.default(arg300_1, [-1]); arg300_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_60: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg299_1, [view_575]); arg299_1 = view_575 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_576: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_60, [49, 49, -1]); index_60 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_217: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_576, [2, 0, 1]); view_576 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_250: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_217, memory_format = torch.contiguous_format); permute_217 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_46: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_250, 0); clone_250 = None
add_222: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_574, unsqueeze_46); view_574 = unsqueeze_46 = None
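# NOTE: index_60 gathers the (2*7-1)^2 = 169-row relative-position bias table
# (arg299_1) with the 49*49 = 2401 precomputed indices (arg300_1), yielding one
# (num_heads, 49, 49) bias that is broadcast-added to all 100 windows; the earlier
# q scale 0.1767766952966369 is head_dim**-0.5 = 32**-0.5. Hedged sketch of the
# bias add:
import torch

def add_relative_position_bias(attn, table, index):
    # attn: [B_, nH, N, N]; table: [169, nH] float32; index: [N, N] int64
    N = index.shape[0]
    bias = table[index.view(-1)].view(N, N, -1)    # index_60 / view_576
    bias = bias.permute(2, 0, 1).contiguous()      # permute_217 / clone_250
    return attn + bias.unsqueeze(0)                # unsqueeze_46 / add_222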
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_20: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_222, [-1], True)
sub_69: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_222, amax_20); add_222 = amax_20 = None
exp_20: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_69); sub_69 = None
sum_21: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_20, [-1], True)
div_26: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_20, sum_21); exp_20 = sum_21 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_645: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_26, torch.float16); div_26 = None
expand_82: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_645, [100, 16, 49, 49]); convert_element_type_645 = None
view_577: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_82, [1600, 49, 49]); expand_82 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_62: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 2); permute_215 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_83: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_62, [100, 16, 49, 32]); select_62 = None
clone_252: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_83, memory_format = torch.contiguous_format); expand_83 = None
view_578: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_252, [1600, 49, 32]); clone_252 = None
bmm_41: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_577, view_578); view_577 = view_578 = None
view_579: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_41, [100, 16, 49, 32]); bmm_41 = None
permute_218: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_579, [0, 2, 1, 3]); view_579 = None
clone_253: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_218, memory_format = torch.contiguous_format); permute_218 = None
view_580: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_253, [100, 49, 512]); clone_253 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_581: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_580, [4900, 512]); view_580 = None
convert_element_type_649: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg301_1, torch.float16); arg301_1 = None
permute_219: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_649, [1, 0]); convert_element_type_649 = None
# No stacktrace found for following nodes
mm_default_6: "f16[4900, 512]" = torch.ops.aten.mm.default(view_581, permute_219); view_581 = permute_219 = None
add_tensor_6: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_6, convert_element_type_648); mm_default_6 = convert_element_type_648 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_582: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_6, [100, 49, 512]); add_tensor_6 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_583: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_582, [-1, 7, 7, 512]); view_582 = None
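# NOTE: amax_20 .. div_26 above are the numerically stable softmax decomposition
# (subtract the per-row max, exponentiate, normalize), kept in float32 and only cast
# back to float16 for the attn @ v batched matmul (bmm_41). Minimal equivalent:
import torch

def softmax_decomposed(attn):
    attn = attn - attn.amax(dim=-1, keepdim=True)   # amax_20 / sub_69
    attn = attn.exp()                               # exp_20
    return attn / attn.sum(dim=-1, keepdim=True)    # sum_21 / div_26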
view_584: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_583, [1, 10, 10, 7, 7, -1]); view_583 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_220: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_584, [0, 1, 3, 2, 4, 5]); view_584 = None clone_255: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_220, memory_format = torch.contiguous_format); permute_220 = None view_585: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_255, [1, 70, 70, -1]); clone_255 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_567: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(view_585, 1, 0, 68); view_585 = None slice_568: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_567, 2, 0, 68); slice_567 = None clone_256: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_568, memory_format = torch.contiguous_format); slice_568 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_586: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_256, [1, 4624, 512]); clone_256 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_223: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_219, view_586); add_219 = view_586 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_653: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_223, torch.float32) var_mean_46 = torch.ops.aten.var_mean.correction(convert_element_type_653, [2], correction = 0, keepdim = True) getitem_92: "f32[1, 4624, 1]" = var_mean_46[0] getitem_93: "f32[1, 4624, 1]" = var_mean_46[1]; var_mean_46 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_662: "f16[512]" = torch.ops.prims.convert_element_type.default(arg308_1, torch.float16); arg308_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_654: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg306_1, torch.float16); arg306_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( sub_70: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_653, getitem_93); convert_element_type_653 = getitem_93 = None add_224: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_92, 1e-05); getitem_92 = None rsqrt_46: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_224); add_224 = None mul_179: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_70, rsqrt_46); sub_70 = rsqrt_46 = None mul_180: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_179, arg303_1); mul_179 = arg303_1 = None add_225: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_180, arg304_1); mul_180 = arg304_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_656: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_225, 
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_656: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_225, torch.float16); add_225 = None
view_587: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_656, [4624, 512]); convert_element_type_656 = None
convert_element_type_655: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg305_1, torch.float16); arg305_1 = None
permute_221: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_655, [1, 0]); convert_element_type_655 = None
# No stacktrace found for following nodes
mm_default_5: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_587, permute_221); view_587 = permute_221 = None
add_tensor_5: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_5, convert_element_type_654); mm_default_5 = convert_element_type_654 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_588: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_5, [1, 4624, 2048]); add_tensor_5 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_660: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_588, torch.float32); view_588 = None
mul_181: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_660, 0.5)
mul_182: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_660, 0.7071067811865476); convert_element_type_660 = None
erf_20: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_182); mul_182 = None
add_226: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_20, 1); erf_20 = None
mul_183: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_181, add_226); mul_181 = add_226 = None
convert_element_type_661: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_183, torch.float16); mul_183 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_589: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_661, [4624, 2048]); convert_element_type_661 = None
convert_element_type_663: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg307_1, torch.float16); arg307_1 = None
permute_222: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_663, [1, 0]); convert_element_type_663 = None
# No stacktrace found for following nodes
mm_default_4: "f16[4624, 512]" = torch.ops.aten.mm.default(view_589, permute_222); view_589 = permute_222 = None
add_tensor_4: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default_4, convert_element_type_662); mm_default_4 = convert_element_type_662 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_590: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor_4, [1, 4624, 512]); add_tensor_4 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_227: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_223, view_590); add_223 = view_590 = None
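# NOTE: mul_181 .. mul_183 above are F.gelu with approximate='none', decomposed into
# its erf form and evaluated in float32: gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))),
# with 0.7071067811865476 = 1/sqrt(2). Direct eager-mode rendering:
import torch

def gelu_decomposed(x):
    x32 = x.to(torch.float32)
    out = 0.5 * x32 * (1.0 + torch.erf(x32 * 0.7071067811865476))
    return out.to(torch.float16)                    # convert_element_type_661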
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_667: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_227, torch.float32)
var_mean_47 = torch.ops.aten.var_mean.correction(convert_element_type_667, [2], correction = 0, keepdim = True)
getitem_94: "f32[1, 4624, 1]" = var_mean_47[0]
getitem_95: "f32[1, 4624, 1]" = var_mean_47[1]; var_mean_47 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_679: "f16[512]" = torch.ops.prims.convert_element_type.default(arg316_1, torch.float16); arg316_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_668: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg312_1, torch.float16); arg312_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_71: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_667, getitem_95); convert_element_type_667 = getitem_95 = None
add_228: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_94, 1e-05); getitem_94 = None
rsqrt_47: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_228); add_228 = None
mul_184: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_71, rsqrt_47); sub_71 = rsqrt_47 = None
mul_185: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_184, arg309_1); mul_184 = arg309_1 = None
add_229: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_185, arg310_1); mul_185 = arg310_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C)
view_591: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_229, [1, 68, 68, 512]); add_229 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
constant_pad_nd_21: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_591, [0, 0, 0, 2, 0, 2], 0.0); view_591 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x,
iota_40: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_230: "i64[70]" = torch.ops.aten.add.Tensor(iota_40, 3); iota_40 = None
fmod_40: "i64[70]" = torch.ops.aten.fmod.Scalar(add_230, 70); add_230 = None
index_61: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(constant_pad_nd_21, [None, fmod_40]); constant_pad_nd_21 = fmod_40 = None
iota_41: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_231: "i64[70]" = torch.ops.aten.add.Tensor(iota_41, 3); iota_41 = None
fmod_41: "i64[70]" = torch.ops.aten.fmod.Scalar(add_231, 70); add_231 = None
index_62: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_61, [None, None, fmod_41]); index_61 = fmod_41 = None
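# NOTE: the iota / add / fmod / index chains above are the lowering of
# torch.roll(x, shifts=(-3, -3), dims=(1, 2)) for the shifted-window block: rolling a
# length-70 axis by -3 is a gather with indices (arange(70) + 3) % 70, applied once
# per spatial dim. Equivalence check (illustrative):
import torch

def roll_as_index(x, shift, dim):
    n = x.shape[dim]
    idx = (torch.arange(n, device=x.device) - shift) % n    # iota_40 / add_230 / fmod_40
    return x.index_select(dim, idx)                         # index_61 / index_62

x = torch.randn(1, 70, 70, 512)
assert torch.equal(roll_as_index(roll_as_index(x, -3, 1), -3, 2),
                   torch.roll(x, shifts=(-3, -3), dims=(1, 2)))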
# File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size,
view_592: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.reshape.default(index_62, [1, 10, 7, 10, 7, 512]); index_62 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4,
permute_223: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_592, [0, 1, 3, 2, 4, 5]); view_592 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C)
clone_259: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_223, memory_format = torch.contiguous_format); permute_223 = None
view_593: "f32[100, 7, 7, 512]" = torch.ops.aten.reshape.default(clone_259, [-1, 7, 7, 512]); clone_259 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size,
view_594: "f32[100, 49, 512]" = torch.ops.aten.reshape.default(view_593, [-1, 49, 512]); view_593 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_670: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_594, torch.float16); view_594 = None
view_595: "f16[4900, 512]" = torch.ops.aten.reshape.default(convert_element_type_670, [4900, 512]); convert_element_type_670 = None
convert_element_type_669: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg311_1, torch.float16); arg311_1 = None
permute_224: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_669, [1, 0]); convert_element_type_669 = None
# No stacktrace found for following nodes
mm_default_3: "f16[4900, 1536]" = torch.ops.aten.mm.default(view_595, permute_224); view_595 = permute_224 = None
add_tensor_3: "f16[4900, 1536]" = torch.ops.aten.add.Tensor(mm_default_3, convert_element_type_668); mm_default_3 = convert_element_type_668 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_596: "f16[100, 49, 1536]" = torch.ops.aten.reshape.default(add_tensor_3, [100, 49, 1536]); add_tensor_3 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads,
view_597: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.reshape.default(view_596, [100, 49, 3, 16, 32]); view_596 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4)
permute_225: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_597, [2, 0, 3, 1, 4]); view_597 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_63: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 0)
# File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale
mul_186: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_63, 0.1767766952966369); select_63 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
expand_84: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_186, [100, 16, 49, 32]); mul_186 = None
clone_260: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_84, memory_format = torch.contiguous_format); expand_84 = None
view_598: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_260, [1600, 49, 32]); clone_260 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_64: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 1)
# File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1))
permute_226: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_64, [0, 1, 3, 2]); select_64 = None
expand_85: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_226, [100, 16, 32, 49]); permute_226 = None
clone_261: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_85, memory_format = torch.contiguous_format); expand_85 = None
view_599: "f16[1600, 32, 49]" = torch.ops.aten.reshape.default(clone_261, [1600, 32, 49]); clone_261 = None
bmm_42: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_598, view_599); view_598 = view_599 = None
view_600: "f16[100, 16, 49, 49]" = torch.ops.aten.reshape.default(bmm_42, [100, 16, 49, 49]); bmm_42 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_601: "i64[2401]" = torch.ops.aten.reshape.default(arg314_1, [-1]); arg314_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[
index_63: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg313_1, [view_601]); arg313_1 = view_601 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view(
view_602: "f32[49, 49, 16]" = torch.ops.aten.reshape.default(index_63, [49, 49, -1]); index_63 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute(
permute_227: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_602, [2, 0, 1]); view_602 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
clone_262: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_227, memory_format = torch.contiguous_format); permute_227 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0)
unsqueeze_47: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_262, 0); clone_262 = None
add_232: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_600, unsqueeze_47); view_600 = unsqueeze_47 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
view_603: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_232, [1, 100, 16, 49, 49]); add_232 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0)
unsqueeze_48: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1); where_5 = None
unsqueeze_49: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_48, 0); unsqueeze_48 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N,
add_233: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_603, unsqueeze_49); view_603 = unsqueeze_49 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N)
view_604: "f32[100, 16, 49, 49]" = torch.ops.aten.reshape.default(add_233, [-1, 16, 49, 49]); add_233 = None
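# NOTE: view_603 / add_233 above implement the shifted-window attention mask: the
# attention map is reshaped to (B_//nW, nW, num_heads, N, N) and the precomputed
# window mask (where_5, built earlier in the graph, 0 for allowed pairs and a large
# negative value for masked ones) is broadcast-added, then flattened back. Sketch
# reconstructed from the "code:" comments:
import torch

def apply_window_mask(attn, mask, num_heads=16, N=49):
    nW = mask.shape[0]                                      # 100 windows
    attn = attn.view(-1, nW, num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
    return attn.view(-1, num_heads, N, N)                   # view_604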
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5)
amax_21: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_604, [-1], True)
sub_72: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_604, amax_21); view_604 = amax_21 = None
exp_21: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_72); sub_72 = None
sum_22: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_21, [-1], True)
div_27: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_21, sum_22); exp_21 = sum_22 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
convert_element_type_676: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(div_27, torch.float16); div_27 = None
expand_86: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_676, [100, 16, 49, 49]); convert_element_type_676 = None
view_605: "f16[1600, 49, 49]" = torch.ops.aten.reshape.default(expand_86, [1600, 49, 49]); expand_86 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[
select_65: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 2); permute_225 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
expand_87: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_65, [100, 16, 49, 32]); select_65 = None
clone_264: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_87, memory_format = torch.contiguous_format); expand_87 = None
view_606: "f16[1600, 49, 32]" = torch.ops.aten.reshape.default(clone_264, [1600, 49, 32]); clone_264 = None
bmm_43: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_605, view_606); view_605 = view_606 = None
view_607: "f16[100, 16, 49, 32]" = torch.ops.aten.reshape.default(bmm_43, [100, 16, 49, 32]); bmm_43 = None
permute_228: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_607, [0, 2, 1, 3]); view_607 = None
clone_265: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_228, memory_format = torch.contiguous_format); permute_228 = None
view_608: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(clone_265, [100, 49, 512]); clone_265 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_609: "f16[4900, 512]" = torch.ops.aten.reshape.default(view_608, [4900, 512]); view_608 = None
convert_element_type_680: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg315_1, torch.float16); arg315_1 = None
permute_229: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_680, [1, 0]); convert_element_type_680 = None
# No stacktrace found for following nodes
mm_default_2: "f16[4900, 512]" = torch.ops.aten.mm.default(view_609, permute_229); view_609 = permute_229 = None
add_tensor_2: "f16[4900, 512]" = torch.ops.aten.add.Tensor(mm_default_2, convert_element_type_679); mm_default_2 = convert_element_type_679 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_610: "f16[100, 49, 512]" = torch.ops.aten.reshape.default(add_tensor_2, [100, 49, 512]); add_tensor_2 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size,
view_611: "f16[100, 7, 7, 512]" = torch.ops.aten.reshape.default(view_610, [-1, 7, 7, 512]); view_610 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size,
view_612: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.reshape.default(view_611, [1, 10, 10, 7, 7, -1]); view_611 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
permute_230: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_612, [0, 1, 3, 2, 4, 5]); view_612 = None
clone_267: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_230, memory_format = torch.contiguous_format); permute_230 = None
view_613: "f16[1, 70, 70, 512]" = torch.ops.aten.reshape.default(clone_267, [1, 70, 70, -1]); clone_267 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x,
iota_42: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_234: "i64[70]" = torch.ops.aten.add.Tensor(iota_42, 67); iota_42 = None
fmod_42: "i64[70]" = torch.ops.aten.fmod.Scalar(add_234, 70); add_234 = None
index_64: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(view_613, [None, fmod_42]); view_613 = fmod_42 = None
iota_43: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False)
add_235: "i64[70]" = torch.ops.aten.add.Tensor(iota_43, 67); iota_43 = None
fmod_43: "i64[70]" = torch.ops.aten.fmod.Scalar(add_235, 70); add_235 = None
index_65: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(index_64, [None, None, fmod_43]); index_64 = fmod_43 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous()
slice_577: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(index_65, 1, 0, 68); index_65 = None
slice_578: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_577, 2, 0, 68); slice_577 = None
clone_268: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_578, memory_format = torch.contiguous_format); slice_578 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C)
view_614: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(clone_268, [1, 4624, 512]); clone_268 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x)
add_236: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_227, view_614); add_227 = view_614 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_684: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_236, torch.float32)
var_mean_48 = torch.ops.aten.var_mean.correction(convert_element_type_684, [2], correction = 0, keepdim = True)
getitem_96: "f32[1, 4624, 1]" = var_mean_48[0]
getitem_97: "f32[1, 4624, 1]" = var_mean_48[1]; var_mean_48 = None
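# NOTE: index_64 / index_65 above undo the cyclic shift, i.e.
# torch.roll(x, shifts=(3, 3), dims=(1, 2)): adding 67 modulo 70 is the same as
# subtracting 3. Also note that self.drop_path never shows up as an op: the module
# was evidently traced with drop_path inactive (eval mode or rate 0), so both
# residual connections reduce to the plain adds add_236 and add_240. Equivalence
# check (illustrative):
import torch

x = torch.randn(1, 70, 70, 512)
idx = (torch.arange(70) + 67) % 70                          # iota_42 / add_234 / fmod_42
assert torch.equal(x[:, idx][:, :, idx], torch.roll(x, shifts=(3, 3), dims=(1, 2)))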
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_693: "f16[512]" = torch.ops.prims.convert_element_type.default(arg322_1, torch.float16); arg322_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_685: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg320_1, torch.float16); arg320_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_73: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_684, getitem_97); convert_element_type_684 = getitem_97 = None
add_237: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_96, 1e-05); getitem_96 = None
rsqrt_48: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_237); add_237 = None
mul_187: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_73, rsqrt_48); sub_73 = rsqrt_48 = None
mul_188: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_187, arg317_1); mul_187 = arg317_1 = None
add_238: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_188, arg318_1); mul_188 = arg318_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
convert_element_type_687: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_238, torch.float16); add_238 = None
view_615: "f16[4624, 512]" = torch.ops.aten.reshape.default(convert_element_type_687, [4624, 512]); convert_element_type_687 = None
convert_element_type_686: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg319_1, torch.float16); arg319_1 = None
permute_231: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_686, [1, 0]); convert_element_type_686 = None
# No stacktrace found for following nodes
mm_default_1: "f16[4624, 2048]" = torch.ops.aten.mm.default(view_615, permute_231); view_615 = permute_231 = None
add_tensor_1: "f16[4624, 2048]" = torch.ops.aten.add.Tensor(mm_default_1, convert_element_type_685); mm_default_1 = convert_element_type_685 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_616: "f16[1, 4624, 2048]" = torch.ops.aten.reshape.default(add_tensor_1, [1, 4624, 2048]); add_tensor_1 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate)
convert_element_type_691: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_616, torch.float32); view_616 = None
mul_189: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_691, 0.5)
mul_190: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_691, 0.7071067811865476); convert_element_type_691 = None
erf_21: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_190); mul_190 = None
add_239: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_21, 1); erf_21 = None
mul_191: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_189, add_239); mul_189 = add_239 = None
convert_element_type_692: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_191, torch.float16); mul_191 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_617: "f16[4624, 2048]" = torch.ops.aten.reshape.default(convert_element_type_692, [4624, 2048]); convert_element_type_692 = None
convert_element_type_694: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg321_1, torch.float16); arg321_1 = None
permute_232: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_694, [1, 0]); convert_element_type_694 = None
# No stacktrace found for following nodes
mm_default: "f16[4624, 512]" = torch.ops.aten.mm.default(view_617, permute_232); view_617 = permute_232 = None
add_tensor: "f16[4624, 512]" = torch.ops.aten.add.Tensor(mm_default, convert_element_type_693); mm_default = convert_element_type_693 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
view_618: "f16[1, 4624, 512]" = torch.ops.aten.reshape.default(add_tensor, [1, 4624, 512]); add_tensor = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x)))
add_240: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_236, view_618); add_236 = view_618 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
convert_element_type_698: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_240, torch.float32); add_240 = None
var_mean_49 = torch.ops.aten.var_mean.correction(convert_element_type_698, [2], correction = 0, keepdim = True)
getitem_98: "f32[1, 4624, 1]" = var_mean_49[0]
getitem_99: "f32[1, 4624, 1]" = var_mean_49[1]; var_mean_49 = None
sub_9: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_22, getitem_13); add_22 = getitem_13 = None
add_25: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_12, 1e-05); getitem_12 = None
rsqrt_6: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_25); add_25 = None
mul_22: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_6); sub_9 = rsqrt_6 = None
mul_23: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_22, arg36_1); mul_22 = arg36_1 = None
add_26: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_23, arg37_1); mul_23 = arg37_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W,
view_65: "f32[1, 272, 272, 128]" = torch.ops.aten.reshape.default(add_26, [-1, 272, 272, 128]); add_26 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1,
permute_26: "f32[1, 128, 272, 272]" = torch.ops.aten.permute.default(view_65, [0, 3, 1, 2]); view_65 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous()
clone_27: "f32[1, 128, 272, 272]" = torch.ops.aten.clone.default(permute_26, memory_format = torch.contiguous_format); permute_26 = None
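# NOTE: sub_9 .. clone_27 above are the LayerNorm + reshape output head for stage 0
# (the 128-channel, 272x272 map); Inductor has sunk the three per-stage output norms
# to the end of the graph, which is why much earlier tensors (add_22, getitem_13,
# arg36_1, ...) reappear here. Each returned feature map follows the same pattern
# from swin_transformer.py:715-717, roughly:
import torch

def stage_output(x_out, norm, H, W, C):
    # x_out: [1, H*W, C] token sequence -> [1, C, H, W] contiguous feature map
    return norm(x_out).view(-1, H, W, C).permute(0, 3, 1, 2).contiguous()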
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_18: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_137, getitem_25); convert_element_type_137 = getitem_25 = None
add_50: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_24, 1e-05); getitem_24 = None
rsqrt_12: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_50); add_50 = None
mul_44: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_12); sub_18 = rsqrt_12 = None
mul_45: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_44, arg69_1); mul_44 = arg69_1 = None
add_51: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_45, arg70_1); mul_45 = arg70_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W,
view_128: "f32[1, 136, 136, 256]" = torch.ops.aten.reshape.default(add_51, [-1, 136, 136, 256]); add_51 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1,
permute_50: "f32[1, 256, 136, 136]" = torch.ops.aten.permute.default(view_128, [0, 3, 1, 2]); view_128 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous()
clone_53: "f32[1, 256, 136, 136]" = torch.ops.aten.clone.default(permute_50, memory_format = torch.contiguous_format); permute_50 = None
# File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
sub_74: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_698, getitem_99); convert_element_type_698 = getitem_99 = None
add_241: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_98, 1e-05); getitem_98 = None
rsqrt_49: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_241); add_241 = None
mul_192: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_74, rsqrt_49); sub_74 = rsqrt_49 = None
mul_193: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_192, arg323_1); mul_192 = arg323_1 = None
add_242: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_193, arg324_1); mul_193 = arg324_1 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W,
view_619: "f32[1, 68, 68, 512]" = torch.ops.aten.reshape.default(add_242, [-1, 68, 68, 512]); add_242 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1,
permute_233: "f32[1, 512, 68, 68]" = torch.ops.aten.permute.default(view_619, [0, 3, 1, 2]); view_619 = None
# File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous()
clone_271: "f32[1, 512, 68, 68]" = torch.ops.aten.clone.default(permute_233, memory_format = torch.contiguous_format); permute_233 = None
return (clone_27, clone_53, clone_271)
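# NOTE: the graph returns the stage feature maps (1, 128, 272, 272),
# (1, 256, 136, 136) and (1, 512, 68, 68) as a tuple. A dump in this style, with
# aten/prims ops, "# File:" provenance comments, and "No stacktrace found" markers on
# pattern-matched mm/add pairs, is what torch.compile's graph logging emits; on a
# recent PyTorch something like the following should reproduce the format (flag names
# vary by version, so treat this as an assumption):
#   TORCH_LOGS="post_grad_graphs" python run_model.py   # run_model.py is hypothetical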