class (torch.nn.Module): def forward(self, arg0_1: "f32[1, 3, 1088, 1088]", arg1_1: "f32[128, 3, 4, 4]", arg2_1: "f32[128]", arg3_1: "f32[128]", arg4_1: "f32[128]", arg5_1: "f32[128]", arg6_1: "f32[128]", arg7_1: "f32[384, 128]", arg8_1: "f32[384]", arg9_1: "f32[169, 4]", arg10_1: "i64[49, 49]", arg11_1: "f32[128, 128]", arg12_1: "f32[128]", arg13_1: "f32[128]", arg14_1: "f32[128]", arg15_1: "f32[512, 128]", arg16_1: "f32[512]", arg17_1: "f32[128, 512]", arg18_1: "f32[128]", arg19_1: "f32[128]", arg20_1: "f32[128]", arg21_1: "f32[384, 128]", arg22_1: "f32[384]", arg23_1: "f32[169, 4]", arg24_1: "i64[49, 49]", arg25_1: "f32[128, 128]", arg26_1: "f32[128]", arg27_1: "f32[128]", arg28_1: "f32[128]", arg29_1: "f32[512, 128]", arg30_1: "f32[512]", arg31_1: "f32[128, 512]", arg32_1: "f32[128]", arg33_1: "f32[512]", arg34_1: "f32[512]", arg35_1: "f32[256, 512]", arg36_1: "f32[128]", arg37_1: "f32[128]", arg38_1: "f32[256]", arg39_1: "f32[256]", arg40_1: "f32[768, 256]", arg41_1: "f32[768]", arg42_1: "f32[169, 8]", arg43_1: "i64[49, 49]", arg44_1: "f32[256, 256]", arg45_1: "f32[256]", arg46_1: "f32[256]", arg47_1: "f32[256]", arg48_1: "f32[1024, 256]", arg49_1: "f32[1024]", arg50_1: "f32[256, 1024]", arg51_1: "f32[256]", arg52_1: "f32[256]", arg53_1: "f32[256]", arg54_1: "f32[768, 256]", arg55_1: "f32[768]", arg56_1: "f32[169, 8]", arg57_1: "i64[49, 49]", arg58_1: "f32[256, 256]", arg59_1: "f32[256]", arg60_1: "f32[256]", arg61_1: "f32[256]", arg62_1: "f32[1024, 256]", arg63_1: "f32[1024]", arg64_1: "f32[256, 1024]", arg65_1: "f32[256]", arg66_1: "f32[1024]", arg67_1: "f32[1024]", arg68_1: "f32[512, 1024]", arg69_1: "f32[256]", arg70_1: "f32[256]", arg71_1: "f32[512]", arg72_1: "f32[512]", arg73_1: "f32[1536, 512]", arg74_1: "f32[1536]", arg75_1: "f32[169, 16]", arg76_1: "i64[49, 49]", arg77_1: "f32[512, 512]", arg78_1: "f32[512]", arg79_1: "f32[512]", arg80_1: "f32[512]", arg81_1: "f32[2048, 512]", arg82_1: "f32[2048]", arg83_1: "f32[512, 2048]", arg84_1: "f32[512]", arg85_1: "f32[512]", arg86_1: "f32[512]", arg87_1: "f32[1536, 512]", arg88_1: "f32[1536]", arg89_1: "f32[169, 16]", arg90_1: "i64[49, 49]", arg91_1: "f32[512, 512]", arg92_1: "f32[512]", arg93_1: "f32[512]", arg94_1: "f32[512]", arg95_1: "f32[2048, 512]", arg96_1: "f32[2048]", arg97_1: "f32[512, 2048]", arg98_1: "f32[512]", arg99_1: "f32[512]", arg100_1: "f32[512]", arg101_1: "f32[1536, 512]", arg102_1: "f32[1536]", arg103_1: "f32[169, 16]", arg104_1: "i64[49, 49]", arg105_1: "f32[512, 512]", arg106_1: "f32[512]", arg107_1: "f32[512]", arg108_1: "f32[512]", arg109_1: "f32[2048, 512]", arg110_1: "f32[2048]", arg111_1: "f32[512, 2048]", arg112_1: "f32[512]", arg113_1: "f32[512]", arg114_1: "f32[512]", arg115_1: "f32[1536, 512]", arg116_1: "f32[1536]", arg117_1: "f32[169, 16]", arg118_1: "i64[49, 49]", arg119_1: "f32[512, 512]", arg120_1: "f32[512]", arg121_1: "f32[512]", arg122_1: "f32[512]", arg123_1: "f32[2048, 512]", arg124_1: "f32[2048]", arg125_1: "f32[512, 2048]", arg126_1: "f32[512]", arg127_1: "f32[512]", arg128_1: "f32[512]", arg129_1: "f32[1536, 512]", arg130_1: "f32[1536]", arg131_1: "f32[169, 16]", arg132_1: "i64[49, 49]", arg133_1: "f32[512, 512]", arg134_1: "f32[512]", arg135_1: "f32[512]", arg136_1: "f32[512]", arg137_1: "f32[2048, 512]", arg138_1: "f32[2048]", arg139_1: "f32[512, 2048]", arg140_1: "f32[512]", arg141_1: "f32[512]", arg142_1: "f32[512]", arg143_1: "f32[1536, 512]", arg144_1: "f32[1536]", arg145_1: "f32[169, 16]", arg146_1: "i64[49, 49]", arg147_1: "f32[512, 512]", arg148_1: "f32[512]", arg149_1: "f32[512]", arg150_1: "f32[512]", arg151_1: "f32[2048, 512]", arg152_1: "f32[2048]", arg153_1: "f32[512, 2048]", arg154_1: "f32[512]", arg155_1: "f32[512]", arg156_1: "f32[512]", arg157_1: "f32[1536, 512]", arg158_1: "f32[1536]", arg159_1: "f32[169, 16]", arg160_1: "i64[49, 49]", arg161_1: "f32[512, 512]", arg162_1: "f32[512]", arg163_1: "f32[512]", arg164_1: "f32[512]", arg165_1: "f32[2048, 512]", arg166_1: "f32[2048]", arg167_1: "f32[512, 2048]", arg168_1: "f32[512]", arg169_1: "f32[512]", arg170_1: "f32[512]", arg171_1: "f32[1536, 512]", arg172_1: "f32[1536]", arg173_1: "f32[169, 16]", arg174_1: "i64[49, 49]", arg175_1: "f32[512, 512]", arg176_1: "f32[512]", arg177_1: "f32[512]", arg178_1: "f32[512]", arg179_1: "f32[2048, 512]", arg180_1: "f32[2048]", arg181_1: "f32[512, 2048]", arg182_1: "f32[512]", arg183_1: "f32[512]", arg184_1: "f32[512]", arg185_1: "f32[1536, 512]", arg186_1: "f32[1536]", arg187_1: "f32[169, 16]", arg188_1: "i64[49, 49]", arg189_1: "f32[512, 512]", arg190_1: "f32[512]", arg191_1: "f32[512]", arg192_1: "f32[512]", arg193_1: "f32[2048, 512]", arg194_1: "f32[2048]", arg195_1: "f32[512, 2048]", arg196_1: "f32[512]", arg197_1: "f32[512]", arg198_1: "f32[512]", arg199_1: "f32[1536, 512]", arg200_1: "f32[1536]", arg201_1: "f32[169, 16]", arg202_1: "i64[49, 49]", arg203_1: "f32[512, 512]", arg204_1: "f32[512]", arg205_1: "f32[512]", arg206_1: "f32[512]", arg207_1: "f32[2048, 512]", arg208_1: "f32[2048]", arg209_1: "f32[512, 2048]", arg210_1: "f32[512]", arg211_1: "f32[512]", arg212_1: "f32[512]", arg213_1: "f32[1536, 512]", arg214_1: "f32[1536]", arg215_1: "f32[169, 16]", arg216_1: "i64[49, 49]", arg217_1: "f32[512, 512]", arg218_1: "f32[512]", arg219_1: "f32[512]", arg220_1: "f32[512]", arg221_1: "f32[2048, 512]", arg222_1: "f32[2048]", arg223_1: "f32[512, 2048]", arg224_1: "f32[512]", arg225_1: "f32[512]", arg226_1: "f32[512]", arg227_1: "f32[1536, 512]", arg228_1: "f32[1536]", arg229_1: "f32[169, 16]", arg230_1: "i64[49, 49]", arg231_1: "f32[512, 512]", arg232_1: "f32[512]", arg233_1: "f32[512]", arg234_1: "f32[512]", arg235_1: "f32[2048, 512]", arg236_1: "f32[2048]", arg237_1: "f32[512, 2048]", arg238_1: "f32[512]", arg239_1: "f32[512]", arg240_1: "f32[512]", arg241_1: "f32[1536, 512]", arg242_1: "f32[1536]", arg243_1: "f32[169, 16]", arg244_1: "i64[49, 49]", arg245_1: "f32[512, 512]", arg246_1: "f32[512]", arg247_1: "f32[512]", arg248_1: "f32[512]", arg249_1: "f32[2048, 512]", arg250_1: "f32[2048]", arg251_1: "f32[512, 2048]", arg252_1: "f32[512]", arg253_1: "f32[512]", arg254_1: "f32[512]", arg255_1: "f32[1536, 512]", arg256_1: "f32[1536]", arg257_1: "f32[169, 16]", arg258_1: "i64[49, 49]", arg259_1: "f32[512, 512]", arg260_1: "f32[512]", arg261_1: "f32[512]", arg262_1: "f32[512]", arg263_1: "f32[2048, 512]", arg264_1: "f32[2048]", arg265_1: "f32[512, 2048]", arg266_1: "f32[512]", arg267_1: "f32[512]", arg268_1: "f32[512]", arg269_1: "f32[1536, 512]", arg270_1: "f32[1536]", arg271_1: "f32[169, 16]", arg272_1: "i64[49, 49]", arg273_1: "f32[512, 512]", arg274_1: "f32[512]", arg275_1: "f32[512]", arg276_1: "f32[512]", arg277_1: "f32[2048, 512]", arg278_1: "f32[2048]", arg279_1: "f32[512, 2048]", arg280_1: "f32[512]", arg281_1: "f32[512]", arg282_1: "f32[512]", arg283_1: "f32[1536, 512]", arg284_1: "f32[1536]", arg285_1: "f32[169, 16]", arg286_1: "i64[49, 49]", arg287_1: "f32[512, 512]", arg288_1: "f32[512]", arg289_1: "f32[512]", arg290_1: "f32[512]", arg291_1: "f32[2048, 512]", arg292_1: "f32[2048]", arg293_1: "f32[512, 2048]", arg294_1: "f32[512]", arg295_1: "f32[512]", arg296_1: "f32[512]", arg297_1: "f32[1536, 512]", arg298_1: "f32[1536]", arg299_1: "f32[169, 16]", arg300_1: "i64[49, 49]", arg301_1: "f32[512, 512]", arg302_1: "f32[512]", arg303_1: "f32[512]", arg304_1: "f32[512]", arg305_1: "f32[2048, 512]", arg306_1: "f32[2048]", arg307_1: "f32[512, 2048]", arg308_1: "f32[512]", arg309_1: "f32[512]", arg310_1: "f32[512]", arg311_1: "f32[1536, 512]", arg312_1: "f32[1536]", arg313_1: "f32[169, 16]", arg314_1: "i64[49, 49]", arg315_1: "f32[512, 512]", arg316_1: "f32[512]", arg317_1: "f32[512]", arg318_1: "f32[512]", arg319_1: "f32[2048, 512]", arg320_1: "f32[2048]", arg321_1: "f32[512, 2048]", arg322_1: "f32[512]", arg323_1: "f32[512]", arg324_1: "f32[512]"): # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/conv.py:453 in _conv_forward, code: return F.conv2d(input, weight, bias, self.stride, convert_element_type: "f16[128]" = torch.ops.prims.convert_element_type.default(arg2_1, torch.float16); arg2_1 = None convert_element_type_1: "f16[128, 3, 4, 4]" = torch.ops.prims.convert_element_type.default(arg1_1, torch.float16); arg1_1 = None convert_element_type_2: "f16[1, 3, 1088, 1088]" = torch.ops.prims.convert_element_type.default(arg0_1, torch.float16); arg0_1 = None convolution: "f16[1, 128, 272, 272]" = torch.ops.aten.convolution.default(convert_element_type_2, convert_element_type_1, convert_element_type, [4, 4], [0, 0], [1, 1], False, [0, 0], 1); convert_element_type_2 = convert_element_type_1 = convert_element_type = None # File: /workspace/networks/encoders/swin/swin_transformer.py:520 in forward, code: x = x.flatten(2).transpose(1, 2) view: "f16[1, 128, 73984]" = torch.ops.aten.view.default(convolution, [1, 128, 73984]); convolution = None permute: "f16[1, 73984, 128]" = torch.ops.aten.permute.default(view, [0, 2, 1]); view = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_3: "f32[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(permute, torch.float32); permute = None clone: "f32[1, 73984, 128]" = torch.ops.aten.clone.default(convert_element_type_3, memory_format = torch.contiguous_format); convert_element_type_3 = None var_mean = torch.ops.aten.var_mean.correction(clone, [2], correction = 0, keepdim = True) getitem: "f32[1, 73984, 1]" = var_mean[0] getitem_1: "f32[1, 73984, 1]" = var_mean[1]; var_mean = None add: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem, 1e-05); getitem = None rsqrt: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add); add = None sub: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(clone, getitem_1); clone = getitem_1 = None mul: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub, rsqrt); sub = rsqrt = None mul_1: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul, arg3_1); mul = arg3_1 = None add_1: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_1, arg4_1); mul_1 = arg4_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:522 in forward, code: x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) permute_1: "f32[1, 128, 73984]" = torch.ops.aten.permute.default(add_1, [0, 2, 1]); add_1 = None view_1: "f32[1, 128, 272, 272]" = torch.ops.aten.view.default(permute_1, [-1, 128, 272, 272]); permute_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:703 in forward, code: x = x.flatten(2).transpose(1, 2) view_2: "f32[1, 128, 73984]" = torch.ops.aten.view.default(view_1, [1, 128, 73984]); view_1 = None permute_2: "f32[1, 73984, 128]" = torch.ops.aten.permute.default(view_2, [0, 2, 1]); view_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_1: "f32[1, 73984, 128]" = torch.ops.aten.clone.default(permute_2); permute_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 full: "f32[1, 273, 273, 1]" = torch.ops.aten.full.default([1, 273, 273, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt _tensor_constant2 = self._tensor_constant2 lift_fresh_copy_2: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant2); _tensor_constant2 = None slice_1: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(full, 0, 0, 9223372036854775807) slice_2: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_1, 1, 0, -7); slice_1 = None slice_3: "f32[1, 266, 266, 1]" = torch.ops.aten.slice.Tensor(slice_2, 2, 0, -7); slice_2 = None slice_4: "f32[1, 266, 266, 1]" = torch.ops.aten.slice.Tensor(slice_3, 3, 0, 9223372036854775807); slice_3 = None copy: "f32[1, 266, 266, 1]" = torch.ops.aten.copy.default(slice_4, lift_fresh_copy_2); slice_4 = lift_fresh_copy_2 = None slice_5: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(full, 0, 0, 9223372036854775807) slice_6: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_5, 1, 0, -7) slice_7: "f32[1, 266, 266, 1]" = torch.ops.aten.slice.Tensor(slice_6, 2, 0, -7) slice_scatter: "f32[1, 266, 266, 1]" = torch.ops.aten.slice_scatter.default(slice_7, copy, 3, 0, 9223372036854775807); slice_7 = copy = None slice_scatter_1: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_6, slice_scatter, 2, 0, -7); slice_6 = slice_scatter = None slice_scatter_2: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_5, slice_scatter_1, 1, 0, -7); slice_5 = slice_scatter_1 = None slice_scatter_3: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(full, slice_scatter_2, 0, 0, 9223372036854775807); full = slice_scatter_2 = None _tensor_constant3 = self._tensor_constant3 lift_fresh_copy_3: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant3); _tensor_constant3 = None slice_16: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_3, 0, 0, 9223372036854775807) slice_17: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_16, 1, 0, -7); slice_16 = None slice_18: "f32[1, 266, 4, 1]" = torch.ops.aten.slice.Tensor(slice_17, 2, -7, -3); slice_17 = None slice_19: "f32[1, 266, 4, 1]" = torch.ops.aten.slice.Tensor(slice_18, 3, 0, 9223372036854775807); slice_18 = None copy_1: "f32[1, 266, 4, 1]" = torch.ops.aten.copy.default(slice_19, lift_fresh_copy_3); slice_19 = lift_fresh_copy_3 = None slice_20: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_3, 0, 0, 9223372036854775807) slice_21: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_20, 1, 0, -7) slice_22: "f32[1, 266, 4, 1]" = torch.ops.aten.slice.Tensor(slice_21, 2, -7, -3) slice_scatter_4: "f32[1, 266, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_22, copy_1, 3, 0, 9223372036854775807); slice_22 = copy_1 = None slice_scatter_5: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_21, slice_scatter_4, 2, -7, -3); slice_21 = slice_scatter_4 = None slice_scatter_6: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_20, slice_scatter_5, 1, 0, -7); slice_20 = slice_scatter_5 = None slice_scatter_7: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_3, slice_scatter_6, 0, 0, 9223372036854775807); slice_scatter_3 = slice_scatter_6 = None _tensor_constant4 = self._tensor_constant4 lift_fresh_copy_4: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant4); _tensor_constant4 = None slice_31: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_7, 0, 0, 9223372036854775807) slice_32: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_31, 1, 0, -7); slice_31 = None slice_33: "f32[1, 266, 3, 1]" = torch.ops.aten.slice.Tensor(slice_32, 2, -3, 9223372036854775807); slice_32 = None slice_34: "f32[1, 266, 3, 1]" = torch.ops.aten.slice.Tensor(slice_33, 3, 0, 9223372036854775807); slice_33 = None copy_2: "f32[1, 266, 3, 1]" = torch.ops.aten.copy.default(slice_34, lift_fresh_copy_4); slice_34 = lift_fresh_copy_4 = None slice_35: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_7, 0, 0, 9223372036854775807) slice_36: "f32[1, 266, 273, 1]" = torch.ops.aten.slice.Tensor(slice_35, 1, 0, -7) slice_37: "f32[1, 266, 3, 1]" = torch.ops.aten.slice.Tensor(slice_36, 2, -3, 9223372036854775807) slice_scatter_8: "f32[1, 266, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_37, copy_2, 3, 0, 9223372036854775807); slice_37 = copy_2 = None slice_scatter_9: "f32[1, 266, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_36, slice_scatter_8, 2, -3, 9223372036854775807); slice_36 = slice_scatter_8 = None slice_scatter_10: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_35, slice_scatter_9, 1, 0, -7); slice_35 = slice_scatter_9 = None slice_scatter_11: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_7, slice_scatter_10, 0, 0, 9223372036854775807); slice_scatter_7 = slice_scatter_10 = None _tensor_constant5 = self._tensor_constant5 lift_fresh_copy_5: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant5); _tensor_constant5 = None slice_46: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_11, 0, 0, 9223372036854775807) slice_47: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_46, 1, -7, -3); slice_46 = None slice_48: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(slice_47, 2, 0, -7); slice_47 = None slice_49: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(slice_48, 3, 0, 9223372036854775807); slice_48 = None copy_3: "f32[1, 4, 266, 1]" = torch.ops.aten.copy.default(slice_49, lift_fresh_copy_5); slice_49 = lift_fresh_copy_5 = None slice_50: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_11, 0, 0, 9223372036854775807) slice_51: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_50, 1, -7, -3) slice_52: "f32[1, 4, 266, 1]" = torch.ops.aten.slice.Tensor(slice_51, 2, 0, -7) slice_scatter_12: "f32[1, 4, 266, 1]" = torch.ops.aten.slice_scatter.default(slice_52, copy_3, 3, 0, 9223372036854775807); slice_52 = copy_3 = None slice_scatter_13: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_51, slice_scatter_12, 2, 0, -7); slice_51 = slice_scatter_12 = None slice_scatter_14: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_50, slice_scatter_13, 1, -7, -3); slice_50 = slice_scatter_13 = None slice_scatter_15: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_11, slice_scatter_14, 0, 0, 9223372036854775807); slice_scatter_11 = slice_scatter_14 = None _tensor_constant6 = self._tensor_constant6 lift_fresh_copy_6: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant6); _tensor_constant6 = None slice_61: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_15, 0, 0, 9223372036854775807) slice_62: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_61, 1, -7, -3); slice_61 = None slice_63: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_62, 2, -7, -3); slice_62 = None slice_64: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_63, 3, 0, 9223372036854775807); slice_63 = None copy_4: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_64, lift_fresh_copy_6); slice_64 = lift_fresh_copy_6 = None slice_65: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_15, 0, 0, 9223372036854775807) slice_66: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_65, 1, -7, -3) slice_67: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_66, 2, -7, -3) slice_scatter_16: "f32[1, 4, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_67, copy_4, 3, 0, 9223372036854775807); slice_67 = copy_4 = None slice_scatter_17: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_66, slice_scatter_16, 2, -7, -3); slice_66 = slice_scatter_16 = None slice_scatter_18: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_65, slice_scatter_17, 1, -7, -3); slice_65 = slice_scatter_17 = None slice_scatter_19: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_15, slice_scatter_18, 0, 0, 9223372036854775807); slice_scatter_15 = slice_scatter_18 = None _tensor_constant7 = self._tensor_constant7 lift_fresh_copy_7: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant7); _tensor_constant7 = None slice_76: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_19, 0, 0, 9223372036854775807) slice_77: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_76, 1, -7, -3); slice_76 = None slice_78: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_77, 2, -3, 9223372036854775807); slice_77 = None slice_79: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_78, 3, 0, 9223372036854775807); slice_78 = None copy_5: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_79, lift_fresh_copy_7); slice_79 = lift_fresh_copy_7 = None slice_80: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_19, 0, 0, 9223372036854775807) slice_81: "f32[1, 4, 273, 1]" = torch.ops.aten.slice.Tensor(slice_80, 1, -7, -3) slice_82: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_81, 2, -3, 9223372036854775807) slice_scatter_20: "f32[1, 4, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_82, copy_5, 3, 0, 9223372036854775807); slice_82 = copy_5 = None slice_scatter_21: "f32[1, 4, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_81, slice_scatter_20, 2, -3, 9223372036854775807); slice_81 = slice_scatter_20 = None slice_scatter_22: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_80, slice_scatter_21, 1, -7, -3); slice_80 = slice_scatter_21 = None slice_scatter_23: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_19, slice_scatter_22, 0, 0, 9223372036854775807); slice_scatter_19 = slice_scatter_22 = None _tensor_constant8 = self._tensor_constant8 lift_fresh_copy_8: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant8); _tensor_constant8 = None slice_91: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_23, 0, 0, 9223372036854775807) slice_92: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_91, 1, -3, 9223372036854775807); slice_91 = None slice_93: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(slice_92, 2, 0, -7); slice_92 = None slice_94: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(slice_93, 3, 0, 9223372036854775807); slice_93 = None copy_6: "f32[1, 3, 266, 1]" = torch.ops.aten.copy.default(slice_94, lift_fresh_copy_8); slice_94 = lift_fresh_copy_8 = None slice_95: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_23, 0, 0, 9223372036854775807) slice_96: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_95, 1, -3, 9223372036854775807) slice_97: "f32[1, 3, 266, 1]" = torch.ops.aten.slice.Tensor(slice_96, 2, 0, -7) slice_scatter_24: "f32[1, 3, 266, 1]" = torch.ops.aten.slice_scatter.default(slice_97, copy_6, 3, 0, 9223372036854775807); slice_97 = copy_6 = None slice_scatter_25: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_96, slice_scatter_24, 2, 0, -7); slice_96 = slice_scatter_24 = None slice_scatter_26: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_95, slice_scatter_25, 1, -3, 9223372036854775807); slice_95 = slice_scatter_25 = None slice_scatter_27: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_23, slice_scatter_26, 0, 0, 9223372036854775807); slice_scatter_23 = slice_scatter_26 = None _tensor_constant9 = self._tensor_constant9 lift_fresh_copy_9: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant9); _tensor_constant9 = None slice_106: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_27, 0, 0, 9223372036854775807) slice_107: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_106, 1, -3, 9223372036854775807); slice_106 = None slice_108: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_107, 2, -7, -3); slice_107 = None slice_109: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_108, 3, 0, 9223372036854775807); slice_108 = None copy_7: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_109, lift_fresh_copy_9); slice_109 = lift_fresh_copy_9 = None slice_110: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_27, 0, 0, 9223372036854775807) slice_111: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_110, 1, -3, 9223372036854775807) slice_112: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_111, 2, -7, -3) slice_scatter_28: "f32[1, 3, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_112, copy_7, 3, 0, 9223372036854775807); slice_112 = copy_7 = None slice_scatter_29: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_111, slice_scatter_28, 2, -7, -3); slice_111 = slice_scatter_28 = None slice_scatter_30: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_110, slice_scatter_29, 1, -3, 9223372036854775807); slice_110 = slice_scatter_29 = None slice_scatter_31: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_27, slice_scatter_30, 0, 0, 9223372036854775807); slice_scatter_27 = slice_scatter_30 = None _tensor_constant10 = self._tensor_constant10 lift_fresh_copy_10: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant10); _tensor_constant10 = None slice_121: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_31, 0, 0, 9223372036854775807) slice_122: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_121, 1, -3, 9223372036854775807); slice_121 = None slice_123: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_122, 2, -3, 9223372036854775807); slice_122 = None slice_124: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_123, 3, 0, 9223372036854775807); slice_123 = None copy_8: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_124, lift_fresh_copy_10); slice_124 = lift_fresh_copy_10 = None slice_125: "f32[1, 273, 273, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_31, 0, 0, 9223372036854775807) slice_126: "f32[1, 3, 273, 1]" = torch.ops.aten.slice.Tensor(slice_125, 1, -3, 9223372036854775807) slice_127: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_126, 2, -3, 9223372036854775807) slice_scatter_32: "f32[1, 3, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_127, copy_8, 3, 0, 9223372036854775807); slice_127 = copy_8 = None slice_scatter_33: "f32[1, 3, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_126, slice_scatter_32, 2, -3, 9223372036854775807); slice_126 = slice_scatter_32 = None slice_scatter_34: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_125, slice_scatter_33, 1, -3, 9223372036854775807); slice_125 = slice_scatter_33 = None slice_scatter_35: "f32[1, 273, 273, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_31, slice_scatter_34, 0, 0, 9223372036854775807); slice_scatter_31 = slice_scatter_34 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) view_4: "f32[1, 39, 7, 39, 7, 1]" = torch.ops.aten.view.default(slice_scatter_35, [1, 39, 7, 39, 7, 1]); slice_scatter_35 = None permute_4: "f32[1, 39, 39, 7, 7, 1]" = torch.ops.aten.permute.default(view_4, [0, 1, 3, 2, 4, 5]); view_4 = None clone_2: "f32[1, 39, 39, 7, 7, 1]" = torch.ops.aten.clone.default(permute_4, memory_format = torch.contiguous_format); permute_4 = None view_5: "f32[1521, 7, 7, 1]" = torch.ops.aten.view.default(clone_2, [-1, 7, 7, 1]); clone_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1, view_6: "f32[1521, 49]" = torch.ops.aten.view.default(view_5, [-1, 49]); view_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) unsqueeze: "f32[1521, 1, 49]" = torch.ops.aten.unsqueeze.default(view_6, 1) unsqueeze_1: "f32[1521, 49, 1]" = torch.ops.aten.unsqueeze.default(view_6, 2); view_6 = None sub_1: "f32[1521, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze, unsqueeze_1); unsqueeze = unsqueeze_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0, ne: "b8[1521, 49, 49]" = torch.ops.aten.ne.Scalar(sub_1, 0) scalar_tensor: "f32[]" = torch.ops.aten.scalar_tensor.default(-100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where: "f32[1521, 49, 49]" = torch.ops.aten.where.self(ne, scalar_tensor, sub_1); ne = scalar_tensor = None # File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0)) eq: "b8[1521, 49, 49]" = torch.ops.aten.eq.Scalar(sub_1, 0); sub_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( scalar_tensor_1: "f32[]" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where_1: "f32[1521, 49, 49]" = torch.ops.aten.where.self(eq, scalar_tensor_1, where); eq = scalar_tensor_1 = where = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_1 = torch.ops.aten.var_mean.correction(clone_1, [2], correction = 0, keepdim = True) getitem_2: "f32[1, 73984, 1]" = var_mean_1[0] getitem_3: "f32[1, 73984, 1]" = var_mean_1[1]; var_mean_1 = None add_2: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_2, 1e-05); getitem_2 = None rsqrt_1: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_2); add_2 = None sub_2: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(clone_1, getitem_3); getitem_3 = None mul_4: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_2, rsqrt_1); sub_2 = rsqrt_1 = None mul_5: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_4, arg5_1); mul_4 = arg5_1 = None add_3: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_5, arg6_1); mul_5 = arg6_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_7: "f32[1, 272, 272, 128]" = torch.ops.aten.view.default(add_3, [1, 272, 272, 128]); add_3 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd: "f32[1, 273, 273, 128]" = torch.ops.aten.constant_pad_nd.default(view_7, [0, 0, 0, 1, 0, 1], 0.0); view_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_8: "f32[1, 39, 7, 39, 7, 128]" = torch.ops.aten.view.default(constant_pad_nd, [1, 39, 7, 39, 7, 128]); constant_pad_nd = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_5: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.permute.default(view_8, [0, 1, 3, 2, 4, 5]); view_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_3: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.clone.default(permute_5, memory_format = torch.contiguous_format); permute_5 = None view_9: "f32[1521, 7, 7, 128]" = torch.ops.aten.view.default(clone_3, [-1, 7, 7, 128]); clone_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_10: "f32[1521, 49, 128]" = torch.ops.aten.view.default(view_9, [-1, 49, 128]); view_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_6: "f16[384]" = torch.ops.prims.convert_element_type.default(arg8_1, torch.float16); arg8_1 = None convert_element_type_7: "f16[384, 128]" = torch.ops.prims.convert_element_type.default(arg7_1, torch.float16); arg7_1 = None convert_element_type_8: "f16[1521, 49, 128]" = torch.ops.prims.convert_element_type.default(view_10, torch.float16); view_10 = None view_11: "f16[74529, 128]" = torch.ops.aten.view.default(convert_element_type_8, [74529, 128]); convert_element_type_8 = None permute_6: "f16[128, 384]" = torch.ops.aten.permute.default(convert_element_type_7, [1, 0]); convert_element_type_7 = None addmm: "f16[74529, 384]" = torch.ops.aten.addmm.default(convert_element_type_6, view_11, permute_6); convert_element_type_6 = view_11 = permute_6 = None view_12: "f16[1521, 49, 384]" = torch.ops.aten.view.default(addmm, [1521, 49, 384]); addmm = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_13: "f16[1521, 49, 3, 4, 32]" = torch.ops.aten.view.default(view_12, [1521, 49, 3, 4, 32]); view_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_7: "f16[3, 1521, 4, 49, 32]" = torch.ops.aten.permute.default(view_13, [2, 0, 3, 1, 4]); view_13 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_7, 0, 0) select_1: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_7, 0, 1) select_2: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_7, 0, 2); permute_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_6: "f16[1521, 4, 49, 32]" = torch.ops.aten.mul.Tensor(select, 0.1767766952966369); select = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_8: "f16[1521, 4, 32, 49]" = torch.ops.aten.permute.default(select_1, [0, 1, 3, 2]); select_1 = None expand: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(mul_6, [1521, 4, 49, 32]); mul_6 = None clone_4: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand, memory_format = torch.contiguous_format); expand = None view_14: "f16[6084, 49, 32]" = torch.ops.aten.view.default(clone_4, [6084, 49, 32]); clone_4 = None expand_1: "f16[1521, 4, 32, 49]" = torch.ops.aten.expand.default(permute_8, [1521, 4, 32, 49]); permute_8 = None clone_5: "f16[1521, 4, 32, 49]" = torch.ops.aten.clone.default(expand_1, memory_format = torch.contiguous_format); expand_1 = None view_15: "f16[6084, 32, 49]" = torch.ops.aten.view.default(clone_5, [6084, 32, 49]); clone_5 = None bmm: "f16[6084, 49, 49]" = torch.ops.aten.bmm.default(view_14, view_15); view_14 = view_15 = None view_16: "f16[1521, 4, 49, 49]" = torch.ops.aten.view.default(bmm, [1521, 4, 49, 49]); bmm = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_17: "i64[2401]" = torch.ops.aten.view.default(arg10_1, [-1]); arg10_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index: "f32[2401, 4]" = torch.ops.aten.index.Tensor(arg9_1, [view_17]); arg9_1 = view_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_18: "f32[49, 49, 4]" = torch.ops.aten.view.default(index, [49, 49, -1]); index = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_9: "f32[4, 49, 49]" = torch.ops.aten.permute.default(view_18, [2, 0, 1]); view_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_6: "f32[4, 49, 49]" = torch.ops.aten.clone.default(permute_9, memory_format = torch.contiguous_format); permute_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_2: "f32[1, 4, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_6, 0); clone_6 = None add_4: "f32[1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_16, unsqueeze_2); view_16 = unsqueeze_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax: "f32[1521, 4, 49, 1]" = torch.ops.aten.amax.default(add_4, [-1], True) sub_3: "f32[1521, 4, 49, 49]" = torch.ops.aten.sub.Tensor(add_4, amax); add_4 = amax = None exp: "f32[1521, 4, 49, 49]" = torch.ops.aten.exp.default(sub_3); sub_3 = None sum_1: "f32[1521, 4, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp, [-1], True) div_2: "f32[1521, 4, 49, 49]" = torch.ops.aten.div.Tensor(exp, sum_1); exp = sum_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_7: "f32[1521, 4, 49, 49]" = torch.ops.aten.clone.default(div_2); div_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_14: "f16[1521, 4, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_7, torch.float16); clone_7 = None expand_2: "f16[1521, 4, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_14, [1521, 4, 49, 49]); convert_element_type_14 = None view_19: "f16[6084, 49, 49]" = torch.ops.aten.view.default(expand_2, [6084, 49, 49]); expand_2 = None expand_3: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(select_2, [1521, 4, 49, 32]); select_2 = None clone_8: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_3, memory_format = torch.contiguous_format); expand_3 = None view_20: "f16[6084, 49, 32]" = torch.ops.aten.view.default(clone_8, [6084, 49, 32]); clone_8 = None bmm_1: "f16[6084, 49, 32]" = torch.ops.aten.bmm.default(view_19, view_20); view_19 = view_20 = None view_21: "f16[1521, 4, 49, 32]" = torch.ops.aten.view.default(bmm_1, [1521, 4, 49, 32]); bmm_1 = None permute_10: "f16[1521, 49, 4, 32]" = torch.ops.aten.permute.default(view_21, [0, 2, 1, 3]); view_21 = None clone_9: "f16[1521, 49, 4, 32]" = torch.ops.aten.clone.default(permute_10, memory_format = torch.contiguous_format); permute_10 = None view_22: "f16[1521, 49, 128]" = torch.ops.aten.view.default(clone_9, [1521, 49, 128]); clone_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_17: "f16[128]" = torch.ops.prims.convert_element_type.default(arg12_1, torch.float16); arg12_1 = None convert_element_type_18: "f16[128, 128]" = torch.ops.prims.convert_element_type.default(arg11_1, torch.float16); arg11_1 = None view_23: "f16[74529, 128]" = torch.ops.aten.view.default(view_22, [74529, 128]); view_22 = None permute_11: "f16[128, 128]" = torch.ops.aten.permute.default(convert_element_type_18, [1, 0]); convert_element_type_18 = None addmm_1: "f16[74529, 128]" = torch.ops.aten.addmm.default(convert_element_type_17, view_23, permute_11); convert_element_type_17 = view_23 = permute_11 = None view_24: "f16[1521, 49, 128]" = torch.ops.aten.view.default(addmm_1, [1521, 49, 128]); addmm_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_10: "f16[1521, 49, 128]" = torch.ops.aten.clone.default(view_24); view_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_25: "f16[1521, 7, 7, 128]" = torch.ops.aten.view.default(clone_10, [-1, 7, 7, 128]); clone_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_26: "f16[1, 39, 39, 7, 7, 128]" = torch.ops.aten.view.default(view_25, [1, 39, 39, 7, 7, -1]); view_25 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_12: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.permute.default(view_26, [0, 1, 3, 2, 4, 5]); view_26 = None clone_11: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.clone.default(permute_12, memory_format = torch.contiguous_format); permute_12 = None view_27: "f16[1, 273, 273, 128]" = torch.ops.aten.view.default(clone_11, [1, 273, 273, -1]); clone_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_132: "f16[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(view_27, 0, 0, 9223372036854775807); view_27 = None slice_133: "f16[1, 272, 273, 128]" = torch.ops.aten.slice.Tensor(slice_132, 1, 0, 272); slice_132 = None slice_134: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_133, 2, 0, 272); slice_133 = None slice_135: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_134, 3, 0, 9223372036854775807); slice_134 = None clone_12: "f16[1, 272, 272, 128]" = torch.ops.aten.clone.default(slice_135, memory_format = torch.contiguous_format); slice_135 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_28: "f16[1, 73984, 128]" = torch.ops.aten.view.default(clone_12, [1, 73984, 128]); clone_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_5: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(clone_1, view_28); clone_1 = view_28 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_2 = torch.ops.aten.var_mean.correction(add_5, [2], correction = 0, keepdim = True) getitem_4: "f32[1, 73984, 1]" = var_mean_2[0] getitem_5: "f32[1, 73984, 1]" = var_mean_2[1]; var_mean_2 = None add_6: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_4, 1e-05); getitem_4 = None rsqrt_2: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_6); add_6 = None sub_4: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_5, getitem_5); getitem_5 = None mul_7: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_4, rsqrt_2); sub_4 = rsqrt_2 = None mul_8: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_7, arg13_1); mul_7 = arg13_1 = None add_7: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_8, arg14_1); mul_8 = arg14_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_22: "f16[512]" = torch.ops.prims.convert_element_type.default(arg16_1, torch.float16); arg16_1 = None convert_element_type_23: "f16[512, 128]" = torch.ops.prims.convert_element_type.default(arg15_1, torch.float16); arg15_1 = None convert_element_type_24: "f16[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(add_7, torch.float16); add_7 = None view_29: "f16[73984, 128]" = torch.ops.aten.view.default(convert_element_type_24, [73984, 128]); convert_element_type_24 = None permute_13: "f16[128, 512]" = torch.ops.aten.permute.default(convert_element_type_23, [1, 0]); convert_element_type_23 = None addmm_2: "f16[73984, 512]" = torch.ops.aten.addmm.default(convert_element_type_22, view_29, permute_13); convert_element_type_22 = view_29 = permute_13 = None view_30: "f16[1, 73984, 512]" = torch.ops.aten.view.default(addmm_2, [1, 73984, 512]); addmm_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_28: "f32[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(view_30, torch.float32); view_30 = None mul_9: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_28, 0.5) mul_10: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_28, 0.7071067811865476); convert_element_type_28 = None erf: "f32[1, 73984, 512]" = torch.ops.aten.erf.default(mul_10); mul_10 = None add_8: "f32[1, 73984, 512]" = torch.ops.aten.add.Tensor(erf, 1); erf = None mul_11: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(mul_9, add_8); mul_9 = add_8 = None convert_element_type_29: "f16[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(mul_11, torch.float16); mul_11 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_13: "f16[1, 73984, 512]" = torch.ops.aten.clone.default(convert_element_type_29); convert_element_type_29 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_30: "f16[128]" = torch.ops.prims.convert_element_type.default(arg18_1, torch.float16); arg18_1 = None convert_element_type_31: "f16[128, 512]" = torch.ops.prims.convert_element_type.default(arg17_1, torch.float16); arg17_1 = None view_31: "f16[73984, 512]" = torch.ops.aten.view.default(clone_13, [73984, 512]); clone_13 = None permute_14: "f16[512, 128]" = torch.ops.aten.permute.default(convert_element_type_31, [1, 0]); convert_element_type_31 = None addmm_3: "f16[73984, 128]" = torch.ops.aten.addmm.default(convert_element_type_30, view_31, permute_14); convert_element_type_30 = view_31 = permute_14 = None view_32: "f16[1, 73984, 128]" = torch.ops.aten.view.default(addmm_3, [1, 73984, 128]); addmm_3 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_14: "f16[1, 73984, 128]" = torch.ops.aten.clone.default(view_32); view_32 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_9: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_5, clone_14); add_5 = clone_14 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_3 = torch.ops.aten.var_mean.correction(add_9, [2], correction = 0, keepdim = True) getitem_6: "f32[1, 73984, 1]" = var_mean_3[0] getitem_7: "f32[1, 73984, 1]" = var_mean_3[1]; var_mean_3 = None add_10: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_6, 1e-05); getitem_6 = None rsqrt_3: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_10); add_10 = None sub_5: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_9, getitem_7); getitem_7 = None mul_12: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_5, rsqrt_3); sub_5 = rsqrt_3 = None mul_13: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_12, arg19_1); mul_12 = arg19_1 = None add_11: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_13, arg20_1); mul_13 = arg20_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_33: "f32[1, 272, 272, 128]" = torch.ops.aten.view.default(add_11, [1, 272, 272, 128]); add_11 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_1: "f32[1, 273, 273, 128]" = torch.ops.aten.constant_pad_nd.default(view_33, [0, 0, 0, 1, 0, 1], 0.0); view_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_12: "i64[273]" = torch.ops.aten.add.Tensor(iota, 3); iota = None fmod: "i64[273]" = torch.ops.aten.fmod.Scalar(add_12, 273); add_12 = None slice_136: "f32[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(constant_pad_nd_1, 0, 0, 9223372036854775807); constant_pad_nd_1 = None index_1: "f32[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(slice_136, [None, fmod]); slice_136 = fmod = None iota_1: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_13: "i64[273]" = torch.ops.aten.add.Tensor(iota_1, 3); iota_1 = None fmod_1: "i64[273]" = torch.ops.aten.fmod.Scalar(add_13, 273); add_13 = None slice_137: "f32[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(index_1, 0, 0, 9223372036854775807); index_1 = None slice_138: "f32[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(slice_137, 1, 0, 9223372036854775807); slice_137 = None index_2: "f32[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(slice_138, [None, None, fmod_1]); slice_138 = fmod_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_34: "f32[1, 39, 7, 39, 7, 128]" = torch.ops.aten.view.default(index_2, [1, 39, 7, 39, 7, 128]); index_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_15: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.permute.default(view_34, [0, 1, 3, 2, 4, 5]); view_34 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_15: "f32[1, 39, 39, 7, 7, 128]" = torch.ops.aten.clone.default(permute_15, memory_format = torch.contiguous_format); permute_15 = None view_35: "f32[1521, 7, 7, 128]" = torch.ops.aten.view.default(clone_15, [-1, 7, 7, 128]); clone_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_36: "f32[1521, 49, 128]" = torch.ops.aten.view.default(view_35, [-1, 49, 128]); view_35 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_35: "f16[384]" = torch.ops.prims.convert_element_type.default(arg22_1, torch.float16); arg22_1 = None convert_element_type_36: "f16[384, 128]" = torch.ops.prims.convert_element_type.default(arg21_1, torch.float16); arg21_1 = None convert_element_type_37: "f16[1521, 49, 128]" = torch.ops.prims.convert_element_type.default(view_36, torch.float16); view_36 = None view_37: "f16[74529, 128]" = torch.ops.aten.view.default(convert_element_type_37, [74529, 128]); convert_element_type_37 = None permute_16: "f16[128, 384]" = torch.ops.aten.permute.default(convert_element_type_36, [1, 0]); convert_element_type_36 = None addmm_4: "f16[74529, 384]" = torch.ops.aten.addmm.default(convert_element_type_35, view_37, permute_16); convert_element_type_35 = view_37 = permute_16 = None view_38: "f16[1521, 49, 384]" = torch.ops.aten.view.default(addmm_4, [1521, 49, 384]); addmm_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_39: "f16[1521, 49, 3, 4, 32]" = torch.ops.aten.view.default(view_38, [1521, 49, 3, 4, 32]); view_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_17: "f16[3, 1521, 4, 49, 32]" = torch.ops.aten.permute.default(view_39, [2, 0, 3, 1, 4]); view_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_3: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 0) select_4: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 1) select_5: "f16[1521, 4, 49, 32]" = torch.ops.aten.select.int(permute_17, 0, 2); permute_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_14: "f16[1521, 4, 49, 32]" = torch.ops.aten.mul.Tensor(select_3, 0.1767766952966369); select_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_18: "f16[1521, 4, 32, 49]" = torch.ops.aten.permute.default(select_4, [0, 1, 3, 2]); select_4 = None expand_4: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(mul_14, [1521, 4, 49, 32]); mul_14 = None clone_16: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_4, memory_format = torch.contiguous_format); expand_4 = None view_40: "f16[6084, 49, 32]" = torch.ops.aten.view.default(clone_16, [6084, 49, 32]); clone_16 = None expand_5: "f16[1521, 4, 32, 49]" = torch.ops.aten.expand.default(permute_18, [1521, 4, 32, 49]); permute_18 = None clone_17: "f16[1521, 4, 32, 49]" = torch.ops.aten.clone.default(expand_5, memory_format = torch.contiguous_format); expand_5 = None view_41: "f16[6084, 32, 49]" = torch.ops.aten.view.default(clone_17, [6084, 32, 49]); clone_17 = None bmm_2: "f16[6084, 49, 49]" = torch.ops.aten.bmm.default(view_40, view_41); view_40 = view_41 = None view_42: "f16[1521, 4, 49, 49]" = torch.ops.aten.view.default(bmm_2, [1521, 4, 49, 49]); bmm_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_43: "i64[2401]" = torch.ops.aten.view.default(arg24_1, [-1]); arg24_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_3: "f32[2401, 4]" = torch.ops.aten.index.Tensor(arg23_1, [view_43]); arg23_1 = view_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_44: "f32[49, 49, 4]" = torch.ops.aten.view.default(index_3, [49, 49, -1]); index_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_19: "f32[4, 49, 49]" = torch.ops.aten.permute.default(view_44, [2, 0, 1]); view_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_18: "f32[4, 49, 49]" = torch.ops.aten.clone.default(permute_19, memory_format = torch.contiguous_format); permute_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_3: "f32[1, 4, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_18, 0); clone_18 = None add_14: "f32[1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_42, unsqueeze_3); view_42 = unsqueeze_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_45: "f32[1, 1521, 4, 49, 49]" = torch.ops.aten.view.default(add_14, [1, 1521, 4, 49, 49]); add_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_4: "f32[1521, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_1, 1); where_1 = None unsqueeze_5: "f32[1, 1521, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_4, 0); unsqueeze_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_15: "f32[1, 1521, 4, 49, 49]" = torch.ops.aten.add.Tensor(view_45, unsqueeze_5); view_45 = unsqueeze_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_46: "f32[1521, 4, 49, 49]" = torch.ops.aten.view.default(add_15, [-1, 4, 49, 49]); add_15 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_1: "f32[1521, 4, 49, 1]" = torch.ops.aten.amax.default(view_46, [-1], True) sub_6: "f32[1521, 4, 49, 49]" = torch.ops.aten.sub.Tensor(view_46, amax_1); view_46 = amax_1 = None exp_1: "f32[1521, 4, 49, 49]" = torch.ops.aten.exp.default(sub_6); sub_6 = None sum_2: "f32[1521, 4, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_1, [-1], True) div_3: "f32[1521, 4, 49, 49]" = torch.ops.aten.div.Tensor(exp_1, sum_2); exp_1 = sum_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_19: "f32[1521, 4, 49, 49]" = torch.ops.aten.clone.default(div_3); div_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_43: "f16[1521, 4, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_19, torch.float16); clone_19 = None expand_6: "f16[1521, 4, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_43, [1521, 4, 49, 49]); convert_element_type_43 = None view_47: "f16[6084, 49, 49]" = torch.ops.aten.view.default(expand_6, [6084, 49, 49]); expand_6 = None expand_7: "f16[1521, 4, 49, 32]" = torch.ops.aten.expand.default(select_5, [1521, 4, 49, 32]); select_5 = None clone_20: "f16[1521, 4, 49, 32]" = torch.ops.aten.clone.default(expand_7, memory_format = torch.contiguous_format); expand_7 = None view_48: "f16[6084, 49, 32]" = torch.ops.aten.view.default(clone_20, [6084, 49, 32]); clone_20 = None bmm_3: "f16[6084, 49, 32]" = torch.ops.aten.bmm.default(view_47, view_48); view_47 = view_48 = None view_49: "f16[1521, 4, 49, 32]" = torch.ops.aten.view.default(bmm_3, [1521, 4, 49, 32]); bmm_3 = None permute_20: "f16[1521, 49, 4, 32]" = torch.ops.aten.permute.default(view_49, [0, 2, 1, 3]); view_49 = None clone_21: "f16[1521, 49, 4, 32]" = torch.ops.aten.clone.default(permute_20, memory_format = torch.contiguous_format); permute_20 = None view_50: "f16[1521, 49, 128]" = torch.ops.aten.view.default(clone_21, [1521, 49, 128]); clone_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_46: "f16[128]" = torch.ops.prims.convert_element_type.default(arg26_1, torch.float16); arg26_1 = None convert_element_type_47: "f16[128, 128]" = torch.ops.prims.convert_element_type.default(arg25_1, torch.float16); arg25_1 = None view_51: "f16[74529, 128]" = torch.ops.aten.view.default(view_50, [74529, 128]); view_50 = None permute_21: "f16[128, 128]" = torch.ops.aten.permute.default(convert_element_type_47, [1, 0]); convert_element_type_47 = None addmm_5: "f16[74529, 128]" = torch.ops.aten.addmm.default(convert_element_type_46, view_51, permute_21); convert_element_type_46 = view_51 = permute_21 = None view_52: "f16[1521, 49, 128]" = torch.ops.aten.view.default(addmm_5, [1521, 49, 128]); addmm_5 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_22: "f16[1521, 49, 128]" = torch.ops.aten.clone.default(view_52); view_52 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_53: "f16[1521, 7, 7, 128]" = torch.ops.aten.view.default(clone_22, [-1, 7, 7, 128]); clone_22 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_54: "f16[1, 39, 39, 7, 7, 128]" = torch.ops.aten.view.default(view_53, [1, 39, 39, 7, 7, -1]); view_53 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_22: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.permute.default(view_54, [0, 1, 3, 2, 4, 5]); view_54 = None clone_23: "f16[1, 39, 7, 39, 7, 128]" = torch.ops.aten.clone.default(permute_22, memory_format = torch.contiguous_format); permute_22 = None view_55: "f16[1, 273, 273, 128]" = torch.ops.aten.view.default(clone_23, [1, 273, 273, -1]); clone_23 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_2: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_16: "i64[273]" = torch.ops.aten.add.Tensor(iota_2, 270); iota_2 = None fmod_2: "i64[273]" = torch.ops.aten.fmod.Scalar(add_16, 273); add_16 = None slice_139: "f16[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(view_55, 0, 0, 9223372036854775807); view_55 = None index_4: "f16[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(slice_139, [None, fmod_2]); slice_139 = fmod_2 = None iota_3: "i64[273]" = torch.ops.prims.iota.default(273, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_17: "i64[273]" = torch.ops.aten.add.Tensor(iota_3, 270); iota_3 = None fmod_3: "i64[273]" = torch.ops.aten.fmod.Scalar(add_17, 273); add_17 = None slice_140: "f16[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(index_4, 0, 0, 9223372036854775807); index_4 = None slice_141: "f16[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(slice_140, 1, 0, 9223372036854775807); slice_140 = None index_5: "f16[1, 273, 273, 128]" = torch.ops.aten.index.Tensor(slice_141, [None, None, fmod_3]); slice_141 = fmod_3 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_142: "f16[1, 273, 273, 128]" = torch.ops.aten.slice.Tensor(index_5, 0, 0, 9223372036854775807); index_5 = None slice_143: "f16[1, 272, 273, 128]" = torch.ops.aten.slice.Tensor(slice_142, 1, 0, 272); slice_142 = None slice_144: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_143, 2, 0, 272); slice_143 = None slice_145: "f16[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(slice_144, 3, 0, 9223372036854775807); slice_144 = None clone_24: "f16[1, 272, 272, 128]" = torch.ops.aten.clone.default(slice_145, memory_format = torch.contiguous_format); slice_145 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_56: "f16[1, 73984, 128]" = torch.ops.aten.view.default(clone_24, [1, 73984, 128]); clone_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_18: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_9, view_56); add_9 = view_56 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_4 = torch.ops.aten.var_mean.correction(add_18, [2], correction = 0, keepdim = True) getitem_8: "f32[1, 73984, 1]" = var_mean_4[0] getitem_9: "f32[1, 73984, 1]" = var_mean_4[1]; var_mean_4 = None add_19: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_8, 1e-05); getitem_8 = None rsqrt_4: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_19); add_19 = None sub_7: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_18, getitem_9); getitem_9 = None mul_15: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_7, rsqrt_4); sub_7 = rsqrt_4 = None mul_16: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_15, arg27_1); mul_15 = arg27_1 = None add_20: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_16, arg28_1); mul_16 = arg28_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_51: "f16[512]" = torch.ops.prims.convert_element_type.default(arg30_1, torch.float16); arg30_1 = None convert_element_type_52: "f16[512, 128]" = torch.ops.prims.convert_element_type.default(arg29_1, torch.float16); arg29_1 = None convert_element_type_53: "f16[1, 73984, 128]" = torch.ops.prims.convert_element_type.default(add_20, torch.float16); add_20 = None view_57: "f16[73984, 128]" = torch.ops.aten.view.default(convert_element_type_53, [73984, 128]); convert_element_type_53 = None permute_23: "f16[128, 512]" = torch.ops.aten.permute.default(convert_element_type_52, [1, 0]); convert_element_type_52 = None addmm_6: "f16[73984, 512]" = torch.ops.aten.addmm.default(convert_element_type_51, view_57, permute_23); convert_element_type_51 = view_57 = permute_23 = None view_58: "f16[1, 73984, 512]" = torch.ops.aten.view.default(addmm_6, [1, 73984, 512]); addmm_6 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_57: "f32[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(view_58, torch.float32); view_58 = None mul_17: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_57, 0.5) mul_18: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(convert_element_type_57, 0.7071067811865476); convert_element_type_57 = None erf_1: "f32[1, 73984, 512]" = torch.ops.aten.erf.default(mul_18); mul_18 = None add_21: "f32[1, 73984, 512]" = torch.ops.aten.add.Tensor(erf_1, 1); erf_1 = None mul_19: "f32[1, 73984, 512]" = torch.ops.aten.mul.Tensor(mul_17, add_21); mul_17 = add_21 = None convert_element_type_58: "f16[1, 73984, 512]" = torch.ops.prims.convert_element_type.default(mul_19, torch.float16); mul_19 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_25: "f16[1, 73984, 512]" = torch.ops.aten.clone.default(convert_element_type_58); convert_element_type_58 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_59: "f16[128]" = torch.ops.prims.convert_element_type.default(arg32_1, torch.float16); arg32_1 = None convert_element_type_60: "f16[128, 512]" = torch.ops.prims.convert_element_type.default(arg31_1, torch.float16); arg31_1 = None view_59: "f16[73984, 512]" = torch.ops.aten.view.default(clone_25, [73984, 512]); clone_25 = None permute_24: "f16[512, 128]" = torch.ops.aten.permute.default(convert_element_type_60, [1, 0]); convert_element_type_60 = None addmm_7: "f16[73984, 128]" = torch.ops.aten.addmm.default(convert_element_type_59, view_59, permute_24); convert_element_type_59 = view_59 = permute_24 = None view_60: "f16[1, 73984, 128]" = torch.ops.aten.view.default(addmm_7, [1, 73984, 128]); addmm_7 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_26: "f16[1, 73984, 128]" = torch.ops.aten.clone.default(view_60); view_60 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_22: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(add_18, clone_26); add_18 = clone_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:344 in forward, code: x = x.view(B, H, W, C) view_61: "f32[1, 272, 272, 128]" = torch.ops.aten.view.default(add_22, [1, 272, 272, 128]) # File: /workspace/networks/encoders/swin/swin_transformer.py:351 in forward, code: x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C slice_146: "f32[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 0, 0, 9223372036854775807) slice_147: "f32[1, 136, 272, 128]" = torch.ops.aten.slice.Tensor(slice_146, 1, 0, 9223372036854775807, 2); slice_146 = None slice_148: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_147, 2, 0, 9223372036854775807, 2); slice_147 = None slice_149: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_148, 3, 0, 9223372036854775807); slice_148 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:352 in forward, code: x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C slice_150: "f32[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 0, 0, 9223372036854775807) slice_151: "f32[1, 136, 272, 128]" = torch.ops.aten.slice.Tensor(slice_150, 1, 1, 9223372036854775807, 2); slice_150 = None slice_152: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_151, 2, 0, 9223372036854775807, 2); slice_151 = None slice_153: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_152, 3, 0, 9223372036854775807); slice_152 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:353 in forward, code: x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C slice_154: "f32[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 0, 0, 9223372036854775807) slice_155: "f32[1, 136, 272, 128]" = torch.ops.aten.slice.Tensor(slice_154, 1, 0, 9223372036854775807, 2); slice_154 = None slice_156: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_155, 2, 1, 9223372036854775807, 2); slice_155 = None slice_157: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_156, 3, 0, 9223372036854775807); slice_156 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:354 in forward, code: x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C slice_158: "f32[1, 272, 272, 128]" = torch.ops.aten.slice.Tensor(view_61, 0, 0, 9223372036854775807); view_61 = None slice_159: "f32[1, 136, 272, 128]" = torch.ops.aten.slice.Tensor(slice_158, 1, 1, 9223372036854775807, 2); slice_158 = None slice_160: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_159, 2, 1, 9223372036854775807, 2); slice_159 = None slice_161: "f32[1, 136, 136, 128]" = torch.ops.aten.slice.Tensor(slice_160, 3, 0, 9223372036854775807); slice_160 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:355 in forward, code: x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C cat: "f32[1, 136, 136, 512]" = torch.ops.aten.cat.default([slice_149, slice_153, slice_157, slice_161], -1); slice_149 = slice_153 = slice_157 = slice_161 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:356 in forward, code: x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C view_62: "f32[1, 18496, 512]" = torch.ops.aten.view.default(cat, [1, -1, 512]); cat = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_5 = torch.ops.aten.var_mean.correction(view_62, [2], correction = 0, keepdim = True) getitem_10: "f32[1, 18496, 1]" = var_mean_5[0] getitem_11: "f32[1, 18496, 1]" = var_mean_5[1]; var_mean_5 = None add_23: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_10, 1e-05); getitem_10 = None rsqrt_5: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_23); add_23 = None sub_8: "f32[1, 18496, 512]" = torch.ops.aten.sub.Tensor(view_62, getitem_11); view_62 = getitem_11 = None mul_20: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(sub_8, rsqrt_5); sub_8 = rsqrt_5 = None mul_21: "f32[1, 18496, 512]" = torch.ops.aten.mul.Tensor(mul_20, arg33_1); mul_20 = arg33_1 = None add_24: "f32[1, 18496, 512]" = torch.ops.aten.add.Tensor(mul_21, arg34_1); mul_21 = arg34_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_64: "f16[256, 512]" = torch.ops.prims.convert_element_type.default(arg35_1, torch.float16); arg35_1 = None convert_element_type_65: "f16[1, 18496, 512]" = torch.ops.prims.convert_element_type.default(add_24, torch.float16); add_24 = None permute_25: "f16[512, 256]" = torch.ops.aten.permute.default(convert_element_type_64, [1, 0]); convert_element_type_64 = None view_63: "f16[18496, 512]" = torch.ops.aten.view.default(convert_element_type_65, [18496, 512]); convert_element_type_65 = None mm: "f16[18496, 256]" = torch.ops.aten.mm.default(view_63, permute_25); view_63 = permute_25 = None view_64: "f16[1, 18496, 256]" = torch.ops.aten.view.default(mm, [1, 18496, 256]); mm = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( var_mean_6 = torch.ops.aten.var_mean.correction(add_22, [2], correction = 0, keepdim = True) getitem_12: "f32[1, 73984, 1]" = var_mean_6[0] getitem_13: "f32[1, 73984, 1]" = var_mean_6[1]; var_mean_6 = None add_25: "f32[1, 73984, 1]" = torch.ops.aten.add.Tensor(getitem_12, 1e-05); getitem_12 = None rsqrt_6: "f32[1, 73984, 1]" = torch.ops.aten.rsqrt.default(add_25); add_25 = None sub_9: "f32[1, 73984, 128]" = torch.ops.aten.sub.Tensor(add_22, getitem_13); add_22 = getitem_13 = None mul_22: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(sub_9, rsqrt_6); sub_9 = rsqrt_6 = None mul_23: "f32[1, 73984, 128]" = torch.ops.aten.mul.Tensor(mul_22, arg36_1); mul_22 = arg36_1 = None add_26: "f32[1, 73984, 128]" = torch.ops.aten.add.Tensor(mul_23, arg37_1); mul_23 = arg37_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W, view_65: "f32[1, 272, 272, 128]" = torch.ops.aten.view.default(add_26, [-1, 272, 272, 128]); add_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1, permute_26: "f32[1, 128, 272, 272]" = torch.ops.aten.permute.default(view_65, [0, 3, 1, 2]); view_65 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous() clone_27: "f32[1, 128, 272, 272]" = torch.ops.aten.clone.default(permute_26, memory_format = torch.contiguous_format); permute_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 full_1: "f32[1, 140, 140, 1]" = torch.ops.aten.full.default([1, 140, 140, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt _tensor_constant13 = self._tensor_constant13 lift_fresh_copy_13: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant13); _tensor_constant13 = None slice_162: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(full_1, 0, 0, 9223372036854775807) slice_163: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_162, 1, 0, -7); slice_162 = None slice_164: "f32[1, 133, 133, 1]" = torch.ops.aten.slice.Tensor(slice_163, 2, 0, -7); slice_163 = None slice_165: "f32[1, 133, 133, 1]" = torch.ops.aten.slice.Tensor(slice_164, 3, 0, 9223372036854775807); slice_164 = None copy_9: "f32[1, 133, 133, 1]" = torch.ops.aten.copy.default(slice_165, lift_fresh_copy_13); slice_165 = lift_fresh_copy_13 = None slice_166: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(full_1, 0, 0, 9223372036854775807) slice_167: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_166, 1, 0, -7) slice_168: "f32[1, 133, 133, 1]" = torch.ops.aten.slice.Tensor(slice_167, 2, 0, -7) slice_scatter_36: "f32[1, 133, 133, 1]" = torch.ops.aten.slice_scatter.default(slice_168, copy_9, 3, 0, 9223372036854775807); slice_168 = copy_9 = None slice_scatter_37: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_167, slice_scatter_36, 2, 0, -7); slice_167 = slice_scatter_36 = None slice_scatter_38: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_166, slice_scatter_37, 1, 0, -7); slice_166 = slice_scatter_37 = None slice_scatter_39: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(full_1, slice_scatter_38, 0, 0, 9223372036854775807); full_1 = slice_scatter_38 = None _tensor_constant14 = self._tensor_constant14 lift_fresh_copy_14: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant14); _tensor_constant14 = None slice_177: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_39, 0, 0, 9223372036854775807) slice_178: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_177, 1, 0, -7); slice_177 = None slice_179: "f32[1, 133, 4, 1]" = torch.ops.aten.slice.Tensor(slice_178, 2, -7, -3); slice_178 = None slice_180: "f32[1, 133, 4, 1]" = torch.ops.aten.slice.Tensor(slice_179, 3, 0, 9223372036854775807); slice_179 = None copy_10: "f32[1, 133, 4, 1]" = torch.ops.aten.copy.default(slice_180, lift_fresh_copy_14); slice_180 = lift_fresh_copy_14 = None slice_181: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_39, 0, 0, 9223372036854775807) slice_182: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_181, 1, 0, -7) slice_183: "f32[1, 133, 4, 1]" = torch.ops.aten.slice.Tensor(slice_182, 2, -7, -3) slice_scatter_40: "f32[1, 133, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_183, copy_10, 3, 0, 9223372036854775807); slice_183 = copy_10 = None slice_scatter_41: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_182, slice_scatter_40, 2, -7, -3); slice_182 = slice_scatter_40 = None slice_scatter_42: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_181, slice_scatter_41, 1, 0, -7); slice_181 = slice_scatter_41 = None slice_scatter_43: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_39, slice_scatter_42, 0, 0, 9223372036854775807); slice_scatter_39 = slice_scatter_42 = None _tensor_constant15 = self._tensor_constant15 lift_fresh_copy_15: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant15); _tensor_constant15 = None slice_192: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_43, 0, 0, 9223372036854775807) slice_193: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_192, 1, 0, -7); slice_192 = None slice_194: "f32[1, 133, 3, 1]" = torch.ops.aten.slice.Tensor(slice_193, 2, -3, 9223372036854775807); slice_193 = None slice_195: "f32[1, 133, 3, 1]" = torch.ops.aten.slice.Tensor(slice_194, 3, 0, 9223372036854775807); slice_194 = None copy_11: "f32[1, 133, 3, 1]" = torch.ops.aten.copy.default(slice_195, lift_fresh_copy_15); slice_195 = lift_fresh_copy_15 = None slice_196: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_43, 0, 0, 9223372036854775807) slice_197: "f32[1, 133, 140, 1]" = torch.ops.aten.slice.Tensor(slice_196, 1, 0, -7) slice_198: "f32[1, 133, 3, 1]" = torch.ops.aten.slice.Tensor(slice_197, 2, -3, 9223372036854775807) slice_scatter_44: "f32[1, 133, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_198, copy_11, 3, 0, 9223372036854775807); slice_198 = copy_11 = None slice_scatter_45: "f32[1, 133, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_197, slice_scatter_44, 2, -3, 9223372036854775807); slice_197 = slice_scatter_44 = None slice_scatter_46: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_196, slice_scatter_45, 1, 0, -7); slice_196 = slice_scatter_45 = None slice_scatter_47: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_43, slice_scatter_46, 0, 0, 9223372036854775807); slice_scatter_43 = slice_scatter_46 = None _tensor_constant16 = self._tensor_constant16 lift_fresh_copy_16: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant16); _tensor_constant16 = None slice_207: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_47, 0, 0, 9223372036854775807) slice_208: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_207, 1, -7, -3); slice_207 = None slice_209: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(slice_208, 2, 0, -7); slice_208 = None slice_210: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(slice_209, 3, 0, 9223372036854775807); slice_209 = None copy_12: "f32[1, 4, 133, 1]" = torch.ops.aten.copy.default(slice_210, lift_fresh_copy_16); slice_210 = lift_fresh_copy_16 = None slice_211: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_47, 0, 0, 9223372036854775807) slice_212: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_211, 1, -7, -3) slice_213: "f32[1, 4, 133, 1]" = torch.ops.aten.slice.Tensor(slice_212, 2, 0, -7) slice_scatter_48: "f32[1, 4, 133, 1]" = torch.ops.aten.slice_scatter.default(slice_213, copy_12, 3, 0, 9223372036854775807); slice_213 = copy_12 = None slice_scatter_49: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_212, slice_scatter_48, 2, 0, -7); slice_212 = slice_scatter_48 = None slice_scatter_50: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_211, slice_scatter_49, 1, -7, -3); slice_211 = slice_scatter_49 = None slice_scatter_51: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_47, slice_scatter_50, 0, 0, 9223372036854775807); slice_scatter_47 = slice_scatter_50 = None _tensor_constant17 = self._tensor_constant17 lift_fresh_copy_17: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant17); _tensor_constant17 = None slice_222: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_51, 0, 0, 9223372036854775807) slice_223: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_222, 1, -7, -3); slice_222 = None slice_224: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_223, 2, -7, -3); slice_223 = None slice_225: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_224, 3, 0, 9223372036854775807); slice_224 = None copy_13: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_225, lift_fresh_copy_17); slice_225 = lift_fresh_copy_17 = None slice_226: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_51, 0, 0, 9223372036854775807) slice_227: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_226, 1, -7, -3) slice_228: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_227, 2, -7, -3) slice_scatter_52: "f32[1, 4, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_228, copy_13, 3, 0, 9223372036854775807); slice_228 = copy_13 = None slice_scatter_53: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_227, slice_scatter_52, 2, -7, -3); slice_227 = slice_scatter_52 = None slice_scatter_54: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_226, slice_scatter_53, 1, -7, -3); slice_226 = slice_scatter_53 = None slice_scatter_55: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_51, slice_scatter_54, 0, 0, 9223372036854775807); slice_scatter_51 = slice_scatter_54 = None _tensor_constant18 = self._tensor_constant18 lift_fresh_copy_18: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant18); _tensor_constant18 = None slice_237: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_55, 0, 0, 9223372036854775807) slice_238: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_237, 1, -7, -3); slice_237 = None slice_239: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_238, 2, -3, 9223372036854775807); slice_238 = None slice_240: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_239, 3, 0, 9223372036854775807); slice_239 = None copy_14: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_240, lift_fresh_copy_18); slice_240 = lift_fresh_copy_18 = None slice_241: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_55, 0, 0, 9223372036854775807) slice_242: "f32[1, 4, 140, 1]" = torch.ops.aten.slice.Tensor(slice_241, 1, -7, -3) slice_243: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_242, 2, -3, 9223372036854775807) slice_scatter_56: "f32[1, 4, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_243, copy_14, 3, 0, 9223372036854775807); slice_243 = copy_14 = None slice_scatter_57: "f32[1, 4, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_242, slice_scatter_56, 2, -3, 9223372036854775807); slice_242 = slice_scatter_56 = None slice_scatter_58: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_241, slice_scatter_57, 1, -7, -3); slice_241 = slice_scatter_57 = None slice_scatter_59: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_55, slice_scatter_58, 0, 0, 9223372036854775807); slice_scatter_55 = slice_scatter_58 = None _tensor_constant19 = self._tensor_constant19 lift_fresh_copy_19: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant19); _tensor_constant19 = None slice_252: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_59, 0, 0, 9223372036854775807) slice_253: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_252, 1, -3, 9223372036854775807); slice_252 = None slice_254: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(slice_253, 2, 0, -7); slice_253 = None slice_255: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(slice_254, 3, 0, 9223372036854775807); slice_254 = None copy_15: "f32[1, 3, 133, 1]" = torch.ops.aten.copy.default(slice_255, lift_fresh_copy_19); slice_255 = lift_fresh_copy_19 = None slice_256: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_59, 0, 0, 9223372036854775807) slice_257: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_256, 1, -3, 9223372036854775807) slice_258: "f32[1, 3, 133, 1]" = torch.ops.aten.slice.Tensor(slice_257, 2, 0, -7) slice_scatter_60: "f32[1, 3, 133, 1]" = torch.ops.aten.slice_scatter.default(slice_258, copy_15, 3, 0, 9223372036854775807); slice_258 = copy_15 = None slice_scatter_61: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_257, slice_scatter_60, 2, 0, -7); slice_257 = slice_scatter_60 = None slice_scatter_62: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_256, slice_scatter_61, 1, -3, 9223372036854775807); slice_256 = slice_scatter_61 = None slice_scatter_63: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_59, slice_scatter_62, 0, 0, 9223372036854775807); slice_scatter_59 = slice_scatter_62 = None _tensor_constant20 = self._tensor_constant20 lift_fresh_copy_20: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant20); _tensor_constant20 = None slice_267: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_63, 0, 0, 9223372036854775807) slice_268: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_267, 1, -3, 9223372036854775807); slice_267 = None slice_269: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_268, 2, -7, -3); slice_268 = None slice_270: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_269, 3, 0, 9223372036854775807); slice_269 = None copy_16: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_270, lift_fresh_copy_20); slice_270 = lift_fresh_copy_20 = None slice_271: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_63, 0, 0, 9223372036854775807) slice_272: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_271, 1, -3, 9223372036854775807) slice_273: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_272, 2, -7, -3) slice_scatter_64: "f32[1, 3, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_273, copy_16, 3, 0, 9223372036854775807); slice_273 = copy_16 = None slice_scatter_65: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_272, slice_scatter_64, 2, -7, -3); slice_272 = slice_scatter_64 = None slice_scatter_66: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_271, slice_scatter_65, 1, -3, 9223372036854775807); slice_271 = slice_scatter_65 = None slice_scatter_67: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_63, slice_scatter_66, 0, 0, 9223372036854775807); slice_scatter_63 = slice_scatter_66 = None _tensor_constant21 = self._tensor_constant21 lift_fresh_copy_21: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant21); _tensor_constant21 = None slice_282: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_67, 0, 0, 9223372036854775807) slice_283: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_282, 1, -3, 9223372036854775807); slice_282 = None slice_284: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_283, 2, -3, 9223372036854775807); slice_283 = None slice_285: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_284, 3, 0, 9223372036854775807); slice_284 = None copy_17: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_285, lift_fresh_copy_21); slice_285 = lift_fresh_copy_21 = None slice_286: "f32[1, 140, 140, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_67, 0, 0, 9223372036854775807) slice_287: "f32[1, 3, 140, 1]" = torch.ops.aten.slice.Tensor(slice_286, 1, -3, 9223372036854775807) slice_288: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_287, 2, -3, 9223372036854775807) slice_scatter_68: "f32[1, 3, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_288, copy_17, 3, 0, 9223372036854775807); slice_288 = copy_17 = None slice_scatter_69: "f32[1, 3, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_287, slice_scatter_68, 2, -3, 9223372036854775807); slice_287 = slice_scatter_68 = None slice_scatter_70: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_286, slice_scatter_69, 1, -3, 9223372036854775807); slice_286 = slice_scatter_69 = None slice_scatter_71: "f32[1, 140, 140, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_67, slice_scatter_70, 0, 0, 9223372036854775807); slice_scatter_67 = slice_scatter_70 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) view_67: "f32[1, 20, 7, 20, 7, 1]" = torch.ops.aten.view.default(slice_scatter_71, [1, 20, 7, 20, 7, 1]); slice_scatter_71 = None permute_28: "f32[1, 20, 20, 7, 7, 1]" = torch.ops.aten.permute.default(view_67, [0, 1, 3, 2, 4, 5]); view_67 = None clone_28: "f32[1, 20, 20, 7, 7, 1]" = torch.ops.aten.clone.default(permute_28, memory_format = torch.contiguous_format); permute_28 = None view_68: "f32[400, 7, 7, 1]" = torch.ops.aten.view.default(clone_28, [-1, 7, 7, 1]); clone_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1, view_69: "f32[400, 49]" = torch.ops.aten.view.default(view_68, [-1, 49]); view_68 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) unsqueeze_6: "f32[400, 1, 49]" = torch.ops.aten.unsqueeze.default(view_69, 1) unsqueeze_7: "f32[400, 49, 1]" = torch.ops.aten.unsqueeze.default(view_69, 2); view_69 = None sub_10: "f32[400, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze_6, unsqueeze_7); unsqueeze_6 = unsqueeze_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0, ne_1: "b8[400, 49, 49]" = torch.ops.aten.ne.Scalar(sub_10, 0) scalar_tensor_2: "f32[]" = torch.ops.aten.scalar_tensor.default(-100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where_2: "f32[400, 49, 49]" = torch.ops.aten.where.self(ne_1, scalar_tensor_2, sub_10); ne_1 = scalar_tensor_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0)) eq_1: "b8[400, 49, 49]" = torch.ops.aten.eq.Scalar(sub_10, 0); sub_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( scalar_tensor_3: "f32[]" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where_3: "f32[400, 49, 49]" = torch.ops.aten.where.self(eq_1, scalar_tensor_3, where_2); eq_1 = scalar_tensor_3 = where_2 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_70: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(view_64, torch.float32) var_mean_7 = torch.ops.aten.var_mean.correction(convert_element_type_70, [2], correction = 0, keepdim = True) getitem_14: "f32[1, 18496, 1]" = var_mean_7[0] getitem_15: "f32[1, 18496, 1]" = var_mean_7[1]; var_mean_7 = None add_27: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_14, 1e-05); getitem_14 = None rsqrt_7: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_27); add_27 = None sub_11: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_70, getitem_15); convert_element_type_70 = getitem_15 = None mul_26: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_11, rsqrt_7); sub_11 = rsqrt_7 = None mul_27: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_26, arg38_1); mul_26 = arg38_1 = None add_28: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_27, arg39_1); mul_27 = arg39_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_70: "f32[1, 136, 136, 256]" = torch.ops.aten.view.default(add_28, [1, 136, 136, 256]); add_28 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_2: "f32[1, 140, 140, 256]" = torch.ops.aten.constant_pad_nd.default(view_70, [0, 0, 0, 4, 0, 4], 0.0); view_70 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_71: "f32[1, 20, 7, 20, 7, 256]" = torch.ops.aten.view.default(constant_pad_nd_2, [1, 20, 7, 20, 7, 256]); constant_pad_nd_2 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_29: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.permute.default(view_71, [0, 1, 3, 2, 4, 5]); view_71 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_29: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.clone.default(permute_29, memory_format = torch.contiguous_format); permute_29 = None view_72: "f32[400, 7, 7, 256]" = torch.ops.aten.view.default(clone_29, [-1, 7, 7, 256]); clone_29 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_73: "f32[400, 49, 256]" = torch.ops.aten.view.default(view_72, [-1, 49, 256]); view_72 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_71: "f16[768]" = torch.ops.prims.convert_element_type.default(arg41_1, torch.float16); arg41_1 = None convert_element_type_72: "f16[768, 256]" = torch.ops.prims.convert_element_type.default(arg40_1, torch.float16); arg40_1 = None convert_element_type_73: "f16[400, 49, 256]" = torch.ops.prims.convert_element_type.default(view_73, torch.float16); view_73 = None view_74: "f16[19600, 256]" = torch.ops.aten.view.default(convert_element_type_73, [19600, 256]); convert_element_type_73 = None permute_30: "f16[256, 768]" = torch.ops.aten.permute.default(convert_element_type_72, [1, 0]); convert_element_type_72 = None addmm_8: "f16[19600, 768]" = torch.ops.aten.addmm.default(convert_element_type_71, view_74, permute_30); convert_element_type_71 = view_74 = permute_30 = None view_75: "f16[400, 49, 768]" = torch.ops.aten.view.default(addmm_8, [400, 49, 768]); addmm_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_76: "f16[400, 49, 3, 8, 32]" = torch.ops.aten.view.default(view_75, [400, 49, 3, 8, 32]); view_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_31: "f16[3, 400, 8, 49, 32]" = torch.ops.aten.permute.default(view_76, [2, 0, 3, 1, 4]); view_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_6: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 0) select_7: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 1) select_8: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_31, 0, 2); permute_31 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_28: "f16[400, 8, 49, 32]" = torch.ops.aten.mul.Tensor(select_6, 0.1767766952966369); select_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_32: "f16[400, 8, 32, 49]" = torch.ops.aten.permute.default(select_7, [0, 1, 3, 2]); select_7 = None expand_8: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(mul_28, [400, 8, 49, 32]); mul_28 = None clone_30: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_8, memory_format = torch.contiguous_format); expand_8 = None view_77: "f16[3200, 49, 32]" = torch.ops.aten.view.default(clone_30, [3200, 49, 32]); clone_30 = None expand_9: "f16[400, 8, 32, 49]" = torch.ops.aten.expand.default(permute_32, [400, 8, 32, 49]); permute_32 = None clone_31: "f16[400, 8, 32, 49]" = torch.ops.aten.clone.default(expand_9, memory_format = torch.contiguous_format); expand_9 = None view_78: "f16[3200, 32, 49]" = torch.ops.aten.view.default(clone_31, [3200, 32, 49]); clone_31 = None bmm_4: "f16[3200, 49, 49]" = torch.ops.aten.bmm.default(view_77, view_78); view_77 = view_78 = None view_79: "f16[400, 8, 49, 49]" = torch.ops.aten.view.default(bmm_4, [400, 8, 49, 49]); bmm_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_80: "i64[2401]" = torch.ops.aten.view.default(arg43_1, [-1]); arg43_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_6: "f32[2401, 8]" = torch.ops.aten.index.Tensor(arg42_1, [view_80]); arg42_1 = view_80 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_81: "f32[49, 49, 8]" = torch.ops.aten.view.default(index_6, [49, 49, -1]); index_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_33: "f32[8, 49, 49]" = torch.ops.aten.permute.default(view_81, [2, 0, 1]); view_81 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_32: "f32[8, 49, 49]" = torch.ops.aten.clone.default(permute_33, memory_format = torch.contiguous_format); permute_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_8: "f32[1, 8, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_32, 0); clone_32 = None add_29: "f32[400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_79, unsqueeze_8); view_79 = unsqueeze_8 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_2: "f32[400, 8, 49, 1]" = torch.ops.aten.amax.default(add_29, [-1], True) sub_12: "f32[400, 8, 49, 49]" = torch.ops.aten.sub.Tensor(add_29, amax_2); add_29 = amax_2 = None exp_2: "f32[400, 8, 49, 49]" = torch.ops.aten.exp.default(sub_12); sub_12 = None sum_3: "f32[400, 8, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_2, [-1], True) div_6: "f32[400, 8, 49, 49]" = torch.ops.aten.div.Tensor(exp_2, sum_3); exp_2 = sum_3 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_33: "f32[400, 8, 49, 49]" = torch.ops.aten.clone.default(div_6); div_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_79: "f16[400, 8, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_33, torch.float16); clone_33 = None expand_10: "f16[400, 8, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_79, [400, 8, 49, 49]); convert_element_type_79 = None view_82: "f16[3200, 49, 49]" = torch.ops.aten.view.default(expand_10, [3200, 49, 49]); expand_10 = None expand_11: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(select_8, [400, 8, 49, 32]); select_8 = None clone_34: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_11, memory_format = torch.contiguous_format); expand_11 = None view_83: "f16[3200, 49, 32]" = torch.ops.aten.view.default(clone_34, [3200, 49, 32]); clone_34 = None bmm_5: "f16[3200, 49, 32]" = torch.ops.aten.bmm.default(view_82, view_83); view_82 = view_83 = None view_84: "f16[400, 8, 49, 32]" = torch.ops.aten.view.default(bmm_5, [400, 8, 49, 32]); bmm_5 = None permute_34: "f16[400, 49, 8, 32]" = torch.ops.aten.permute.default(view_84, [0, 2, 1, 3]); view_84 = None clone_35: "f16[400, 49, 8, 32]" = torch.ops.aten.clone.default(permute_34, memory_format = torch.contiguous_format); permute_34 = None view_85: "f16[400, 49, 256]" = torch.ops.aten.view.default(clone_35, [400, 49, 256]); clone_35 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_82: "f16[256]" = torch.ops.prims.convert_element_type.default(arg45_1, torch.float16); arg45_1 = None convert_element_type_83: "f16[256, 256]" = torch.ops.prims.convert_element_type.default(arg44_1, torch.float16); arg44_1 = None view_86: "f16[19600, 256]" = torch.ops.aten.view.default(view_85, [19600, 256]); view_85 = None permute_35: "f16[256, 256]" = torch.ops.aten.permute.default(convert_element_type_83, [1, 0]); convert_element_type_83 = None addmm_9: "f16[19600, 256]" = torch.ops.aten.addmm.default(convert_element_type_82, view_86, permute_35); convert_element_type_82 = view_86 = permute_35 = None view_87: "f16[400, 49, 256]" = torch.ops.aten.view.default(addmm_9, [400, 49, 256]); addmm_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_36: "f16[400, 49, 256]" = torch.ops.aten.clone.default(view_87); view_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_88: "f16[400, 7, 7, 256]" = torch.ops.aten.view.default(clone_36, [-1, 7, 7, 256]); clone_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_89: "f16[1, 20, 20, 7, 7, 256]" = torch.ops.aten.view.default(view_88, [1, 20, 20, 7, 7, -1]); view_88 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_36: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.permute.default(view_89, [0, 1, 3, 2, 4, 5]); view_89 = None clone_37: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.clone.default(permute_36, memory_format = torch.contiguous_format); permute_36 = None view_90: "f16[1, 140, 140, 256]" = torch.ops.aten.view.default(clone_37, [1, 140, 140, -1]); clone_37 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_293: "f16[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(view_90, 0, 0, 9223372036854775807); view_90 = None slice_294: "f16[1, 136, 140, 256]" = torch.ops.aten.slice.Tensor(slice_293, 1, 0, 136); slice_293 = None slice_295: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_294, 2, 0, 136); slice_294 = None slice_296: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_295, 3, 0, 9223372036854775807); slice_295 = None clone_38: "f16[1, 136, 136, 256]" = torch.ops.aten.clone.default(slice_296, memory_format = torch.contiguous_format); slice_296 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_91: "f16[1, 18496, 256]" = torch.ops.aten.view.default(clone_38, [1, 18496, 256]); clone_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_30: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(view_64, view_91); view_64 = view_91 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_87: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_30, torch.float32) var_mean_8 = torch.ops.aten.var_mean.correction(convert_element_type_87, [2], correction = 0, keepdim = True) getitem_16: "f32[1, 18496, 1]" = var_mean_8[0] getitem_17: "f32[1, 18496, 1]" = var_mean_8[1]; var_mean_8 = None add_31: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_16, 1e-05); getitem_16 = None rsqrt_8: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_31); add_31 = None sub_13: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_87, getitem_17); convert_element_type_87 = getitem_17 = None mul_29: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_13, rsqrt_8); sub_13 = rsqrt_8 = None mul_30: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_29, arg46_1); mul_29 = arg46_1 = None add_32: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_30, arg47_1); mul_30 = arg47_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_88: "f16[1024]" = torch.ops.prims.convert_element_type.default(arg49_1, torch.float16); arg49_1 = None convert_element_type_89: "f16[1024, 256]" = torch.ops.prims.convert_element_type.default(arg48_1, torch.float16); arg48_1 = None convert_element_type_90: "f16[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_32, torch.float16); add_32 = None view_92: "f16[18496, 256]" = torch.ops.aten.view.default(convert_element_type_90, [18496, 256]); convert_element_type_90 = None permute_37: "f16[256, 1024]" = torch.ops.aten.permute.default(convert_element_type_89, [1, 0]); convert_element_type_89 = None addmm_10: "f16[18496, 1024]" = torch.ops.aten.addmm.default(convert_element_type_88, view_92, permute_37); convert_element_type_88 = view_92 = permute_37 = None view_93: "f16[1, 18496, 1024]" = torch.ops.aten.view.default(addmm_10, [1, 18496, 1024]); addmm_10 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_94: "f32[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(view_93, torch.float32); view_93 = None mul_31: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_94, 0.5) mul_32: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_94, 0.7071067811865476); convert_element_type_94 = None erf_2: "f32[1, 18496, 1024]" = torch.ops.aten.erf.default(mul_32); mul_32 = None add_33: "f32[1, 18496, 1024]" = torch.ops.aten.add.Tensor(erf_2, 1); erf_2 = None mul_33: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(mul_31, add_33); mul_31 = add_33 = None convert_element_type_95: "f16[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(mul_33, torch.float16); mul_33 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_39: "f16[1, 18496, 1024]" = torch.ops.aten.clone.default(convert_element_type_95); convert_element_type_95 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_96: "f16[256]" = torch.ops.prims.convert_element_type.default(arg51_1, torch.float16); arg51_1 = None convert_element_type_97: "f16[256, 1024]" = torch.ops.prims.convert_element_type.default(arg50_1, torch.float16); arg50_1 = None view_94: "f16[18496, 1024]" = torch.ops.aten.view.default(clone_39, [18496, 1024]); clone_39 = None permute_38: "f16[1024, 256]" = torch.ops.aten.permute.default(convert_element_type_97, [1, 0]); convert_element_type_97 = None addmm_11: "f16[18496, 256]" = torch.ops.aten.addmm.default(convert_element_type_96, view_94, permute_38); convert_element_type_96 = view_94 = permute_38 = None view_95: "f16[1, 18496, 256]" = torch.ops.aten.view.default(addmm_11, [1, 18496, 256]); addmm_11 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_40: "f16[1, 18496, 256]" = torch.ops.aten.clone.default(view_95); view_95 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_34: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_30, clone_40); add_30 = clone_40 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_101: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_34, torch.float32) var_mean_9 = torch.ops.aten.var_mean.correction(convert_element_type_101, [2], correction = 0, keepdim = True) getitem_18: "f32[1, 18496, 1]" = var_mean_9[0] getitem_19: "f32[1, 18496, 1]" = var_mean_9[1]; var_mean_9 = None add_35: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_18, 1e-05); getitem_18 = None rsqrt_9: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_35); add_35 = None sub_14: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_101, getitem_19); convert_element_type_101 = getitem_19 = None mul_34: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_14, rsqrt_9); sub_14 = rsqrt_9 = None mul_35: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_34, arg52_1); mul_34 = arg52_1 = None add_36: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_35, arg53_1); mul_35 = arg53_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_96: "f32[1, 136, 136, 256]" = torch.ops.aten.view.default(add_36, [1, 136, 136, 256]); add_36 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_3: "f32[1, 140, 140, 256]" = torch.ops.aten.constant_pad_nd.default(view_96, [0, 0, 0, 4, 0, 4], 0.0); view_96 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_4: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_37: "i64[140]" = torch.ops.aten.add.Tensor(iota_4, 3); iota_4 = None fmod_4: "i64[140]" = torch.ops.aten.fmod.Scalar(add_37, 140); add_37 = None slice_297: "f32[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(constant_pad_nd_3, 0, 0, 9223372036854775807); constant_pad_nd_3 = None index_7: "f32[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(slice_297, [None, fmod_4]); slice_297 = fmod_4 = None iota_5: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_38: "i64[140]" = torch.ops.aten.add.Tensor(iota_5, 3); iota_5 = None fmod_5: "i64[140]" = torch.ops.aten.fmod.Scalar(add_38, 140); add_38 = None slice_298: "f32[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(index_7, 0, 0, 9223372036854775807); index_7 = None slice_299: "f32[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(slice_298, 1, 0, 9223372036854775807); slice_298 = None index_8: "f32[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(slice_299, [None, None, fmod_5]); slice_299 = fmod_5 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_97: "f32[1, 20, 7, 20, 7, 256]" = torch.ops.aten.view.default(index_8, [1, 20, 7, 20, 7, 256]); index_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_39: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.permute.default(view_97, [0, 1, 3, 2, 4, 5]); view_97 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_41: "f32[1, 20, 20, 7, 7, 256]" = torch.ops.aten.clone.default(permute_39, memory_format = torch.contiguous_format); permute_39 = None view_98: "f32[400, 7, 7, 256]" = torch.ops.aten.view.default(clone_41, [-1, 7, 7, 256]); clone_41 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_99: "f32[400, 49, 256]" = torch.ops.aten.view.default(view_98, [-1, 49, 256]); view_98 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_102: "f16[768]" = torch.ops.prims.convert_element_type.default(arg55_1, torch.float16); arg55_1 = None convert_element_type_103: "f16[768, 256]" = torch.ops.prims.convert_element_type.default(arg54_1, torch.float16); arg54_1 = None convert_element_type_104: "f16[400, 49, 256]" = torch.ops.prims.convert_element_type.default(view_99, torch.float16); view_99 = None view_100: "f16[19600, 256]" = torch.ops.aten.view.default(convert_element_type_104, [19600, 256]); convert_element_type_104 = None permute_40: "f16[256, 768]" = torch.ops.aten.permute.default(convert_element_type_103, [1, 0]); convert_element_type_103 = None addmm_12: "f16[19600, 768]" = torch.ops.aten.addmm.default(convert_element_type_102, view_100, permute_40); convert_element_type_102 = view_100 = permute_40 = None view_101: "f16[400, 49, 768]" = torch.ops.aten.view.default(addmm_12, [400, 49, 768]); addmm_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_102: "f16[400, 49, 3, 8, 32]" = torch.ops.aten.view.default(view_101, [400, 49, 3, 8, 32]); view_101 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_41: "f16[3, 400, 8, 49, 32]" = torch.ops.aten.permute.default(view_102, [2, 0, 3, 1, 4]); view_102 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_9: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 0) select_10: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 1) select_11: "f16[400, 8, 49, 32]" = torch.ops.aten.select.int(permute_41, 0, 2); permute_41 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_36: "f16[400, 8, 49, 32]" = torch.ops.aten.mul.Tensor(select_9, 0.1767766952966369); select_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_42: "f16[400, 8, 32, 49]" = torch.ops.aten.permute.default(select_10, [0, 1, 3, 2]); select_10 = None expand_12: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(mul_36, [400, 8, 49, 32]); mul_36 = None clone_42: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_12, memory_format = torch.contiguous_format); expand_12 = None view_103: "f16[3200, 49, 32]" = torch.ops.aten.view.default(clone_42, [3200, 49, 32]); clone_42 = None expand_13: "f16[400, 8, 32, 49]" = torch.ops.aten.expand.default(permute_42, [400, 8, 32, 49]); permute_42 = None clone_43: "f16[400, 8, 32, 49]" = torch.ops.aten.clone.default(expand_13, memory_format = torch.contiguous_format); expand_13 = None view_104: "f16[3200, 32, 49]" = torch.ops.aten.view.default(clone_43, [3200, 32, 49]); clone_43 = None bmm_6: "f16[3200, 49, 49]" = torch.ops.aten.bmm.default(view_103, view_104); view_103 = view_104 = None view_105: "f16[400, 8, 49, 49]" = torch.ops.aten.view.default(bmm_6, [400, 8, 49, 49]); bmm_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_106: "i64[2401]" = torch.ops.aten.view.default(arg57_1, [-1]); arg57_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_9: "f32[2401, 8]" = torch.ops.aten.index.Tensor(arg56_1, [view_106]); arg56_1 = view_106 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_107: "f32[49, 49, 8]" = torch.ops.aten.view.default(index_9, [49, 49, -1]); index_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_43: "f32[8, 49, 49]" = torch.ops.aten.permute.default(view_107, [2, 0, 1]); view_107 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_44: "f32[8, 49, 49]" = torch.ops.aten.clone.default(permute_43, memory_format = torch.contiguous_format); permute_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_9: "f32[1, 8, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_44, 0); clone_44 = None add_39: "f32[400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_105, unsqueeze_9); view_105 = unsqueeze_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_108: "f32[1, 400, 8, 49, 49]" = torch.ops.aten.view.default(add_39, [1, 400, 8, 49, 49]); add_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_10: "f32[400, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_3, 1); where_3 = None unsqueeze_11: "f32[1, 400, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_10, 0); unsqueeze_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_40: "f32[1, 400, 8, 49, 49]" = torch.ops.aten.add.Tensor(view_108, unsqueeze_11); view_108 = unsqueeze_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_109: "f32[400, 8, 49, 49]" = torch.ops.aten.view.default(add_40, [-1, 8, 49, 49]); add_40 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_3: "f32[400, 8, 49, 1]" = torch.ops.aten.amax.default(view_109, [-1], True) sub_15: "f32[400, 8, 49, 49]" = torch.ops.aten.sub.Tensor(view_109, amax_3); view_109 = amax_3 = None exp_3: "f32[400, 8, 49, 49]" = torch.ops.aten.exp.default(sub_15); sub_15 = None sum_4: "f32[400, 8, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_3, [-1], True) div_7: "f32[400, 8, 49, 49]" = torch.ops.aten.div.Tensor(exp_3, sum_4); exp_3 = sum_4 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_45: "f32[400, 8, 49, 49]" = torch.ops.aten.clone.default(div_7); div_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_110: "f16[400, 8, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_45, torch.float16); clone_45 = None expand_14: "f16[400, 8, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_110, [400, 8, 49, 49]); convert_element_type_110 = None view_110: "f16[3200, 49, 49]" = torch.ops.aten.view.default(expand_14, [3200, 49, 49]); expand_14 = None expand_15: "f16[400, 8, 49, 32]" = torch.ops.aten.expand.default(select_11, [400, 8, 49, 32]); select_11 = None clone_46: "f16[400, 8, 49, 32]" = torch.ops.aten.clone.default(expand_15, memory_format = torch.contiguous_format); expand_15 = None view_111: "f16[3200, 49, 32]" = torch.ops.aten.view.default(clone_46, [3200, 49, 32]); clone_46 = None bmm_7: "f16[3200, 49, 32]" = torch.ops.aten.bmm.default(view_110, view_111); view_110 = view_111 = None view_112: "f16[400, 8, 49, 32]" = torch.ops.aten.view.default(bmm_7, [400, 8, 49, 32]); bmm_7 = None permute_44: "f16[400, 49, 8, 32]" = torch.ops.aten.permute.default(view_112, [0, 2, 1, 3]); view_112 = None clone_47: "f16[400, 49, 8, 32]" = torch.ops.aten.clone.default(permute_44, memory_format = torch.contiguous_format); permute_44 = None view_113: "f16[400, 49, 256]" = torch.ops.aten.view.default(clone_47, [400, 49, 256]); clone_47 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_113: "f16[256]" = torch.ops.prims.convert_element_type.default(arg59_1, torch.float16); arg59_1 = None convert_element_type_114: "f16[256, 256]" = torch.ops.prims.convert_element_type.default(arg58_1, torch.float16); arg58_1 = None view_114: "f16[19600, 256]" = torch.ops.aten.view.default(view_113, [19600, 256]); view_113 = None permute_45: "f16[256, 256]" = torch.ops.aten.permute.default(convert_element_type_114, [1, 0]); convert_element_type_114 = None addmm_13: "f16[19600, 256]" = torch.ops.aten.addmm.default(convert_element_type_113, view_114, permute_45); convert_element_type_113 = view_114 = permute_45 = None view_115: "f16[400, 49, 256]" = torch.ops.aten.view.default(addmm_13, [400, 49, 256]); addmm_13 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_48: "f16[400, 49, 256]" = torch.ops.aten.clone.default(view_115); view_115 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_116: "f16[400, 7, 7, 256]" = torch.ops.aten.view.default(clone_48, [-1, 7, 7, 256]); clone_48 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_117: "f16[1, 20, 20, 7, 7, 256]" = torch.ops.aten.view.default(view_116, [1, 20, 20, 7, 7, -1]); view_116 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_46: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.permute.default(view_117, [0, 1, 3, 2, 4, 5]); view_117 = None clone_49: "f16[1, 20, 7, 20, 7, 256]" = torch.ops.aten.clone.default(permute_46, memory_format = torch.contiguous_format); permute_46 = None view_118: "f16[1, 140, 140, 256]" = torch.ops.aten.view.default(clone_49, [1, 140, 140, -1]); clone_49 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_6: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_41: "i64[140]" = torch.ops.aten.add.Tensor(iota_6, 137); iota_6 = None fmod_6: "i64[140]" = torch.ops.aten.fmod.Scalar(add_41, 140); add_41 = None slice_300: "f16[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(view_118, 0, 0, 9223372036854775807); view_118 = None index_10: "f16[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(slice_300, [None, fmod_6]); slice_300 = fmod_6 = None iota_7: "i64[140]" = torch.ops.prims.iota.default(140, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_42: "i64[140]" = torch.ops.aten.add.Tensor(iota_7, 137); iota_7 = None fmod_7: "i64[140]" = torch.ops.aten.fmod.Scalar(add_42, 140); add_42 = None slice_301: "f16[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(index_10, 0, 0, 9223372036854775807); index_10 = None slice_302: "f16[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(slice_301, 1, 0, 9223372036854775807); slice_301 = None index_11: "f16[1, 140, 140, 256]" = torch.ops.aten.index.Tensor(slice_302, [None, None, fmod_7]); slice_302 = fmod_7 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_303: "f16[1, 140, 140, 256]" = torch.ops.aten.slice.Tensor(index_11, 0, 0, 9223372036854775807); index_11 = None slice_304: "f16[1, 136, 140, 256]" = torch.ops.aten.slice.Tensor(slice_303, 1, 0, 136); slice_303 = None slice_305: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_304, 2, 0, 136); slice_304 = None slice_306: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(slice_305, 3, 0, 9223372036854775807); slice_305 = None clone_50: "f16[1, 136, 136, 256]" = torch.ops.aten.clone.default(slice_306, memory_format = torch.contiguous_format); slice_306 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_119: "f16[1, 18496, 256]" = torch.ops.aten.view.default(clone_50, [1, 18496, 256]); clone_50 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_43: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_34, view_119); add_34 = view_119 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_118: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_43, torch.float32) var_mean_10 = torch.ops.aten.var_mean.correction(convert_element_type_118, [2], correction = 0, keepdim = True) getitem_20: "f32[1, 18496, 1]" = var_mean_10[0] getitem_21: "f32[1, 18496, 1]" = var_mean_10[1]; var_mean_10 = None add_44: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_20, 1e-05); getitem_20 = None rsqrt_10: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_44); add_44 = None sub_16: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_118, getitem_21); convert_element_type_118 = getitem_21 = None mul_37: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_16, rsqrt_10); sub_16 = rsqrt_10 = None mul_38: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_37, arg60_1); mul_37 = arg60_1 = None add_45: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_38, arg61_1); mul_38 = arg61_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_119: "f16[1024]" = torch.ops.prims.convert_element_type.default(arg63_1, torch.float16); arg63_1 = None convert_element_type_120: "f16[1024, 256]" = torch.ops.prims.convert_element_type.default(arg62_1, torch.float16); arg62_1 = None convert_element_type_121: "f16[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_45, torch.float16); add_45 = None view_120: "f16[18496, 256]" = torch.ops.aten.view.default(convert_element_type_121, [18496, 256]); convert_element_type_121 = None permute_47: "f16[256, 1024]" = torch.ops.aten.permute.default(convert_element_type_120, [1, 0]); convert_element_type_120 = None addmm_14: "f16[18496, 1024]" = torch.ops.aten.addmm.default(convert_element_type_119, view_120, permute_47); convert_element_type_119 = view_120 = permute_47 = None view_121: "f16[1, 18496, 1024]" = torch.ops.aten.view.default(addmm_14, [1, 18496, 1024]); addmm_14 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_125: "f32[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(view_121, torch.float32); view_121 = None mul_39: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_125, 0.5) mul_40: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(convert_element_type_125, 0.7071067811865476); convert_element_type_125 = None erf_3: "f32[1, 18496, 1024]" = torch.ops.aten.erf.default(mul_40); mul_40 = None add_46: "f32[1, 18496, 1024]" = torch.ops.aten.add.Tensor(erf_3, 1); erf_3 = None mul_41: "f32[1, 18496, 1024]" = torch.ops.aten.mul.Tensor(mul_39, add_46); mul_39 = add_46 = None convert_element_type_126: "f16[1, 18496, 1024]" = torch.ops.prims.convert_element_type.default(mul_41, torch.float16); mul_41 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_51: "f16[1, 18496, 1024]" = torch.ops.aten.clone.default(convert_element_type_126); convert_element_type_126 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_127: "f16[256]" = torch.ops.prims.convert_element_type.default(arg65_1, torch.float16); arg65_1 = None convert_element_type_128: "f16[256, 1024]" = torch.ops.prims.convert_element_type.default(arg64_1, torch.float16); arg64_1 = None view_122: "f16[18496, 1024]" = torch.ops.aten.view.default(clone_51, [18496, 1024]); clone_51 = None permute_48: "f16[1024, 256]" = torch.ops.aten.permute.default(convert_element_type_128, [1, 0]); convert_element_type_128 = None addmm_15: "f16[18496, 256]" = torch.ops.aten.addmm.default(convert_element_type_127, view_122, permute_48); convert_element_type_127 = view_122 = permute_48 = None view_123: "f16[1, 18496, 256]" = torch.ops.aten.view.default(addmm_15, [1, 18496, 256]); addmm_15 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_52: "f16[1, 18496, 256]" = torch.ops.aten.clone.default(view_123); view_123 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_47: "f16[1, 18496, 256]" = torch.ops.aten.add.Tensor(add_43, clone_52); add_43 = clone_52 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:344 in forward, code: x = x.view(B, H, W, C) view_124: "f16[1, 136, 136, 256]" = torch.ops.aten.view.default(add_47, [1, 136, 136, 256]) # File: /workspace/networks/encoders/swin/swin_transformer.py:351 in forward, code: x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C slice_307: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 0, 0, 9223372036854775807) slice_308: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(slice_307, 1, 0, 9223372036854775807, 2); slice_307 = None slice_309: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_308, 2, 0, 9223372036854775807, 2); slice_308 = None slice_310: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_309, 3, 0, 9223372036854775807); slice_309 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:352 in forward, code: x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C slice_311: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 0, 0, 9223372036854775807) slice_312: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(slice_311, 1, 1, 9223372036854775807, 2); slice_311 = None slice_313: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_312, 2, 0, 9223372036854775807, 2); slice_312 = None slice_314: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_313, 3, 0, 9223372036854775807); slice_313 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:353 in forward, code: x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C slice_315: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 0, 0, 9223372036854775807) slice_316: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(slice_315, 1, 0, 9223372036854775807, 2); slice_315 = None slice_317: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_316, 2, 1, 9223372036854775807, 2); slice_316 = None slice_318: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_317, 3, 0, 9223372036854775807); slice_317 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:354 in forward, code: x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C slice_319: "f16[1, 136, 136, 256]" = torch.ops.aten.slice.Tensor(view_124, 0, 0, 9223372036854775807); view_124 = None slice_320: "f16[1, 68, 136, 256]" = torch.ops.aten.slice.Tensor(slice_319, 1, 1, 9223372036854775807, 2); slice_319 = None slice_321: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_320, 2, 1, 9223372036854775807, 2); slice_320 = None slice_322: "f16[1, 68, 68, 256]" = torch.ops.aten.slice.Tensor(slice_321, 3, 0, 9223372036854775807); slice_321 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:355 in forward, code: x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C cat_1: "f16[1, 68, 68, 1024]" = torch.ops.aten.cat.default([slice_310, slice_314, slice_318, slice_322], -1); slice_310 = slice_314 = slice_318 = slice_322 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:356 in forward, code: x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C view_125: "f16[1, 4624, 1024]" = torch.ops.aten.view.default(cat_1, [1, -1, 1024]); cat_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_132: "f32[1, 4624, 1024]" = torch.ops.prims.convert_element_type.default(view_125, torch.float32); view_125 = None var_mean_11 = torch.ops.aten.var_mean.correction(convert_element_type_132, [2], correction = 0, keepdim = True) getitem_22: "f32[1, 4624, 1]" = var_mean_11[0] getitem_23: "f32[1, 4624, 1]" = var_mean_11[1]; var_mean_11 = None add_48: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_22, 1e-05); getitem_22 = None rsqrt_11: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_48); add_48 = None sub_17: "f32[1, 4624, 1024]" = torch.ops.aten.sub.Tensor(convert_element_type_132, getitem_23); convert_element_type_132 = getitem_23 = None mul_42: "f32[1, 4624, 1024]" = torch.ops.aten.mul.Tensor(sub_17, rsqrt_11); sub_17 = rsqrt_11 = None mul_43: "f32[1, 4624, 1024]" = torch.ops.aten.mul.Tensor(mul_42, arg66_1); mul_42 = arg66_1 = None add_49: "f32[1, 4624, 1024]" = torch.ops.aten.add.Tensor(mul_43, arg67_1); mul_43 = arg67_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_133: "f16[512, 1024]" = torch.ops.prims.convert_element_type.default(arg68_1, torch.float16); arg68_1 = None convert_element_type_134: "f16[1, 4624, 1024]" = torch.ops.prims.convert_element_type.default(add_49, torch.float16); add_49 = None permute_49: "f16[1024, 512]" = torch.ops.aten.permute.default(convert_element_type_133, [1, 0]); convert_element_type_133 = None view_126: "f16[4624, 1024]" = torch.ops.aten.view.default(convert_element_type_134, [4624, 1024]); convert_element_type_134 = None mm_1: "f16[4624, 512]" = torch.ops.aten.mm.default(view_126, permute_49); view_126 = permute_49 = None view_127: "f16[1, 4624, 512]" = torch.ops.aten.view.default(mm_1, [1, 4624, 512]); mm_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_137: "f32[1, 18496, 256]" = torch.ops.prims.convert_element_type.default(add_47, torch.float32); add_47 = None var_mean_12 = torch.ops.aten.var_mean.correction(convert_element_type_137, [2], correction = 0, keepdim = True) getitem_24: "f32[1, 18496, 1]" = var_mean_12[0] getitem_25: "f32[1, 18496, 1]" = var_mean_12[1]; var_mean_12 = None add_50: "f32[1, 18496, 1]" = torch.ops.aten.add.Tensor(getitem_24, 1e-05); getitem_24 = None rsqrt_12: "f32[1, 18496, 1]" = torch.ops.aten.rsqrt.default(add_50); add_50 = None sub_18: "f32[1, 18496, 256]" = torch.ops.aten.sub.Tensor(convert_element_type_137, getitem_25); convert_element_type_137 = getitem_25 = None mul_44: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(sub_18, rsqrt_12); sub_18 = rsqrt_12 = None mul_45: "f32[1, 18496, 256]" = torch.ops.aten.mul.Tensor(mul_44, arg69_1); mul_44 = arg69_1 = None add_51: "f32[1, 18496, 256]" = torch.ops.aten.add.Tensor(mul_45, arg70_1); mul_45 = arg70_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W, view_128: "f32[1, 136, 136, 256]" = torch.ops.aten.view.default(add_51, [-1, 136, 136, 256]); add_51 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1, permute_50: "f32[1, 256, 136, 136]" = torch.ops.aten.permute.default(view_128, [0, 3, 1, 2]); view_128 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous() clone_53: "f32[1, 256, 136, 136]" = torch.ops.aten.clone.default(permute_50, memory_format = torch.contiguous_format); permute_50 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:443 in forward, code: img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 full_2: "f32[1, 70, 70, 1]" = torch.ops.aten.full.default([1, 70, 70, 1], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) # File: /workspace/networks/encoders/swin/swin_transformer.py:453 in forward, code: img_mask[:, h, w, :] = cnt _tensor_constant24 = self._tensor_constant24 lift_fresh_copy_24: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant24); _tensor_constant24 = None slice_323: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(full_2, 0, 0, 9223372036854775807) slice_324: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_323, 1, 0, -7); slice_323 = None slice_325: "f32[1, 63, 63, 1]" = torch.ops.aten.slice.Tensor(slice_324, 2, 0, -7); slice_324 = None slice_326: "f32[1, 63, 63, 1]" = torch.ops.aten.slice.Tensor(slice_325, 3, 0, 9223372036854775807); slice_325 = None copy_18: "f32[1, 63, 63, 1]" = torch.ops.aten.copy.default(slice_326, lift_fresh_copy_24); slice_326 = lift_fresh_copy_24 = None slice_327: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(full_2, 0, 0, 9223372036854775807) slice_328: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_327, 1, 0, -7) slice_329: "f32[1, 63, 63, 1]" = torch.ops.aten.slice.Tensor(slice_328, 2, 0, -7) slice_scatter_72: "f32[1, 63, 63, 1]" = torch.ops.aten.slice_scatter.default(slice_329, copy_18, 3, 0, 9223372036854775807); slice_329 = copy_18 = None slice_scatter_73: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_328, slice_scatter_72, 2, 0, -7); slice_328 = slice_scatter_72 = None slice_scatter_74: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_327, slice_scatter_73, 1, 0, -7); slice_327 = slice_scatter_73 = None slice_scatter_75: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(full_2, slice_scatter_74, 0, 0, 9223372036854775807); full_2 = slice_scatter_74 = None _tensor_constant25 = self._tensor_constant25 lift_fresh_copy_25: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant25); _tensor_constant25 = None slice_338: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_75, 0, 0, 9223372036854775807) slice_339: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_338, 1, 0, -7); slice_338 = None slice_340: "f32[1, 63, 4, 1]" = torch.ops.aten.slice.Tensor(slice_339, 2, -7, -3); slice_339 = None slice_341: "f32[1, 63, 4, 1]" = torch.ops.aten.slice.Tensor(slice_340, 3, 0, 9223372036854775807); slice_340 = None copy_19: "f32[1, 63, 4, 1]" = torch.ops.aten.copy.default(slice_341, lift_fresh_copy_25); slice_341 = lift_fresh_copy_25 = None slice_342: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_75, 0, 0, 9223372036854775807) slice_343: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_342, 1, 0, -7) slice_344: "f32[1, 63, 4, 1]" = torch.ops.aten.slice.Tensor(slice_343, 2, -7, -3) slice_scatter_76: "f32[1, 63, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_344, copy_19, 3, 0, 9223372036854775807); slice_344 = copy_19 = None slice_scatter_77: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_343, slice_scatter_76, 2, -7, -3); slice_343 = slice_scatter_76 = None slice_scatter_78: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_342, slice_scatter_77, 1, 0, -7); slice_342 = slice_scatter_77 = None slice_scatter_79: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_75, slice_scatter_78, 0, 0, 9223372036854775807); slice_scatter_75 = slice_scatter_78 = None _tensor_constant26 = self._tensor_constant26 lift_fresh_copy_26: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant26); _tensor_constant26 = None slice_353: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_79, 0, 0, 9223372036854775807) slice_354: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_353, 1, 0, -7); slice_353 = None slice_355: "f32[1, 63, 3, 1]" = torch.ops.aten.slice.Tensor(slice_354, 2, -3, 9223372036854775807); slice_354 = None slice_356: "f32[1, 63, 3, 1]" = torch.ops.aten.slice.Tensor(slice_355, 3, 0, 9223372036854775807); slice_355 = None copy_20: "f32[1, 63, 3, 1]" = torch.ops.aten.copy.default(slice_356, lift_fresh_copy_26); slice_356 = lift_fresh_copy_26 = None slice_357: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_79, 0, 0, 9223372036854775807) slice_358: "f32[1, 63, 70, 1]" = torch.ops.aten.slice.Tensor(slice_357, 1, 0, -7) slice_359: "f32[1, 63, 3, 1]" = torch.ops.aten.slice.Tensor(slice_358, 2, -3, 9223372036854775807) slice_scatter_80: "f32[1, 63, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_359, copy_20, 3, 0, 9223372036854775807); slice_359 = copy_20 = None slice_scatter_81: "f32[1, 63, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_358, slice_scatter_80, 2, -3, 9223372036854775807); slice_358 = slice_scatter_80 = None slice_scatter_82: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_357, slice_scatter_81, 1, 0, -7); slice_357 = slice_scatter_81 = None slice_scatter_83: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_79, slice_scatter_82, 0, 0, 9223372036854775807); slice_scatter_79 = slice_scatter_82 = None _tensor_constant27 = self._tensor_constant27 lift_fresh_copy_27: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant27); _tensor_constant27 = None slice_368: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_83, 0, 0, 9223372036854775807) slice_369: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_368, 1, -7, -3); slice_368 = None slice_370: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(slice_369, 2, 0, -7); slice_369 = None slice_371: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(slice_370, 3, 0, 9223372036854775807); slice_370 = None copy_21: "f32[1, 4, 63, 1]" = torch.ops.aten.copy.default(slice_371, lift_fresh_copy_27); slice_371 = lift_fresh_copy_27 = None slice_372: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_83, 0, 0, 9223372036854775807) slice_373: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_372, 1, -7, -3) slice_374: "f32[1, 4, 63, 1]" = torch.ops.aten.slice.Tensor(slice_373, 2, 0, -7) slice_scatter_84: "f32[1, 4, 63, 1]" = torch.ops.aten.slice_scatter.default(slice_374, copy_21, 3, 0, 9223372036854775807); slice_374 = copy_21 = None slice_scatter_85: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_373, slice_scatter_84, 2, 0, -7); slice_373 = slice_scatter_84 = None slice_scatter_86: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_372, slice_scatter_85, 1, -7, -3); slice_372 = slice_scatter_85 = None slice_scatter_87: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_83, slice_scatter_86, 0, 0, 9223372036854775807); slice_scatter_83 = slice_scatter_86 = None _tensor_constant28 = self._tensor_constant28 lift_fresh_copy_28: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant28); _tensor_constant28 = None slice_383: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_87, 0, 0, 9223372036854775807) slice_384: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_383, 1, -7, -3); slice_383 = None slice_385: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_384, 2, -7, -3); slice_384 = None slice_386: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_385, 3, 0, 9223372036854775807); slice_385 = None copy_22: "f32[1, 4, 4, 1]" = torch.ops.aten.copy.default(slice_386, lift_fresh_copy_28); slice_386 = lift_fresh_copy_28 = None slice_387: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_87, 0, 0, 9223372036854775807) slice_388: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_387, 1, -7, -3) slice_389: "f32[1, 4, 4, 1]" = torch.ops.aten.slice.Tensor(slice_388, 2, -7, -3) slice_scatter_88: "f32[1, 4, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_389, copy_22, 3, 0, 9223372036854775807); slice_389 = copy_22 = None slice_scatter_89: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_388, slice_scatter_88, 2, -7, -3); slice_388 = slice_scatter_88 = None slice_scatter_90: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_387, slice_scatter_89, 1, -7, -3); slice_387 = slice_scatter_89 = None slice_scatter_91: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_87, slice_scatter_90, 0, 0, 9223372036854775807); slice_scatter_87 = slice_scatter_90 = None _tensor_constant29 = self._tensor_constant29 lift_fresh_copy_29: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant29); _tensor_constant29 = None slice_398: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_91, 0, 0, 9223372036854775807) slice_399: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_398, 1, -7, -3); slice_398 = None slice_400: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_399, 2, -3, 9223372036854775807); slice_399 = None slice_401: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_400, 3, 0, 9223372036854775807); slice_400 = None copy_23: "f32[1, 4, 3, 1]" = torch.ops.aten.copy.default(slice_401, lift_fresh_copy_29); slice_401 = lift_fresh_copy_29 = None slice_402: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_91, 0, 0, 9223372036854775807) slice_403: "f32[1, 4, 70, 1]" = torch.ops.aten.slice.Tensor(slice_402, 1, -7, -3) slice_404: "f32[1, 4, 3, 1]" = torch.ops.aten.slice.Tensor(slice_403, 2, -3, 9223372036854775807) slice_scatter_92: "f32[1, 4, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_404, copy_23, 3, 0, 9223372036854775807); slice_404 = copy_23 = None slice_scatter_93: "f32[1, 4, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_403, slice_scatter_92, 2, -3, 9223372036854775807); slice_403 = slice_scatter_92 = None slice_scatter_94: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_402, slice_scatter_93, 1, -7, -3); slice_402 = slice_scatter_93 = None slice_scatter_95: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_91, slice_scatter_94, 0, 0, 9223372036854775807); slice_scatter_91 = slice_scatter_94 = None _tensor_constant30 = self._tensor_constant30 lift_fresh_copy_30: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant30); _tensor_constant30 = None slice_413: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_95, 0, 0, 9223372036854775807) slice_414: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_413, 1, -3, 9223372036854775807); slice_413 = None slice_415: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(slice_414, 2, 0, -7); slice_414 = None slice_416: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(slice_415, 3, 0, 9223372036854775807); slice_415 = None copy_24: "f32[1, 3, 63, 1]" = torch.ops.aten.copy.default(slice_416, lift_fresh_copy_30); slice_416 = lift_fresh_copy_30 = None slice_417: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_95, 0, 0, 9223372036854775807) slice_418: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_417, 1, -3, 9223372036854775807) slice_419: "f32[1, 3, 63, 1]" = torch.ops.aten.slice.Tensor(slice_418, 2, 0, -7) slice_scatter_96: "f32[1, 3, 63, 1]" = torch.ops.aten.slice_scatter.default(slice_419, copy_24, 3, 0, 9223372036854775807); slice_419 = copy_24 = None slice_scatter_97: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_418, slice_scatter_96, 2, 0, -7); slice_418 = slice_scatter_96 = None slice_scatter_98: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_417, slice_scatter_97, 1, -3, 9223372036854775807); slice_417 = slice_scatter_97 = None slice_scatter_99: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_95, slice_scatter_98, 0, 0, 9223372036854775807); slice_scatter_95 = slice_scatter_98 = None _tensor_constant31 = self._tensor_constant31 lift_fresh_copy_31: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant31); _tensor_constant31 = None slice_428: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_99, 0, 0, 9223372036854775807) slice_429: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_428, 1, -3, 9223372036854775807); slice_428 = None slice_430: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_429, 2, -7, -3); slice_429 = None slice_431: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_430, 3, 0, 9223372036854775807); slice_430 = None copy_25: "f32[1, 3, 4, 1]" = torch.ops.aten.copy.default(slice_431, lift_fresh_copy_31); slice_431 = lift_fresh_copy_31 = None slice_432: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_99, 0, 0, 9223372036854775807) slice_433: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_432, 1, -3, 9223372036854775807) slice_434: "f32[1, 3, 4, 1]" = torch.ops.aten.slice.Tensor(slice_433, 2, -7, -3) slice_scatter_100: "f32[1, 3, 4, 1]" = torch.ops.aten.slice_scatter.default(slice_434, copy_25, 3, 0, 9223372036854775807); slice_434 = copy_25 = None slice_scatter_101: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_433, slice_scatter_100, 2, -7, -3); slice_433 = slice_scatter_100 = None slice_scatter_102: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_432, slice_scatter_101, 1, -3, 9223372036854775807); slice_432 = slice_scatter_101 = None slice_scatter_103: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_99, slice_scatter_102, 0, 0, 9223372036854775807); slice_scatter_99 = slice_scatter_102 = None _tensor_constant32 = self._tensor_constant32 lift_fresh_copy_32: "f32[]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant32); _tensor_constant32 = None slice_443: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_103, 0, 0, 9223372036854775807) slice_444: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_443, 1, -3, 9223372036854775807); slice_443 = None slice_445: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_444, 2, -3, 9223372036854775807); slice_444 = None slice_446: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_445, 3, 0, 9223372036854775807); slice_445 = None copy_26: "f32[1, 3, 3, 1]" = torch.ops.aten.copy.default(slice_446, lift_fresh_copy_32); slice_446 = lift_fresh_copy_32 = None slice_447: "f32[1, 70, 70, 1]" = torch.ops.aten.slice.Tensor(slice_scatter_103, 0, 0, 9223372036854775807) slice_448: "f32[1, 3, 70, 1]" = torch.ops.aten.slice.Tensor(slice_447, 1, -3, 9223372036854775807) slice_449: "f32[1, 3, 3, 1]" = torch.ops.aten.slice.Tensor(slice_448, 2, -3, 9223372036854775807) slice_scatter_104: "f32[1, 3, 3, 1]" = torch.ops.aten.slice_scatter.default(slice_449, copy_26, 3, 0, 9223372036854775807); slice_449 = copy_26 = None slice_scatter_105: "f32[1, 3, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_448, slice_scatter_104, 2, -3, 9223372036854775807); slice_448 = slice_scatter_104 = None slice_scatter_106: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_447, slice_scatter_105, 1, -3, 9223372036854775807); slice_447 = slice_scatter_105 = None slice_scatter_107: "f32[1, 70, 70, 1]" = torch.ops.aten.slice_scatter.default(slice_scatter_103, slice_scatter_106, 0, 0, 9223372036854775807); slice_scatter_103 = slice_scatter_106 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) view_130: "f32[1, 10, 7, 10, 7, 1]" = torch.ops.aten.view.default(slice_scatter_107, [1, 10, 7, 10, 7, 1]); slice_scatter_107 = None permute_52: "f32[1, 10, 10, 7, 7, 1]" = torch.ops.aten.permute.default(view_130, [0, 1, 3, 2, 4, 5]); view_130 = None clone_54: "f32[1, 10, 10, 7, 7, 1]" = torch.ops.aten.clone.default(permute_52, memory_format = torch.contiguous_format); permute_52 = None view_131: "f32[100, 7, 7, 1]" = torch.ops.aten.view.default(clone_54, [-1, 7, 7, 1]); clone_54 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:458 in forward, code: mask_windows = mask_windows.view(-1, view_132: "f32[100, 49]" = torch.ops.aten.view.default(view_131, [-1, 49]); view_131 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:460 in forward, code: attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) unsqueeze_12: "f32[100, 1, 49]" = torch.ops.aten.unsqueeze.default(view_132, 1) unsqueeze_13: "f32[100, 49, 1]" = torch.ops.aten.unsqueeze.default(view_132, 2); view_132 = None sub_19: "f32[100, 49, 49]" = torch.ops.aten.sub.Tensor(unsqueeze_12, unsqueeze_13); unsqueeze_12 = unsqueeze_13 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:461 in forward, code: attn_mask = attn_mask.masked_fill(attn_mask != 0, ne_2: "b8[100, 49, 49]" = torch.ops.aten.ne.Scalar(sub_19, 0) scalar_tensor_4: "f32[]" = torch.ops.aten.scalar_tensor.default(-100.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where_4: "f32[100, 49, 49]" = torch.ops.aten.where.self(ne_2, scalar_tensor_4, sub_19); ne_2 = scalar_tensor_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:463 in forward, code: attn_mask == 0, float(0.0)) eq_2: "b8[100, 49, 49]" = torch.ops.aten.eq.Scalar(sub_19, 0); sub_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:462 in forward, code: float(-100.0)).masked_fill( scalar_tensor_5: "f32[]" = torch.ops.aten.scalar_tensor.default(0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0)) where_5: "f32[100, 49, 49]" = torch.ops.aten.where.self(eq_2, scalar_tensor_5, where_4); eq_2 = scalar_tensor_5 = where_4 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_140: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(view_127, torch.float32) var_mean_13 = torch.ops.aten.var_mean.correction(convert_element_type_140, [2], correction = 0, keepdim = True) getitem_26: "f32[1, 4624, 1]" = var_mean_13[0] getitem_27: "f32[1, 4624, 1]" = var_mean_13[1]; var_mean_13 = None add_52: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_26, 1e-05); getitem_26 = None rsqrt_13: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_52); add_52 = None sub_20: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_140, getitem_27); convert_element_type_140 = getitem_27 = None mul_48: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_20, rsqrt_13); sub_20 = rsqrt_13 = None mul_49: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_48, arg71_1); mul_48 = arg71_1 = None add_53: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_49, arg72_1); mul_49 = arg72_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_133: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_53, [1, 68, 68, 512]); add_53 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_4: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_133, [0, 0, 0, 2, 0, 2], 0.0); view_133 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_134: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_4, [1, 10, 7, 10, 7, 512]); constant_pad_nd_4 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_53: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_134, [0, 1, 3, 2, 4, 5]); view_134 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_55: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_53, memory_format = torch.contiguous_format); permute_53 = None view_135: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_55, [-1, 7, 7, 512]); clone_55 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_136: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_135, [-1, 49, 512]); view_135 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_141: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg74_1, torch.float16); arg74_1 = None convert_element_type_142: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg73_1, torch.float16); arg73_1 = None convert_element_type_143: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_136, torch.float16); view_136 = None view_137: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_143, [4900, 512]); convert_element_type_143 = None permute_54: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_142, [1, 0]); convert_element_type_142 = None addmm_16: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_141, view_137, permute_54); convert_element_type_141 = view_137 = permute_54 = None view_138: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_16, [100, 49, 1536]); addmm_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_139: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_138, [100, 49, 3, 16, 32]); view_138 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_55: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_139, [2, 0, 3, 1, 4]); view_139 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_12: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 0) select_13: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 1) select_14: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_55, 0, 2); permute_55 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_50: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_12, 0.1767766952966369); select_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_56: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_13, [0, 1, 3, 2]); select_13 = None expand_16: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_50, [100, 16, 49, 32]); mul_50 = None clone_56: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_16, memory_format = torch.contiguous_format); expand_16 = None view_140: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_56, [1600, 49, 32]); clone_56 = None expand_17: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_56, [100, 16, 32, 49]); permute_56 = None clone_57: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_17, memory_format = torch.contiguous_format); expand_17 = None view_141: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_57, [1600, 32, 49]); clone_57 = None bmm_8: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_140, view_141); view_140 = view_141 = None view_142: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_8, [100, 16, 49, 49]); bmm_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_143: "i64[2401]" = torch.ops.aten.view.default(arg76_1, [-1]); arg76_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_12: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg75_1, [view_143]); arg75_1 = view_143 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_144: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_12, [49, 49, -1]); index_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_57: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_144, [2, 0, 1]); view_144 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_58: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_57, memory_format = torch.contiguous_format); permute_57 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_14: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_58, 0); clone_58 = None add_54: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_142, unsqueeze_14); view_142 = unsqueeze_14 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_4: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_54, [-1], True) sub_21: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_54, amax_4); add_54 = amax_4 = None exp_4: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_21); sub_21 = None sum_5: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_4, [-1], True) div_10: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_4, sum_5); exp_4 = sum_5 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_59: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_10); div_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_149: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_59, torch.float16); clone_59 = None expand_18: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_149, [100, 16, 49, 49]); convert_element_type_149 = None view_145: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_18, [1600, 49, 49]); expand_18 = None expand_19: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_14, [100, 16, 49, 32]); select_14 = None clone_60: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_19, memory_format = torch.contiguous_format); expand_19 = None view_146: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_60, [1600, 49, 32]); clone_60 = None bmm_9: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_145, view_146); view_145 = view_146 = None view_147: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_9, [100, 16, 49, 32]); bmm_9 = None permute_58: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_147, [0, 2, 1, 3]); view_147 = None clone_61: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_58, memory_format = torch.contiguous_format); permute_58 = None view_148: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_61, [100, 49, 512]); clone_61 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_152: "f16[512]" = torch.ops.prims.convert_element_type.default(arg78_1, torch.float16); arg78_1 = None convert_element_type_153: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg77_1, torch.float16); arg77_1 = None view_149: "f16[4900, 512]" = torch.ops.aten.view.default(view_148, [4900, 512]); view_148 = None permute_59: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_153, [1, 0]); convert_element_type_153 = None addmm_17: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_152, view_149, permute_59); convert_element_type_152 = view_149 = permute_59 = None view_150: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_17, [100, 49, 512]); addmm_17 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_62: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_150); view_150 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_151: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_62, [-1, 7, 7, 512]); clone_62 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_152: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_151, [1, 10, 10, 7, 7, -1]); view_151 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_60: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_152, [0, 1, 3, 2, 4, 5]); view_152 = None clone_63: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_60, memory_format = torch.contiguous_format); permute_60 = None view_153: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_63, [1, 70, 70, -1]); clone_63 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_454: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_153, 0, 0, 9223372036854775807); view_153 = None slice_455: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_454, 1, 0, 68); slice_454 = None slice_456: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_455, 2, 0, 68); slice_455 = None slice_457: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_456, 3, 0, 9223372036854775807); slice_456 = None clone_64: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_457, memory_format = torch.contiguous_format); slice_457 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_154: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_64, [1, 4624, 512]); clone_64 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_55: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(view_127, view_154); view_127 = view_154 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_157: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_55, torch.float32) var_mean_14 = torch.ops.aten.var_mean.correction(convert_element_type_157, [2], correction = 0, keepdim = True) getitem_28: "f32[1, 4624, 1]" = var_mean_14[0] getitem_29: "f32[1, 4624, 1]" = var_mean_14[1]; var_mean_14 = None add_56: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_28, 1e-05); getitem_28 = None rsqrt_14: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_56); add_56 = None sub_22: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_157, getitem_29); convert_element_type_157 = getitem_29 = None mul_51: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_22, rsqrt_14); sub_22 = rsqrt_14 = None mul_52: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_51, arg79_1); mul_51 = arg79_1 = None add_57: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_52, arg80_1); mul_52 = arg80_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_158: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg82_1, torch.float16); arg82_1 = None convert_element_type_159: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg81_1, torch.float16); arg81_1 = None convert_element_type_160: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_57, torch.float16); add_57 = None view_155: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_160, [4624, 512]); convert_element_type_160 = None permute_61: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_159, [1, 0]); convert_element_type_159 = None addmm_18: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_158, view_155, permute_61); convert_element_type_158 = view_155 = permute_61 = None view_156: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_18, [1, 4624, 2048]); addmm_18 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_164: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_156, torch.float32); view_156 = None mul_53: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_164, 0.5) mul_54: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_164, 0.7071067811865476); convert_element_type_164 = None erf_4: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_54); mul_54 = None add_58: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_4, 1); erf_4 = None mul_55: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_53, add_58); mul_53 = add_58 = None convert_element_type_165: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_55, torch.float16); mul_55 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_65: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_165); convert_element_type_165 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_166: "f16[512]" = torch.ops.prims.convert_element_type.default(arg84_1, torch.float16); arg84_1 = None convert_element_type_167: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg83_1, torch.float16); arg83_1 = None view_157: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_65, [4624, 2048]); clone_65 = None permute_62: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_167, [1, 0]); convert_element_type_167 = None addmm_19: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_166, view_157, permute_62); convert_element_type_166 = view_157 = permute_62 = None view_158: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_19, [1, 4624, 512]); addmm_19 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_66: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_158); view_158 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_59: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_55, clone_66); add_55 = clone_66 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_171: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_59, torch.float32) var_mean_15 = torch.ops.aten.var_mean.correction(convert_element_type_171, [2], correction = 0, keepdim = True) getitem_30: "f32[1, 4624, 1]" = var_mean_15[0] getitem_31: "f32[1, 4624, 1]" = var_mean_15[1]; var_mean_15 = None add_60: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_30, 1e-05); getitem_30 = None rsqrt_15: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_60); add_60 = None sub_23: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_171, getitem_31); convert_element_type_171 = getitem_31 = None mul_56: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_23, rsqrt_15); sub_23 = rsqrt_15 = None mul_57: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_56, arg85_1); mul_56 = arg85_1 = None add_61: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_57, arg86_1); mul_57 = arg86_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_159: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_61, [1, 68, 68, 512]); add_61 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_5: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_159, [0, 0, 0, 2, 0, 2], 0.0); view_159 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_8: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_62: "i64[70]" = torch.ops.aten.add.Tensor(iota_8, 3); iota_8 = None fmod_8: "i64[70]" = torch.ops.aten.fmod.Scalar(add_62, 70); add_62 = None slice_458: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_5, 0, 0, 9223372036854775807); constant_pad_nd_5 = None index_13: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_458, [None, fmod_8]); slice_458 = fmod_8 = None iota_9: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_63: "i64[70]" = torch.ops.aten.add.Tensor(iota_9, 3); iota_9 = None fmod_9: "i64[70]" = torch.ops.aten.fmod.Scalar(add_63, 70); add_63 = None slice_459: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_13, 0, 0, 9223372036854775807); index_13 = None slice_460: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_459, 1, 0, 9223372036854775807); slice_459 = None index_14: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_460, [None, None, fmod_9]); slice_460 = fmod_9 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_160: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_14, [1, 10, 7, 10, 7, 512]); index_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_63: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_160, [0, 1, 3, 2, 4, 5]); view_160 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_67: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_63, memory_format = torch.contiguous_format); permute_63 = None view_161: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_67, [-1, 7, 7, 512]); clone_67 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_162: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_161, [-1, 49, 512]); view_161 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_172: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg88_1, torch.float16); arg88_1 = None convert_element_type_173: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg87_1, torch.float16); arg87_1 = None convert_element_type_174: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_162, torch.float16); view_162 = None view_163: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_174, [4900, 512]); convert_element_type_174 = None permute_64: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_173, [1, 0]); convert_element_type_173 = None addmm_20: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_172, view_163, permute_64); convert_element_type_172 = view_163 = permute_64 = None view_164: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_20, [100, 49, 1536]); addmm_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_165: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_164, [100, 49, 3, 16, 32]); view_164 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_65: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_165, [2, 0, 3, 1, 4]); view_165 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_15: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 0) select_16: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 1) select_17: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_65, 0, 2); permute_65 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_58: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_15, 0.1767766952966369); select_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_66: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_16, [0, 1, 3, 2]); select_16 = None expand_20: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_58, [100, 16, 49, 32]); mul_58 = None clone_68: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_20, memory_format = torch.contiguous_format); expand_20 = None view_166: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_68, [1600, 49, 32]); clone_68 = None expand_21: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_66, [100, 16, 32, 49]); permute_66 = None clone_69: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_21, memory_format = torch.contiguous_format); expand_21 = None view_167: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_69, [1600, 32, 49]); clone_69 = None bmm_10: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_166, view_167); view_166 = view_167 = None view_168: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_10, [100, 16, 49, 49]); bmm_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_169: "i64[2401]" = torch.ops.aten.view.default(arg90_1, [-1]); arg90_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_15: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg89_1, [view_169]); arg89_1 = view_169 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_170: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_15, [49, 49, -1]); index_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_67: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_170, [2, 0, 1]); view_170 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_70: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_67, memory_format = torch.contiguous_format); permute_67 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_15: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_70, 0); clone_70 = None add_64: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_168, unsqueeze_15); view_168 = unsqueeze_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_171: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_64, [1, 100, 16, 49, 49]); add_64 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_16: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_17: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_16, 0); unsqueeze_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_65: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_171, unsqueeze_17); view_171 = unsqueeze_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_172: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_65, [-1, 16, 49, 49]); add_65 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_5: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_172, [-1], True) sub_24: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_172, amax_5); view_172 = amax_5 = None exp_5: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_24); sub_24 = None sum_6: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_5, [-1], True) div_11: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_5, sum_6); exp_5 = sum_6 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_71: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_11); div_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_180: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_71, torch.float16); clone_71 = None expand_22: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_180, [100, 16, 49, 49]); convert_element_type_180 = None view_173: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_22, [1600, 49, 49]); expand_22 = None expand_23: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_17, [100, 16, 49, 32]); select_17 = None clone_72: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_23, memory_format = torch.contiguous_format); expand_23 = None view_174: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_72, [1600, 49, 32]); clone_72 = None bmm_11: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_173, view_174); view_173 = view_174 = None view_175: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_11, [100, 16, 49, 32]); bmm_11 = None permute_68: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_175, [0, 2, 1, 3]); view_175 = None clone_73: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_68, memory_format = torch.contiguous_format); permute_68 = None view_176: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_73, [100, 49, 512]); clone_73 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_183: "f16[512]" = torch.ops.prims.convert_element_type.default(arg92_1, torch.float16); arg92_1 = None convert_element_type_184: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg91_1, torch.float16); arg91_1 = None view_177: "f16[4900, 512]" = torch.ops.aten.view.default(view_176, [4900, 512]); view_176 = None permute_69: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_184, [1, 0]); convert_element_type_184 = None addmm_21: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_183, view_177, permute_69); convert_element_type_183 = view_177 = permute_69 = None view_178: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_21, [100, 49, 512]); addmm_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_74: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_178); view_178 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_179: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_74, [-1, 7, 7, 512]); clone_74 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_180: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_179, [1, 10, 10, 7, 7, -1]); view_179 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_70: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_180, [0, 1, 3, 2, 4, 5]); view_180 = None clone_75: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_70, memory_format = torch.contiguous_format); permute_70 = None view_181: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_75, [1, 70, 70, -1]); clone_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_10: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_66: "i64[70]" = torch.ops.aten.add.Tensor(iota_10, 67); iota_10 = None fmod_10: "i64[70]" = torch.ops.aten.fmod.Scalar(add_66, 70); add_66 = None slice_461: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_181, 0, 0, 9223372036854775807); view_181 = None index_16: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_461, [None, fmod_10]); slice_461 = fmod_10 = None iota_11: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_67: "i64[70]" = torch.ops.aten.add.Tensor(iota_11, 67); iota_11 = None fmod_11: "i64[70]" = torch.ops.aten.fmod.Scalar(add_67, 70); add_67 = None slice_462: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_16, 0, 0, 9223372036854775807); index_16 = None slice_463: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_462, 1, 0, 9223372036854775807); slice_462 = None index_17: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_463, [None, None, fmod_11]); slice_463 = fmod_11 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_464: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_17, 0, 0, 9223372036854775807); index_17 = None slice_465: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_464, 1, 0, 68); slice_464 = None slice_466: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_465, 2, 0, 68); slice_465 = None slice_467: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_466, 3, 0, 9223372036854775807); slice_466 = None clone_76: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_467, memory_format = torch.contiguous_format); slice_467 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_182: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_76, [1, 4624, 512]); clone_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_68: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_59, view_182); add_59 = view_182 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_188: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_68, torch.float32) var_mean_16 = torch.ops.aten.var_mean.correction(convert_element_type_188, [2], correction = 0, keepdim = True) getitem_32: "f32[1, 4624, 1]" = var_mean_16[0] getitem_33: "f32[1, 4624, 1]" = var_mean_16[1]; var_mean_16 = None add_69: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_32, 1e-05); getitem_32 = None rsqrt_16: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_69); add_69 = None sub_25: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_188, getitem_33); convert_element_type_188 = getitem_33 = None mul_59: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_25, rsqrt_16); sub_25 = rsqrt_16 = None mul_60: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_59, arg93_1); mul_59 = arg93_1 = None add_70: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_60, arg94_1); mul_60 = arg94_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_189: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg96_1, torch.float16); arg96_1 = None convert_element_type_190: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg95_1, torch.float16); arg95_1 = None convert_element_type_191: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_70, torch.float16); add_70 = None view_183: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_191, [4624, 512]); convert_element_type_191 = None permute_71: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_190, [1, 0]); convert_element_type_190 = None addmm_22: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_189, view_183, permute_71); convert_element_type_189 = view_183 = permute_71 = None view_184: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_22, [1, 4624, 2048]); addmm_22 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_195: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_184, torch.float32); view_184 = None mul_61: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_195, 0.5) mul_62: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_195, 0.7071067811865476); convert_element_type_195 = None erf_5: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_62); mul_62 = None add_71: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_5, 1); erf_5 = None mul_63: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_61, add_71); mul_61 = add_71 = None convert_element_type_196: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_63, torch.float16); mul_63 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_77: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_196); convert_element_type_196 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_197: "f16[512]" = torch.ops.prims.convert_element_type.default(arg98_1, torch.float16); arg98_1 = None convert_element_type_198: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg97_1, torch.float16); arg97_1 = None view_185: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_77, [4624, 2048]); clone_77 = None permute_72: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_198, [1, 0]); convert_element_type_198 = None addmm_23: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_197, view_185, permute_72); convert_element_type_197 = view_185 = permute_72 = None view_186: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_23, [1, 4624, 512]); addmm_23 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_78: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_186); view_186 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_72: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_68, clone_78); add_68 = clone_78 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_202: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_72, torch.float32) var_mean_17 = torch.ops.aten.var_mean.correction(convert_element_type_202, [2], correction = 0, keepdim = True) getitem_34: "f32[1, 4624, 1]" = var_mean_17[0] getitem_35: "f32[1, 4624, 1]" = var_mean_17[1]; var_mean_17 = None add_73: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_34, 1e-05); getitem_34 = None rsqrt_17: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_73); add_73 = None sub_26: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_202, getitem_35); convert_element_type_202 = getitem_35 = None mul_64: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_26, rsqrt_17); sub_26 = rsqrt_17 = None mul_65: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_64, arg99_1); mul_64 = arg99_1 = None add_74: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_65, arg100_1); mul_65 = arg100_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_187: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_74, [1, 68, 68, 512]); add_74 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_6: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_187, [0, 0, 0, 2, 0, 2], 0.0); view_187 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_188: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_6, [1, 10, 7, 10, 7, 512]); constant_pad_nd_6 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_73: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_188, [0, 1, 3, 2, 4, 5]); view_188 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_79: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_73, memory_format = torch.contiguous_format); permute_73 = None view_189: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_79, [-1, 7, 7, 512]); clone_79 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_190: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_189, [-1, 49, 512]); view_189 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_203: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg102_1, torch.float16); arg102_1 = None convert_element_type_204: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg101_1, torch.float16); arg101_1 = None convert_element_type_205: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_190, torch.float16); view_190 = None view_191: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_205, [4900, 512]); convert_element_type_205 = None permute_74: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_204, [1, 0]); convert_element_type_204 = None addmm_24: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_203, view_191, permute_74); convert_element_type_203 = view_191 = permute_74 = None view_192: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_24, [100, 49, 1536]); addmm_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_193: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_192, [100, 49, 3, 16, 32]); view_192 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_75: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_193, [2, 0, 3, 1, 4]); view_193 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_18: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 0) select_19: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 1) select_20: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_75, 0, 2); permute_75 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_66: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_18, 0.1767766952966369); select_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_76: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_19, [0, 1, 3, 2]); select_19 = None expand_24: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_66, [100, 16, 49, 32]); mul_66 = None clone_80: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_24, memory_format = torch.contiguous_format); expand_24 = None view_194: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_80, [1600, 49, 32]); clone_80 = None expand_25: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_76, [100, 16, 32, 49]); permute_76 = None clone_81: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_25, memory_format = torch.contiguous_format); expand_25 = None view_195: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_81, [1600, 32, 49]); clone_81 = None bmm_12: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_194, view_195); view_194 = view_195 = None view_196: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_12, [100, 16, 49, 49]); bmm_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_197: "i64[2401]" = torch.ops.aten.view.default(arg104_1, [-1]); arg104_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_18: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg103_1, [view_197]); arg103_1 = view_197 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_198: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_18, [49, 49, -1]); index_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_77: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_198, [2, 0, 1]); view_198 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_82: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_77, memory_format = torch.contiguous_format); permute_77 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_18: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_82, 0); clone_82 = None add_75: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_196, unsqueeze_18); view_196 = unsqueeze_18 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_6: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_75, [-1], True) sub_27: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_75, amax_6); add_75 = amax_6 = None exp_6: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_27); sub_27 = None sum_7: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_6, [-1], True) div_12: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_6, sum_7); exp_6 = sum_7 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_83: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_12); div_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_211: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_83, torch.float16); clone_83 = None expand_26: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_211, [100, 16, 49, 49]); convert_element_type_211 = None view_199: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_26, [1600, 49, 49]); expand_26 = None expand_27: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_20, [100, 16, 49, 32]); select_20 = None clone_84: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_27, memory_format = torch.contiguous_format); expand_27 = None view_200: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_84, [1600, 49, 32]); clone_84 = None bmm_13: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_199, view_200); view_199 = view_200 = None view_201: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_13, [100, 16, 49, 32]); bmm_13 = None permute_78: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_201, [0, 2, 1, 3]); view_201 = None clone_85: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_78, memory_format = torch.contiguous_format); permute_78 = None view_202: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_85, [100, 49, 512]); clone_85 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_214: "f16[512]" = torch.ops.prims.convert_element_type.default(arg106_1, torch.float16); arg106_1 = None convert_element_type_215: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg105_1, torch.float16); arg105_1 = None view_203: "f16[4900, 512]" = torch.ops.aten.view.default(view_202, [4900, 512]); view_202 = None permute_79: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_215, [1, 0]); convert_element_type_215 = None addmm_25: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_214, view_203, permute_79); convert_element_type_214 = view_203 = permute_79 = None view_204: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_25, [100, 49, 512]); addmm_25 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_86: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_204); view_204 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_205: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_86, [-1, 7, 7, 512]); clone_86 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_206: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_205, [1, 10, 10, 7, 7, -1]); view_205 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_80: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_206, [0, 1, 3, 2, 4, 5]); view_206 = None clone_87: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_80, memory_format = torch.contiguous_format); permute_80 = None view_207: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_87, [1, 70, 70, -1]); clone_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_468: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_207, 0, 0, 9223372036854775807); view_207 = None slice_469: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_468, 1, 0, 68); slice_468 = None slice_470: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_469, 2, 0, 68); slice_469 = None slice_471: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_470, 3, 0, 9223372036854775807); slice_470 = None clone_88: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_471, memory_format = torch.contiguous_format); slice_471 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_208: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_88, [1, 4624, 512]); clone_88 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_76: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_72, view_208); add_72 = view_208 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_219: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_76, torch.float32) var_mean_18 = torch.ops.aten.var_mean.correction(convert_element_type_219, [2], correction = 0, keepdim = True) getitem_36: "f32[1, 4624, 1]" = var_mean_18[0] getitem_37: "f32[1, 4624, 1]" = var_mean_18[1]; var_mean_18 = None add_77: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_36, 1e-05); getitem_36 = None rsqrt_18: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_77); add_77 = None sub_28: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_219, getitem_37); convert_element_type_219 = getitem_37 = None mul_67: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_28, rsqrt_18); sub_28 = rsqrt_18 = None mul_68: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_67, arg107_1); mul_67 = arg107_1 = None add_78: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_68, arg108_1); mul_68 = arg108_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_220: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg110_1, torch.float16); arg110_1 = None convert_element_type_221: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg109_1, torch.float16); arg109_1 = None convert_element_type_222: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_78, torch.float16); add_78 = None view_209: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_222, [4624, 512]); convert_element_type_222 = None permute_81: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_221, [1, 0]); convert_element_type_221 = None addmm_26: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_220, view_209, permute_81); convert_element_type_220 = view_209 = permute_81 = None view_210: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_26, [1, 4624, 2048]); addmm_26 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_226: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_210, torch.float32); view_210 = None mul_69: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_226, 0.5) mul_70: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_226, 0.7071067811865476); convert_element_type_226 = None erf_6: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_70); mul_70 = None add_79: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_6, 1); erf_6 = None mul_71: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_69, add_79); mul_69 = add_79 = None convert_element_type_227: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_71, torch.float16); mul_71 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_89: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_227); convert_element_type_227 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_228: "f16[512]" = torch.ops.prims.convert_element_type.default(arg112_1, torch.float16); arg112_1 = None convert_element_type_229: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg111_1, torch.float16); arg111_1 = None view_211: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_89, [4624, 2048]); clone_89 = None permute_82: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_229, [1, 0]); convert_element_type_229 = None addmm_27: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_228, view_211, permute_82); convert_element_type_228 = view_211 = permute_82 = None view_212: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_27, [1, 4624, 512]); addmm_27 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_90: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_212); view_212 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_80: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_76, clone_90); add_76 = clone_90 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_233: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_80, torch.float32) var_mean_19 = torch.ops.aten.var_mean.correction(convert_element_type_233, [2], correction = 0, keepdim = True) getitem_38: "f32[1, 4624, 1]" = var_mean_19[0] getitem_39: "f32[1, 4624, 1]" = var_mean_19[1]; var_mean_19 = None add_81: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_38, 1e-05); getitem_38 = None rsqrt_19: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_81); add_81 = None sub_29: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_233, getitem_39); convert_element_type_233 = getitem_39 = None mul_72: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_29, rsqrt_19); sub_29 = rsqrt_19 = None mul_73: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_72, arg113_1); mul_72 = arg113_1 = None add_82: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_73, arg114_1); mul_73 = arg114_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_213: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_82, [1, 68, 68, 512]); add_82 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_7: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_213, [0, 0, 0, 2, 0, 2], 0.0); view_213 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_12: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_83: "i64[70]" = torch.ops.aten.add.Tensor(iota_12, 3); iota_12 = None fmod_12: "i64[70]" = torch.ops.aten.fmod.Scalar(add_83, 70); add_83 = None slice_472: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_7, 0, 0, 9223372036854775807); constant_pad_nd_7 = None index_19: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_472, [None, fmod_12]); slice_472 = fmod_12 = None iota_13: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_84: "i64[70]" = torch.ops.aten.add.Tensor(iota_13, 3); iota_13 = None fmod_13: "i64[70]" = torch.ops.aten.fmod.Scalar(add_84, 70); add_84 = None slice_473: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_19, 0, 0, 9223372036854775807); index_19 = None slice_474: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_473, 1, 0, 9223372036854775807); slice_473 = None index_20: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_474, [None, None, fmod_13]); slice_474 = fmod_13 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_214: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_20, [1, 10, 7, 10, 7, 512]); index_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_83: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_214, [0, 1, 3, 2, 4, 5]); view_214 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_91: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_83, memory_format = torch.contiguous_format); permute_83 = None view_215: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_91, [-1, 7, 7, 512]); clone_91 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_216: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_215, [-1, 49, 512]); view_215 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_234: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg116_1, torch.float16); arg116_1 = None convert_element_type_235: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg115_1, torch.float16); arg115_1 = None convert_element_type_236: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_216, torch.float16); view_216 = None view_217: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_236, [4900, 512]); convert_element_type_236 = None permute_84: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_235, [1, 0]); convert_element_type_235 = None addmm_28: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_234, view_217, permute_84); convert_element_type_234 = view_217 = permute_84 = None view_218: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_28, [100, 49, 1536]); addmm_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_219: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_218, [100, 49, 3, 16, 32]); view_218 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_85: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_219, [2, 0, 3, 1, 4]); view_219 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_21: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 0) select_22: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 1) select_23: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_85, 0, 2); permute_85 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_74: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_21, 0.1767766952966369); select_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_86: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_22, [0, 1, 3, 2]); select_22 = None expand_28: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_74, [100, 16, 49, 32]); mul_74 = None clone_92: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_28, memory_format = torch.contiguous_format); expand_28 = None view_220: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_92, [1600, 49, 32]); clone_92 = None expand_29: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_86, [100, 16, 32, 49]); permute_86 = None clone_93: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_29, memory_format = torch.contiguous_format); expand_29 = None view_221: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_93, [1600, 32, 49]); clone_93 = None bmm_14: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_220, view_221); view_220 = view_221 = None view_222: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_14, [100, 16, 49, 49]); bmm_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_223: "i64[2401]" = torch.ops.aten.view.default(arg118_1, [-1]); arg118_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_21: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg117_1, [view_223]); arg117_1 = view_223 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_224: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_21, [49, 49, -1]); index_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_87: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_224, [2, 0, 1]); view_224 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_94: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_87, memory_format = torch.contiguous_format); permute_87 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_19: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_94, 0); clone_94 = None add_85: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_222, unsqueeze_19); view_222 = unsqueeze_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_225: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_85, [1, 100, 16, 49, 49]); add_85 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_20: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_21: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_20, 0); unsqueeze_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_86: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_225, unsqueeze_21); view_225 = unsqueeze_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_226: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_86, [-1, 16, 49, 49]); add_86 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_7: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_226, [-1], True) sub_30: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_226, amax_7); view_226 = amax_7 = None exp_7: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_30); sub_30 = None sum_8: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_7, [-1], True) div_13: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_7, sum_8); exp_7 = sum_8 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_95: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_13); div_13 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_242: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_95, torch.float16); clone_95 = None expand_30: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_242, [100, 16, 49, 49]); convert_element_type_242 = None view_227: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_30, [1600, 49, 49]); expand_30 = None expand_31: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_23, [100, 16, 49, 32]); select_23 = None clone_96: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_31, memory_format = torch.contiguous_format); expand_31 = None view_228: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_96, [1600, 49, 32]); clone_96 = None bmm_15: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_227, view_228); view_227 = view_228 = None view_229: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_15, [100, 16, 49, 32]); bmm_15 = None permute_88: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_229, [0, 2, 1, 3]); view_229 = None clone_97: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_88, memory_format = torch.contiguous_format); permute_88 = None view_230: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_97, [100, 49, 512]); clone_97 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_245: "f16[512]" = torch.ops.prims.convert_element_type.default(arg120_1, torch.float16); arg120_1 = None convert_element_type_246: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg119_1, torch.float16); arg119_1 = None view_231: "f16[4900, 512]" = torch.ops.aten.view.default(view_230, [4900, 512]); view_230 = None permute_89: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_246, [1, 0]); convert_element_type_246 = None addmm_29: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_245, view_231, permute_89); convert_element_type_245 = view_231 = permute_89 = None view_232: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_29, [100, 49, 512]); addmm_29 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_98: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_232); view_232 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_233: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_98, [-1, 7, 7, 512]); clone_98 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_234: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_233, [1, 10, 10, 7, 7, -1]); view_233 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_90: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_234, [0, 1, 3, 2, 4, 5]); view_234 = None clone_99: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_90, memory_format = torch.contiguous_format); permute_90 = None view_235: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_99, [1, 70, 70, -1]); clone_99 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_14: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_87: "i64[70]" = torch.ops.aten.add.Tensor(iota_14, 67); iota_14 = None fmod_14: "i64[70]" = torch.ops.aten.fmod.Scalar(add_87, 70); add_87 = None slice_475: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_235, 0, 0, 9223372036854775807); view_235 = None index_22: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_475, [None, fmod_14]); slice_475 = fmod_14 = None iota_15: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_88: "i64[70]" = torch.ops.aten.add.Tensor(iota_15, 67); iota_15 = None fmod_15: "i64[70]" = torch.ops.aten.fmod.Scalar(add_88, 70); add_88 = None slice_476: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_22, 0, 0, 9223372036854775807); index_22 = None slice_477: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_476, 1, 0, 9223372036854775807); slice_476 = None index_23: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_477, [None, None, fmod_15]); slice_477 = fmod_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_478: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_23, 0, 0, 9223372036854775807); index_23 = None slice_479: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_478, 1, 0, 68); slice_478 = None slice_480: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_479, 2, 0, 68); slice_479 = None slice_481: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_480, 3, 0, 9223372036854775807); slice_480 = None clone_100: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_481, memory_format = torch.contiguous_format); slice_481 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_236: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_100, [1, 4624, 512]); clone_100 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_89: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_80, view_236); add_80 = view_236 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_250: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_89, torch.float32) var_mean_20 = torch.ops.aten.var_mean.correction(convert_element_type_250, [2], correction = 0, keepdim = True) getitem_40: "f32[1, 4624, 1]" = var_mean_20[0] getitem_41: "f32[1, 4624, 1]" = var_mean_20[1]; var_mean_20 = None add_90: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_40, 1e-05); getitem_40 = None rsqrt_20: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_90); add_90 = None sub_31: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_250, getitem_41); convert_element_type_250 = getitem_41 = None mul_75: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_31, rsqrt_20); sub_31 = rsqrt_20 = None mul_76: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_75, arg121_1); mul_75 = arg121_1 = None add_91: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_76, arg122_1); mul_76 = arg122_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_251: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg124_1, torch.float16); arg124_1 = None convert_element_type_252: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg123_1, torch.float16); arg123_1 = None convert_element_type_253: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_91, torch.float16); add_91 = None view_237: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_253, [4624, 512]); convert_element_type_253 = None permute_91: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_252, [1, 0]); convert_element_type_252 = None addmm_30: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_251, view_237, permute_91); convert_element_type_251 = view_237 = permute_91 = None view_238: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_30, [1, 4624, 2048]); addmm_30 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_257: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_238, torch.float32); view_238 = None mul_77: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_257, 0.5) mul_78: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_257, 0.7071067811865476); convert_element_type_257 = None erf_7: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_78); mul_78 = None add_92: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_7, 1); erf_7 = None mul_79: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_77, add_92); mul_77 = add_92 = None convert_element_type_258: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_79, torch.float16); mul_79 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_101: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_258); convert_element_type_258 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_259: "f16[512]" = torch.ops.prims.convert_element_type.default(arg126_1, torch.float16); arg126_1 = None convert_element_type_260: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg125_1, torch.float16); arg125_1 = None view_239: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_101, [4624, 2048]); clone_101 = None permute_92: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_260, [1, 0]); convert_element_type_260 = None addmm_31: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_259, view_239, permute_92); convert_element_type_259 = view_239 = permute_92 = None view_240: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_31, [1, 4624, 512]); addmm_31 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_102: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_240); view_240 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_93: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_89, clone_102); add_89 = clone_102 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_264: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_93, torch.float32) var_mean_21 = torch.ops.aten.var_mean.correction(convert_element_type_264, [2], correction = 0, keepdim = True) getitem_42: "f32[1, 4624, 1]" = var_mean_21[0] getitem_43: "f32[1, 4624, 1]" = var_mean_21[1]; var_mean_21 = None add_94: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_42, 1e-05); getitem_42 = None rsqrt_21: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_94); add_94 = None sub_32: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_264, getitem_43); convert_element_type_264 = getitem_43 = None mul_80: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_32, rsqrt_21); sub_32 = rsqrt_21 = None mul_81: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_80, arg127_1); mul_80 = arg127_1 = None add_95: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_81, arg128_1); mul_81 = arg128_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_241: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_95, [1, 68, 68, 512]); add_95 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_8: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_241, [0, 0, 0, 2, 0, 2], 0.0); view_241 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_242: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_8, [1, 10, 7, 10, 7, 512]); constant_pad_nd_8 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_93: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_242, [0, 1, 3, 2, 4, 5]); view_242 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_103: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_93, memory_format = torch.contiguous_format); permute_93 = None view_243: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_103, [-1, 7, 7, 512]); clone_103 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_244: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_243, [-1, 49, 512]); view_243 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_265: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg130_1, torch.float16); arg130_1 = None convert_element_type_266: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg129_1, torch.float16); arg129_1 = None convert_element_type_267: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_244, torch.float16); view_244 = None view_245: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_267, [4900, 512]); convert_element_type_267 = None permute_94: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_266, [1, 0]); convert_element_type_266 = None addmm_32: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_265, view_245, permute_94); convert_element_type_265 = view_245 = permute_94 = None view_246: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_32, [100, 49, 1536]); addmm_32 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_247: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_246, [100, 49, 3, 16, 32]); view_246 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_95: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_247, [2, 0, 3, 1, 4]); view_247 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_24: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 0) select_25: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 1) select_26: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_95, 0, 2); permute_95 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_82: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_24, 0.1767766952966369); select_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_96: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_25, [0, 1, 3, 2]); select_25 = None expand_32: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_82, [100, 16, 49, 32]); mul_82 = None clone_104: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_32, memory_format = torch.contiguous_format); expand_32 = None view_248: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_104, [1600, 49, 32]); clone_104 = None expand_33: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_96, [100, 16, 32, 49]); permute_96 = None clone_105: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_33, memory_format = torch.contiguous_format); expand_33 = None view_249: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_105, [1600, 32, 49]); clone_105 = None bmm_16: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_248, view_249); view_248 = view_249 = None view_250: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_16, [100, 16, 49, 49]); bmm_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_251: "i64[2401]" = torch.ops.aten.view.default(arg132_1, [-1]); arg132_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_24: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg131_1, [view_251]); arg131_1 = view_251 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_252: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_24, [49, 49, -1]); index_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_97: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_252, [2, 0, 1]); view_252 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_106: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_97, memory_format = torch.contiguous_format); permute_97 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_22: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_106, 0); clone_106 = None add_96: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_250, unsqueeze_22); view_250 = unsqueeze_22 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_8: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_96, [-1], True) sub_33: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_96, amax_8); add_96 = amax_8 = None exp_8: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_33); sub_33 = None sum_9: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_8, [-1], True) div_14: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_8, sum_9); exp_8 = sum_9 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_107: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_14); div_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_273: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_107, torch.float16); clone_107 = None expand_34: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_273, [100, 16, 49, 49]); convert_element_type_273 = None view_253: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_34, [1600, 49, 49]); expand_34 = None expand_35: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_26, [100, 16, 49, 32]); select_26 = None clone_108: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_35, memory_format = torch.contiguous_format); expand_35 = None view_254: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_108, [1600, 49, 32]); clone_108 = None bmm_17: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_253, view_254); view_253 = view_254 = None view_255: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_17, [100, 16, 49, 32]); bmm_17 = None permute_98: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_255, [0, 2, 1, 3]); view_255 = None clone_109: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_98, memory_format = torch.contiguous_format); permute_98 = None view_256: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_109, [100, 49, 512]); clone_109 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_276: "f16[512]" = torch.ops.prims.convert_element_type.default(arg134_1, torch.float16); arg134_1 = None convert_element_type_277: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg133_1, torch.float16); arg133_1 = None view_257: "f16[4900, 512]" = torch.ops.aten.view.default(view_256, [4900, 512]); view_256 = None permute_99: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_277, [1, 0]); convert_element_type_277 = None addmm_33: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_276, view_257, permute_99); convert_element_type_276 = view_257 = permute_99 = None view_258: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_33, [100, 49, 512]); addmm_33 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_110: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_258); view_258 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_259: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_110, [-1, 7, 7, 512]); clone_110 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_260: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_259, [1, 10, 10, 7, 7, -1]); view_259 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_100: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_260, [0, 1, 3, 2, 4, 5]); view_260 = None clone_111: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_100, memory_format = torch.contiguous_format); permute_100 = None view_261: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_111, [1, 70, 70, -1]); clone_111 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_482: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_261, 0, 0, 9223372036854775807); view_261 = None slice_483: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_482, 1, 0, 68); slice_482 = None slice_484: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_483, 2, 0, 68); slice_483 = None slice_485: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_484, 3, 0, 9223372036854775807); slice_484 = None clone_112: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_485, memory_format = torch.contiguous_format); slice_485 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_262: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_112, [1, 4624, 512]); clone_112 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_97: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_93, view_262); add_93 = view_262 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_281: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_97, torch.float32) var_mean_22 = torch.ops.aten.var_mean.correction(convert_element_type_281, [2], correction = 0, keepdim = True) getitem_44: "f32[1, 4624, 1]" = var_mean_22[0] getitem_45: "f32[1, 4624, 1]" = var_mean_22[1]; var_mean_22 = None add_98: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_44, 1e-05); getitem_44 = None rsqrt_22: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_98); add_98 = None sub_34: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_281, getitem_45); convert_element_type_281 = getitem_45 = None mul_83: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_34, rsqrt_22); sub_34 = rsqrt_22 = None mul_84: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_83, arg135_1); mul_83 = arg135_1 = None add_99: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_84, arg136_1); mul_84 = arg136_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_282: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg138_1, torch.float16); arg138_1 = None convert_element_type_283: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg137_1, torch.float16); arg137_1 = None convert_element_type_284: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_99, torch.float16); add_99 = None view_263: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_284, [4624, 512]); convert_element_type_284 = None permute_101: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_283, [1, 0]); convert_element_type_283 = None addmm_34: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_282, view_263, permute_101); convert_element_type_282 = view_263 = permute_101 = None view_264: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_34, [1, 4624, 2048]); addmm_34 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_288: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_264, torch.float32); view_264 = None mul_85: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_288, 0.5) mul_86: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_288, 0.7071067811865476); convert_element_type_288 = None erf_8: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_86); mul_86 = None add_100: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_8, 1); erf_8 = None mul_87: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_85, add_100); mul_85 = add_100 = None convert_element_type_289: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_87, torch.float16); mul_87 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_113: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_289); convert_element_type_289 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_290: "f16[512]" = torch.ops.prims.convert_element_type.default(arg140_1, torch.float16); arg140_1 = None convert_element_type_291: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg139_1, torch.float16); arg139_1 = None view_265: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_113, [4624, 2048]); clone_113 = None permute_102: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_291, [1, 0]); convert_element_type_291 = None addmm_35: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_290, view_265, permute_102); convert_element_type_290 = view_265 = permute_102 = None view_266: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_35, [1, 4624, 512]); addmm_35 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_114: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_266); view_266 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_101: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_97, clone_114); add_97 = clone_114 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_295: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_101, torch.float32) var_mean_23 = torch.ops.aten.var_mean.correction(convert_element_type_295, [2], correction = 0, keepdim = True) getitem_46: "f32[1, 4624, 1]" = var_mean_23[0] getitem_47: "f32[1, 4624, 1]" = var_mean_23[1]; var_mean_23 = None add_102: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_46, 1e-05); getitem_46 = None rsqrt_23: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_102); add_102 = None sub_35: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_295, getitem_47); convert_element_type_295 = getitem_47 = None mul_88: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_35, rsqrt_23); sub_35 = rsqrt_23 = None mul_89: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_88, arg141_1); mul_88 = arg141_1 = None add_103: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_89, arg142_1); mul_89 = arg142_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_267: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_103, [1, 68, 68, 512]); add_103 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_9: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_267, [0, 0, 0, 2, 0, 2], 0.0); view_267 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_16: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_104: "i64[70]" = torch.ops.aten.add.Tensor(iota_16, 3); iota_16 = None fmod_16: "i64[70]" = torch.ops.aten.fmod.Scalar(add_104, 70); add_104 = None slice_486: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_9, 0, 0, 9223372036854775807); constant_pad_nd_9 = None index_25: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_486, [None, fmod_16]); slice_486 = fmod_16 = None iota_17: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_105: "i64[70]" = torch.ops.aten.add.Tensor(iota_17, 3); iota_17 = None fmod_17: "i64[70]" = torch.ops.aten.fmod.Scalar(add_105, 70); add_105 = None slice_487: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_25, 0, 0, 9223372036854775807); index_25 = None slice_488: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_487, 1, 0, 9223372036854775807); slice_487 = None index_26: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_488, [None, None, fmod_17]); slice_488 = fmod_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_268: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_26, [1, 10, 7, 10, 7, 512]); index_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_103: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_268, [0, 1, 3, 2, 4, 5]); view_268 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_115: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_103, memory_format = torch.contiguous_format); permute_103 = None view_269: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_115, [-1, 7, 7, 512]); clone_115 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_270: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_269, [-1, 49, 512]); view_269 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_296: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg144_1, torch.float16); arg144_1 = None convert_element_type_297: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg143_1, torch.float16); arg143_1 = None convert_element_type_298: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_270, torch.float16); view_270 = None view_271: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_298, [4900, 512]); convert_element_type_298 = None permute_104: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_297, [1, 0]); convert_element_type_297 = None addmm_36: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_296, view_271, permute_104); convert_element_type_296 = view_271 = permute_104 = None view_272: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_36, [100, 49, 1536]); addmm_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_273: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_272, [100, 49, 3, 16, 32]); view_272 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_105: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_273, [2, 0, 3, 1, 4]); view_273 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_27: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 0) select_28: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 1) select_29: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_105, 0, 2); permute_105 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_90: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_27, 0.1767766952966369); select_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_106: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_28, [0, 1, 3, 2]); select_28 = None expand_36: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_90, [100, 16, 49, 32]); mul_90 = None clone_116: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_36, memory_format = torch.contiguous_format); expand_36 = None view_274: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_116, [1600, 49, 32]); clone_116 = None expand_37: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_106, [100, 16, 32, 49]); permute_106 = None clone_117: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_37, memory_format = torch.contiguous_format); expand_37 = None view_275: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_117, [1600, 32, 49]); clone_117 = None bmm_18: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_274, view_275); view_274 = view_275 = None view_276: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_18, [100, 16, 49, 49]); bmm_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_277: "i64[2401]" = torch.ops.aten.view.default(arg146_1, [-1]); arg146_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_27: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg145_1, [view_277]); arg145_1 = view_277 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_278: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_27, [49, 49, -1]); index_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_107: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_278, [2, 0, 1]); view_278 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_118: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_107, memory_format = torch.contiguous_format); permute_107 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_23: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_118, 0); clone_118 = None add_106: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_276, unsqueeze_23); view_276 = unsqueeze_23 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_279: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_106, [1, 100, 16, 49, 49]); add_106 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_24: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_25: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_24, 0); unsqueeze_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_107: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_279, unsqueeze_25); view_279 = unsqueeze_25 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_280: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_107, [-1, 16, 49, 49]); add_107 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_9: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_280, [-1], True) sub_36: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_280, amax_9); view_280 = amax_9 = None exp_9: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_36); sub_36 = None sum_10: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_9, [-1], True) div_15: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_9, sum_10); exp_9 = sum_10 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_119: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_15); div_15 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_304: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_119, torch.float16); clone_119 = None expand_38: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_304, [100, 16, 49, 49]); convert_element_type_304 = None view_281: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_38, [1600, 49, 49]); expand_38 = None expand_39: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_29, [100, 16, 49, 32]); select_29 = None clone_120: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_39, memory_format = torch.contiguous_format); expand_39 = None view_282: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_120, [1600, 49, 32]); clone_120 = None bmm_19: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_281, view_282); view_281 = view_282 = None view_283: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_19, [100, 16, 49, 32]); bmm_19 = None permute_108: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_283, [0, 2, 1, 3]); view_283 = None clone_121: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_108, memory_format = torch.contiguous_format); permute_108 = None view_284: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_121, [100, 49, 512]); clone_121 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_307: "f16[512]" = torch.ops.prims.convert_element_type.default(arg148_1, torch.float16); arg148_1 = None convert_element_type_308: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg147_1, torch.float16); arg147_1 = None view_285: "f16[4900, 512]" = torch.ops.aten.view.default(view_284, [4900, 512]); view_284 = None permute_109: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_308, [1, 0]); convert_element_type_308 = None addmm_37: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_307, view_285, permute_109); convert_element_type_307 = view_285 = permute_109 = None view_286: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_37, [100, 49, 512]); addmm_37 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_122: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_286); view_286 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_287: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_122, [-1, 7, 7, 512]); clone_122 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_288: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_287, [1, 10, 10, 7, 7, -1]); view_287 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_110: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_288, [0, 1, 3, 2, 4, 5]); view_288 = None clone_123: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_110, memory_format = torch.contiguous_format); permute_110 = None view_289: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_123, [1, 70, 70, -1]); clone_123 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_18: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_108: "i64[70]" = torch.ops.aten.add.Tensor(iota_18, 67); iota_18 = None fmod_18: "i64[70]" = torch.ops.aten.fmod.Scalar(add_108, 70); add_108 = None slice_489: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_289, 0, 0, 9223372036854775807); view_289 = None index_28: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_489, [None, fmod_18]); slice_489 = fmod_18 = None iota_19: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_109: "i64[70]" = torch.ops.aten.add.Tensor(iota_19, 67); iota_19 = None fmod_19: "i64[70]" = torch.ops.aten.fmod.Scalar(add_109, 70); add_109 = None slice_490: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_28, 0, 0, 9223372036854775807); index_28 = None slice_491: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_490, 1, 0, 9223372036854775807); slice_490 = None index_29: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_491, [None, None, fmod_19]); slice_491 = fmod_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_492: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_29, 0, 0, 9223372036854775807); index_29 = None slice_493: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_492, 1, 0, 68); slice_492 = None slice_494: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_493, 2, 0, 68); slice_493 = None slice_495: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_494, 3, 0, 9223372036854775807); slice_494 = None clone_124: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_495, memory_format = torch.contiguous_format); slice_495 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_290: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_124, [1, 4624, 512]); clone_124 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_110: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_101, view_290); add_101 = view_290 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_312: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_110, torch.float32) var_mean_24 = torch.ops.aten.var_mean.correction(convert_element_type_312, [2], correction = 0, keepdim = True) getitem_48: "f32[1, 4624, 1]" = var_mean_24[0] getitem_49: "f32[1, 4624, 1]" = var_mean_24[1]; var_mean_24 = None add_111: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_48, 1e-05); getitem_48 = None rsqrt_24: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_111); add_111 = None sub_37: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_312, getitem_49); convert_element_type_312 = getitem_49 = None mul_91: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_37, rsqrt_24); sub_37 = rsqrt_24 = None mul_92: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_91, arg149_1); mul_91 = arg149_1 = None add_112: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_92, arg150_1); mul_92 = arg150_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_313: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg152_1, torch.float16); arg152_1 = None convert_element_type_314: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg151_1, torch.float16); arg151_1 = None convert_element_type_315: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_112, torch.float16); add_112 = None view_291: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_315, [4624, 512]); convert_element_type_315 = None permute_111: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_314, [1, 0]); convert_element_type_314 = None addmm_38: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_313, view_291, permute_111); convert_element_type_313 = view_291 = permute_111 = None view_292: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_38, [1, 4624, 2048]); addmm_38 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_319: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_292, torch.float32); view_292 = None mul_93: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_319, 0.5) mul_94: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_319, 0.7071067811865476); convert_element_type_319 = None erf_9: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_94); mul_94 = None add_113: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_9, 1); erf_9 = None mul_95: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_93, add_113); mul_93 = add_113 = None convert_element_type_320: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_95, torch.float16); mul_95 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_125: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_320); convert_element_type_320 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_321: "f16[512]" = torch.ops.prims.convert_element_type.default(arg154_1, torch.float16); arg154_1 = None convert_element_type_322: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg153_1, torch.float16); arg153_1 = None view_293: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_125, [4624, 2048]); clone_125 = None permute_112: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_322, [1, 0]); convert_element_type_322 = None addmm_39: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_321, view_293, permute_112); convert_element_type_321 = view_293 = permute_112 = None view_294: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_39, [1, 4624, 512]); addmm_39 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_126: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_294); view_294 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_114: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_110, clone_126); add_110 = clone_126 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_326: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_114, torch.float32) var_mean_25 = torch.ops.aten.var_mean.correction(convert_element_type_326, [2], correction = 0, keepdim = True) getitem_50: "f32[1, 4624, 1]" = var_mean_25[0] getitem_51: "f32[1, 4624, 1]" = var_mean_25[1]; var_mean_25 = None add_115: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_50, 1e-05); getitem_50 = None rsqrt_25: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_115); add_115 = None sub_38: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_326, getitem_51); convert_element_type_326 = getitem_51 = None mul_96: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_38, rsqrt_25); sub_38 = rsqrt_25 = None mul_97: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_96, arg155_1); mul_96 = arg155_1 = None add_116: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_97, arg156_1); mul_97 = arg156_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_295: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_116, [1, 68, 68, 512]); add_116 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_10: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_295, [0, 0, 0, 2, 0, 2], 0.0); view_295 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_296: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_10, [1, 10, 7, 10, 7, 512]); constant_pad_nd_10 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_113: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_296, [0, 1, 3, 2, 4, 5]); view_296 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_127: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_113, memory_format = torch.contiguous_format); permute_113 = None view_297: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_127, [-1, 7, 7, 512]); clone_127 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_298: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_297, [-1, 49, 512]); view_297 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_327: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg158_1, torch.float16); arg158_1 = None convert_element_type_328: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg157_1, torch.float16); arg157_1 = None convert_element_type_329: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_298, torch.float16); view_298 = None view_299: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_329, [4900, 512]); convert_element_type_329 = None permute_114: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_328, [1, 0]); convert_element_type_328 = None addmm_40: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_327, view_299, permute_114); convert_element_type_327 = view_299 = permute_114 = None view_300: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_40, [100, 49, 1536]); addmm_40 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_301: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_300, [100, 49, 3, 16, 32]); view_300 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_115: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_301, [2, 0, 3, 1, 4]); view_301 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_30: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 0) select_31: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 1) select_32: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_115, 0, 2); permute_115 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_98: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_30, 0.1767766952966369); select_30 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_116: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_31, [0, 1, 3, 2]); select_31 = None expand_40: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_98, [100, 16, 49, 32]); mul_98 = None clone_128: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_40, memory_format = torch.contiguous_format); expand_40 = None view_302: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_128, [1600, 49, 32]); clone_128 = None expand_41: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_116, [100, 16, 32, 49]); permute_116 = None clone_129: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_41, memory_format = torch.contiguous_format); expand_41 = None view_303: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_129, [1600, 32, 49]); clone_129 = None bmm_20: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_302, view_303); view_302 = view_303 = None view_304: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_20, [100, 16, 49, 49]); bmm_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_305: "i64[2401]" = torch.ops.aten.view.default(arg160_1, [-1]); arg160_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_30: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg159_1, [view_305]); arg159_1 = view_305 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_306: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_30, [49, 49, -1]); index_30 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_117: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_306, [2, 0, 1]); view_306 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_130: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_117, memory_format = torch.contiguous_format); permute_117 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_26: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_130, 0); clone_130 = None add_117: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_304, unsqueeze_26); view_304 = unsqueeze_26 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_10: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_117, [-1], True) sub_39: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_117, amax_10); add_117 = amax_10 = None exp_10: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_39); sub_39 = None sum_11: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_10, [-1], True) div_16: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_10, sum_11); exp_10 = sum_11 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_131: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_16); div_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_335: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_131, torch.float16); clone_131 = None expand_42: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_335, [100, 16, 49, 49]); convert_element_type_335 = None view_307: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_42, [1600, 49, 49]); expand_42 = None expand_43: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_32, [100, 16, 49, 32]); select_32 = None clone_132: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_43, memory_format = torch.contiguous_format); expand_43 = None view_308: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_132, [1600, 49, 32]); clone_132 = None bmm_21: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_307, view_308); view_307 = view_308 = None view_309: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_21, [100, 16, 49, 32]); bmm_21 = None permute_118: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_309, [0, 2, 1, 3]); view_309 = None clone_133: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_118, memory_format = torch.contiguous_format); permute_118 = None view_310: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_133, [100, 49, 512]); clone_133 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_338: "f16[512]" = torch.ops.prims.convert_element_type.default(arg162_1, torch.float16); arg162_1 = None convert_element_type_339: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg161_1, torch.float16); arg161_1 = None view_311: "f16[4900, 512]" = torch.ops.aten.view.default(view_310, [4900, 512]); view_310 = None permute_119: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_339, [1, 0]); convert_element_type_339 = None addmm_41: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_338, view_311, permute_119); convert_element_type_338 = view_311 = permute_119 = None view_312: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_41, [100, 49, 512]); addmm_41 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_134: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_312); view_312 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_313: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_134, [-1, 7, 7, 512]); clone_134 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_314: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_313, [1, 10, 10, 7, 7, -1]); view_313 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_120: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_314, [0, 1, 3, 2, 4, 5]); view_314 = None clone_135: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_120, memory_format = torch.contiguous_format); permute_120 = None view_315: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_135, [1, 70, 70, -1]); clone_135 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_496: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_315, 0, 0, 9223372036854775807); view_315 = None slice_497: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_496, 1, 0, 68); slice_496 = None slice_498: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_497, 2, 0, 68); slice_497 = None slice_499: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_498, 3, 0, 9223372036854775807); slice_498 = None clone_136: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_499, memory_format = torch.contiguous_format); slice_499 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_316: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_136, [1, 4624, 512]); clone_136 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_118: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_114, view_316); add_114 = view_316 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_343: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_118, torch.float32) var_mean_26 = torch.ops.aten.var_mean.correction(convert_element_type_343, [2], correction = 0, keepdim = True) getitem_52: "f32[1, 4624, 1]" = var_mean_26[0] getitem_53: "f32[1, 4624, 1]" = var_mean_26[1]; var_mean_26 = None add_119: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_52, 1e-05); getitem_52 = None rsqrt_26: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_119); add_119 = None sub_40: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_343, getitem_53); convert_element_type_343 = getitem_53 = None mul_99: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_40, rsqrt_26); sub_40 = rsqrt_26 = None mul_100: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_99, arg163_1); mul_99 = arg163_1 = None add_120: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_100, arg164_1); mul_100 = arg164_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_344: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg166_1, torch.float16); arg166_1 = None convert_element_type_345: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg165_1, torch.float16); arg165_1 = None convert_element_type_346: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_120, torch.float16); add_120 = None view_317: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_346, [4624, 512]); convert_element_type_346 = None permute_121: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_345, [1, 0]); convert_element_type_345 = None addmm_42: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_344, view_317, permute_121); convert_element_type_344 = view_317 = permute_121 = None view_318: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_42, [1, 4624, 2048]); addmm_42 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_350: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_318, torch.float32); view_318 = None mul_101: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_350, 0.5) mul_102: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_350, 0.7071067811865476); convert_element_type_350 = None erf_10: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_102); mul_102 = None add_121: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_10, 1); erf_10 = None mul_103: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_101, add_121); mul_101 = add_121 = None convert_element_type_351: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_103, torch.float16); mul_103 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_137: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_351); convert_element_type_351 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_352: "f16[512]" = torch.ops.prims.convert_element_type.default(arg168_1, torch.float16); arg168_1 = None convert_element_type_353: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg167_1, torch.float16); arg167_1 = None view_319: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_137, [4624, 2048]); clone_137 = None permute_122: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_353, [1, 0]); convert_element_type_353 = None addmm_43: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_352, view_319, permute_122); convert_element_type_352 = view_319 = permute_122 = None view_320: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_43, [1, 4624, 512]); addmm_43 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_138: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_320); view_320 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_122: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_118, clone_138); add_118 = clone_138 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_357: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_122, torch.float32) var_mean_27 = torch.ops.aten.var_mean.correction(convert_element_type_357, [2], correction = 0, keepdim = True) getitem_54: "f32[1, 4624, 1]" = var_mean_27[0] getitem_55: "f32[1, 4624, 1]" = var_mean_27[1]; var_mean_27 = None add_123: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_54, 1e-05); getitem_54 = None rsqrt_27: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_123); add_123 = None sub_41: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_357, getitem_55); convert_element_type_357 = getitem_55 = None mul_104: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_41, rsqrt_27); sub_41 = rsqrt_27 = None mul_105: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_104, arg169_1); mul_104 = arg169_1 = None add_124: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_105, arg170_1); mul_105 = arg170_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_321: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_124, [1, 68, 68, 512]); add_124 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_11: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_321, [0, 0, 0, 2, 0, 2], 0.0); view_321 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_20: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_125: "i64[70]" = torch.ops.aten.add.Tensor(iota_20, 3); iota_20 = None fmod_20: "i64[70]" = torch.ops.aten.fmod.Scalar(add_125, 70); add_125 = None slice_500: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_11, 0, 0, 9223372036854775807); constant_pad_nd_11 = None index_31: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_500, [None, fmod_20]); slice_500 = fmod_20 = None iota_21: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_126: "i64[70]" = torch.ops.aten.add.Tensor(iota_21, 3); iota_21 = None fmod_21: "i64[70]" = torch.ops.aten.fmod.Scalar(add_126, 70); add_126 = None slice_501: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_31, 0, 0, 9223372036854775807); index_31 = None slice_502: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_501, 1, 0, 9223372036854775807); slice_501 = None index_32: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_502, [None, None, fmod_21]); slice_502 = fmod_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_322: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_32, [1, 10, 7, 10, 7, 512]); index_32 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_123: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_322, [0, 1, 3, 2, 4, 5]); view_322 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_139: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_123, memory_format = torch.contiguous_format); permute_123 = None view_323: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_139, [-1, 7, 7, 512]); clone_139 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_324: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_323, [-1, 49, 512]); view_323 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_358: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg172_1, torch.float16); arg172_1 = None convert_element_type_359: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg171_1, torch.float16); arg171_1 = None convert_element_type_360: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_324, torch.float16); view_324 = None view_325: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_360, [4900, 512]); convert_element_type_360 = None permute_124: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_359, [1, 0]); convert_element_type_359 = None addmm_44: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_358, view_325, permute_124); convert_element_type_358 = view_325 = permute_124 = None view_326: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_44, [100, 49, 1536]); addmm_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_327: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_326, [100, 49, 3, 16, 32]); view_326 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_125: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_327, [2, 0, 3, 1, 4]); view_327 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_33: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 0) select_34: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 1) select_35: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_125, 0, 2); permute_125 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_106: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_33, 0.1767766952966369); select_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_126: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_34, [0, 1, 3, 2]); select_34 = None expand_44: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_106, [100, 16, 49, 32]); mul_106 = None clone_140: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_44, memory_format = torch.contiguous_format); expand_44 = None view_328: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_140, [1600, 49, 32]); clone_140 = None expand_45: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_126, [100, 16, 32, 49]); permute_126 = None clone_141: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_45, memory_format = torch.contiguous_format); expand_45 = None view_329: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_141, [1600, 32, 49]); clone_141 = None bmm_22: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_328, view_329); view_328 = view_329 = None view_330: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_22, [100, 16, 49, 49]); bmm_22 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_331: "i64[2401]" = torch.ops.aten.view.default(arg174_1, [-1]); arg174_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_33: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg173_1, [view_331]); arg173_1 = view_331 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_332: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_33, [49, 49, -1]); index_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_127: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_332, [2, 0, 1]); view_332 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_142: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_127, memory_format = torch.contiguous_format); permute_127 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_27: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_142, 0); clone_142 = None add_127: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_330, unsqueeze_27); view_330 = unsqueeze_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_333: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_127, [1, 100, 16, 49, 49]); add_127 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_28: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_29: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_28, 0); unsqueeze_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_128: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_333, unsqueeze_29); view_333 = unsqueeze_29 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_334: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_128, [-1, 16, 49, 49]); add_128 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_11: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_334, [-1], True) sub_42: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_334, amax_11); view_334 = amax_11 = None exp_11: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_42); sub_42 = None sum_12: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_11, [-1], True) div_17: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_11, sum_12); exp_11 = sum_12 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_143: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_17); div_17 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_366: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_143, torch.float16); clone_143 = None expand_46: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_366, [100, 16, 49, 49]); convert_element_type_366 = None view_335: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_46, [1600, 49, 49]); expand_46 = None expand_47: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_35, [100, 16, 49, 32]); select_35 = None clone_144: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_47, memory_format = torch.contiguous_format); expand_47 = None view_336: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_144, [1600, 49, 32]); clone_144 = None bmm_23: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_335, view_336); view_335 = view_336 = None view_337: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_23, [100, 16, 49, 32]); bmm_23 = None permute_128: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_337, [0, 2, 1, 3]); view_337 = None clone_145: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_128, memory_format = torch.contiguous_format); permute_128 = None view_338: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_145, [100, 49, 512]); clone_145 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_369: "f16[512]" = torch.ops.prims.convert_element_type.default(arg176_1, torch.float16); arg176_1 = None convert_element_type_370: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg175_1, torch.float16); arg175_1 = None view_339: "f16[4900, 512]" = torch.ops.aten.view.default(view_338, [4900, 512]); view_338 = None permute_129: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_370, [1, 0]); convert_element_type_370 = None addmm_45: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_369, view_339, permute_129); convert_element_type_369 = view_339 = permute_129 = None view_340: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_45, [100, 49, 512]); addmm_45 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_146: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_340); view_340 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_341: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_146, [-1, 7, 7, 512]); clone_146 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_342: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_341, [1, 10, 10, 7, 7, -1]); view_341 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_130: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_342, [0, 1, 3, 2, 4, 5]); view_342 = None clone_147: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_130, memory_format = torch.contiguous_format); permute_130 = None view_343: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_147, [1, 70, 70, -1]); clone_147 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_22: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_129: "i64[70]" = torch.ops.aten.add.Tensor(iota_22, 67); iota_22 = None fmod_22: "i64[70]" = torch.ops.aten.fmod.Scalar(add_129, 70); add_129 = None slice_503: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_343, 0, 0, 9223372036854775807); view_343 = None index_34: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_503, [None, fmod_22]); slice_503 = fmod_22 = None iota_23: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_130: "i64[70]" = torch.ops.aten.add.Tensor(iota_23, 67); iota_23 = None fmod_23: "i64[70]" = torch.ops.aten.fmod.Scalar(add_130, 70); add_130 = None slice_504: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_34, 0, 0, 9223372036854775807); index_34 = None slice_505: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_504, 1, 0, 9223372036854775807); slice_504 = None index_35: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_505, [None, None, fmod_23]); slice_505 = fmod_23 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_506: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_35, 0, 0, 9223372036854775807); index_35 = None slice_507: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_506, 1, 0, 68); slice_506 = None slice_508: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_507, 2, 0, 68); slice_507 = None slice_509: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_508, 3, 0, 9223372036854775807); slice_508 = None clone_148: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_509, memory_format = torch.contiguous_format); slice_509 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_344: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_148, [1, 4624, 512]); clone_148 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_131: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_122, view_344); add_122 = view_344 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_374: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_131, torch.float32) var_mean_28 = torch.ops.aten.var_mean.correction(convert_element_type_374, [2], correction = 0, keepdim = True) getitem_56: "f32[1, 4624, 1]" = var_mean_28[0] getitem_57: "f32[1, 4624, 1]" = var_mean_28[1]; var_mean_28 = None add_132: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_56, 1e-05); getitem_56 = None rsqrt_28: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_132); add_132 = None sub_43: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_374, getitem_57); convert_element_type_374 = getitem_57 = None mul_107: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_43, rsqrt_28); sub_43 = rsqrt_28 = None mul_108: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_107, arg177_1); mul_107 = arg177_1 = None add_133: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_108, arg178_1); mul_108 = arg178_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_375: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg180_1, torch.float16); arg180_1 = None convert_element_type_376: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg179_1, torch.float16); arg179_1 = None convert_element_type_377: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_133, torch.float16); add_133 = None view_345: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_377, [4624, 512]); convert_element_type_377 = None permute_131: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_376, [1, 0]); convert_element_type_376 = None addmm_46: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_375, view_345, permute_131); convert_element_type_375 = view_345 = permute_131 = None view_346: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_46, [1, 4624, 2048]); addmm_46 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_381: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_346, torch.float32); view_346 = None mul_109: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_381, 0.5) mul_110: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_381, 0.7071067811865476); convert_element_type_381 = None erf_11: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_110); mul_110 = None add_134: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_11, 1); erf_11 = None mul_111: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_109, add_134); mul_109 = add_134 = None convert_element_type_382: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_111, torch.float16); mul_111 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_149: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_382); convert_element_type_382 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_383: "f16[512]" = torch.ops.prims.convert_element_type.default(arg182_1, torch.float16); arg182_1 = None convert_element_type_384: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg181_1, torch.float16); arg181_1 = None view_347: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_149, [4624, 2048]); clone_149 = None permute_132: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_384, [1, 0]); convert_element_type_384 = None addmm_47: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_383, view_347, permute_132); convert_element_type_383 = view_347 = permute_132 = None view_348: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_47, [1, 4624, 512]); addmm_47 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_150: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_348); view_348 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_135: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_131, clone_150); add_131 = clone_150 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_388: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_135, torch.float32) var_mean_29 = torch.ops.aten.var_mean.correction(convert_element_type_388, [2], correction = 0, keepdim = True) getitem_58: "f32[1, 4624, 1]" = var_mean_29[0] getitem_59: "f32[1, 4624, 1]" = var_mean_29[1]; var_mean_29 = None add_136: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_58, 1e-05); getitem_58 = None rsqrt_29: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_136); add_136 = None sub_44: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_388, getitem_59); convert_element_type_388 = getitem_59 = None mul_112: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_44, rsqrt_29); sub_44 = rsqrt_29 = None mul_113: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_112, arg183_1); mul_112 = arg183_1 = None add_137: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_113, arg184_1); mul_113 = arg184_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_349: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_137, [1, 68, 68, 512]); add_137 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_12: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_349, [0, 0, 0, 2, 0, 2], 0.0); view_349 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_350: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_12, [1, 10, 7, 10, 7, 512]); constant_pad_nd_12 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_133: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_350, [0, 1, 3, 2, 4, 5]); view_350 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_151: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_133, memory_format = torch.contiguous_format); permute_133 = None view_351: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_151, [-1, 7, 7, 512]); clone_151 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_352: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_351, [-1, 49, 512]); view_351 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_389: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg186_1, torch.float16); arg186_1 = None convert_element_type_390: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg185_1, torch.float16); arg185_1 = None convert_element_type_391: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_352, torch.float16); view_352 = None view_353: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_391, [4900, 512]); convert_element_type_391 = None permute_134: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_390, [1, 0]); convert_element_type_390 = None addmm_48: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_389, view_353, permute_134); convert_element_type_389 = view_353 = permute_134 = None view_354: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_48, [100, 49, 1536]); addmm_48 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_355: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_354, [100, 49, 3, 16, 32]); view_354 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_135: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_355, [2, 0, 3, 1, 4]); view_355 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_36: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 0) select_37: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 1) select_38: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_135, 0, 2); permute_135 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_114: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_36, 0.1767766952966369); select_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_136: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_37, [0, 1, 3, 2]); select_37 = None expand_48: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_114, [100, 16, 49, 32]); mul_114 = None clone_152: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_48, memory_format = torch.contiguous_format); expand_48 = None view_356: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_152, [1600, 49, 32]); clone_152 = None expand_49: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_136, [100, 16, 32, 49]); permute_136 = None clone_153: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_49, memory_format = torch.contiguous_format); expand_49 = None view_357: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_153, [1600, 32, 49]); clone_153 = None bmm_24: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_356, view_357); view_356 = view_357 = None view_358: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_24, [100, 16, 49, 49]); bmm_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_359: "i64[2401]" = torch.ops.aten.view.default(arg188_1, [-1]); arg188_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_36: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg187_1, [view_359]); arg187_1 = view_359 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_360: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_36, [49, 49, -1]); index_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_137: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_360, [2, 0, 1]); view_360 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_154: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_137, memory_format = torch.contiguous_format); permute_137 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_30: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_154, 0); clone_154 = None add_138: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_358, unsqueeze_30); view_358 = unsqueeze_30 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_12: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_138, [-1], True) sub_45: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_138, amax_12); add_138 = amax_12 = None exp_12: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_45); sub_45 = None sum_13: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_12, [-1], True) div_18: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_12, sum_13); exp_12 = sum_13 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_155: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_18); div_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_397: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_155, torch.float16); clone_155 = None expand_50: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_397, [100, 16, 49, 49]); convert_element_type_397 = None view_361: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_50, [1600, 49, 49]); expand_50 = None expand_51: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_38, [100, 16, 49, 32]); select_38 = None clone_156: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_51, memory_format = torch.contiguous_format); expand_51 = None view_362: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_156, [1600, 49, 32]); clone_156 = None bmm_25: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_361, view_362); view_361 = view_362 = None view_363: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_25, [100, 16, 49, 32]); bmm_25 = None permute_138: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_363, [0, 2, 1, 3]); view_363 = None clone_157: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_138, memory_format = torch.contiguous_format); permute_138 = None view_364: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_157, [100, 49, 512]); clone_157 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_400: "f16[512]" = torch.ops.prims.convert_element_type.default(arg190_1, torch.float16); arg190_1 = None convert_element_type_401: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg189_1, torch.float16); arg189_1 = None view_365: "f16[4900, 512]" = torch.ops.aten.view.default(view_364, [4900, 512]); view_364 = None permute_139: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_401, [1, 0]); convert_element_type_401 = None addmm_49: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_400, view_365, permute_139); convert_element_type_400 = view_365 = permute_139 = None view_366: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_49, [100, 49, 512]); addmm_49 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_158: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_366); view_366 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_367: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_158, [-1, 7, 7, 512]); clone_158 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_368: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_367, [1, 10, 10, 7, 7, -1]); view_367 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_140: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_368, [0, 1, 3, 2, 4, 5]); view_368 = None clone_159: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_140, memory_format = torch.contiguous_format); permute_140 = None view_369: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_159, [1, 70, 70, -1]); clone_159 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_510: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_369, 0, 0, 9223372036854775807); view_369 = None slice_511: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_510, 1, 0, 68); slice_510 = None slice_512: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_511, 2, 0, 68); slice_511 = None slice_513: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_512, 3, 0, 9223372036854775807); slice_512 = None clone_160: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_513, memory_format = torch.contiguous_format); slice_513 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_370: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_160, [1, 4624, 512]); clone_160 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_139: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_135, view_370); add_135 = view_370 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_405: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_139, torch.float32) var_mean_30 = torch.ops.aten.var_mean.correction(convert_element_type_405, [2], correction = 0, keepdim = True) getitem_60: "f32[1, 4624, 1]" = var_mean_30[0] getitem_61: "f32[1, 4624, 1]" = var_mean_30[1]; var_mean_30 = None add_140: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_60, 1e-05); getitem_60 = None rsqrt_30: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_140); add_140 = None sub_46: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_405, getitem_61); convert_element_type_405 = getitem_61 = None mul_115: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_46, rsqrt_30); sub_46 = rsqrt_30 = None mul_116: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_115, arg191_1); mul_115 = arg191_1 = None add_141: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_116, arg192_1); mul_116 = arg192_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_406: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg194_1, torch.float16); arg194_1 = None convert_element_type_407: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg193_1, torch.float16); arg193_1 = None convert_element_type_408: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_141, torch.float16); add_141 = None view_371: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_408, [4624, 512]); convert_element_type_408 = None permute_141: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_407, [1, 0]); convert_element_type_407 = None addmm_50: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_406, view_371, permute_141); convert_element_type_406 = view_371 = permute_141 = None view_372: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_50, [1, 4624, 2048]); addmm_50 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_412: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_372, torch.float32); view_372 = None mul_117: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_412, 0.5) mul_118: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_412, 0.7071067811865476); convert_element_type_412 = None erf_12: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_118); mul_118 = None add_142: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_12, 1); erf_12 = None mul_119: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_117, add_142); mul_117 = add_142 = None convert_element_type_413: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_119, torch.float16); mul_119 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_161: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_413); convert_element_type_413 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_414: "f16[512]" = torch.ops.prims.convert_element_type.default(arg196_1, torch.float16); arg196_1 = None convert_element_type_415: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg195_1, torch.float16); arg195_1 = None view_373: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_161, [4624, 2048]); clone_161 = None permute_142: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_415, [1, 0]); convert_element_type_415 = None addmm_51: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_414, view_373, permute_142); convert_element_type_414 = view_373 = permute_142 = None view_374: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_51, [1, 4624, 512]); addmm_51 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_162: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_374); view_374 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_143: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_139, clone_162); add_139 = clone_162 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_419: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_143, torch.float32) var_mean_31 = torch.ops.aten.var_mean.correction(convert_element_type_419, [2], correction = 0, keepdim = True) getitem_62: "f32[1, 4624, 1]" = var_mean_31[0] getitem_63: "f32[1, 4624, 1]" = var_mean_31[1]; var_mean_31 = None add_144: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_62, 1e-05); getitem_62 = None rsqrt_31: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_144); add_144 = None sub_47: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_419, getitem_63); convert_element_type_419 = getitem_63 = None mul_120: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_47, rsqrt_31); sub_47 = rsqrt_31 = None mul_121: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_120, arg197_1); mul_120 = arg197_1 = None add_145: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_121, arg198_1); mul_121 = arg198_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_375: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_145, [1, 68, 68, 512]); add_145 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_13: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_375, [0, 0, 0, 2, 0, 2], 0.0); view_375 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_24: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_146: "i64[70]" = torch.ops.aten.add.Tensor(iota_24, 3); iota_24 = None fmod_24: "i64[70]" = torch.ops.aten.fmod.Scalar(add_146, 70); add_146 = None slice_514: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_13, 0, 0, 9223372036854775807); constant_pad_nd_13 = None index_37: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_514, [None, fmod_24]); slice_514 = fmod_24 = None iota_25: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_147: "i64[70]" = torch.ops.aten.add.Tensor(iota_25, 3); iota_25 = None fmod_25: "i64[70]" = torch.ops.aten.fmod.Scalar(add_147, 70); add_147 = None slice_515: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_37, 0, 0, 9223372036854775807); index_37 = None slice_516: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_515, 1, 0, 9223372036854775807); slice_515 = None index_38: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_516, [None, None, fmod_25]); slice_516 = fmod_25 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_376: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_38, [1, 10, 7, 10, 7, 512]); index_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_143: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_376, [0, 1, 3, 2, 4, 5]); view_376 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_163: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_143, memory_format = torch.contiguous_format); permute_143 = None view_377: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_163, [-1, 7, 7, 512]); clone_163 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_378: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_377, [-1, 49, 512]); view_377 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_420: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg200_1, torch.float16); arg200_1 = None convert_element_type_421: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg199_1, torch.float16); arg199_1 = None convert_element_type_422: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_378, torch.float16); view_378 = None view_379: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_422, [4900, 512]); convert_element_type_422 = None permute_144: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_421, [1, 0]); convert_element_type_421 = None addmm_52: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_420, view_379, permute_144); convert_element_type_420 = view_379 = permute_144 = None view_380: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_52, [100, 49, 1536]); addmm_52 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_381: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_380, [100, 49, 3, 16, 32]); view_380 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_145: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_381, [2, 0, 3, 1, 4]); view_381 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_39: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 0) select_40: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 1) select_41: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_145, 0, 2); permute_145 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_122: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_39, 0.1767766952966369); select_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_146: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_40, [0, 1, 3, 2]); select_40 = None expand_52: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_122, [100, 16, 49, 32]); mul_122 = None clone_164: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_52, memory_format = torch.contiguous_format); expand_52 = None view_382: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_164, [1600, 49, 32]); clone_164 = None expand_53: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_146, [100, 16, 32, 49]); permute_146 = None clone_165: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_53, memory_format = torch.contiguous_format); expand_53 = None view_383: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_165, [1600, 32, 49]); clone_165 = None bmm_26: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_382, view_383); view_382 = view_383 = None view_384: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_26, [100, 16, 49, 49]); bmm_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_385: "i64[2401]" = torch.ops.aten.view.default(arg202_1, [-1]); arg202_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_39: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg201_1, [view_385]); arg201_1 = view_385 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_386: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_39, [49, 49, -1]); index_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_147: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_386, [2, 0, 1]); view_386 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_166: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_147, memory_format = torch.contiguous_format); permute_147 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_31: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_166, 0); clone_166 = None add_148: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_384, unsqueeze_31); view_384 = unsqueeze_31 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_387: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_148, [1, 100, 16, 49, 49]); add_148 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_32: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_33: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_32, 0); unsqueeze_32 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_149: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_387, unsqueeze_33); view_387 = unsqueeze_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_388: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_149, [-1, 16, 49, 49]); add_149 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_13: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_388, [-1], True) sub_48: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_388, amax_13); view_388 = amax_13 = None exp_13: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_48); sub_48 = None sum_14: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_13, [-1], True) div_19: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_13, sum_14); exp_13 = sum_14 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_167: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_19); div_19 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_428: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_167, torch.float16); clone_167 = None expand_54: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_428, [100, 16, 49, 49]); convert_element_type_428 = None view_389: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_54, [1600, 49, 49]); expand_54 = None expand_55: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_41, [100, 16, 49, 32]); select_41 = None clone_168: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_55, memory_format = torch.contiguous_format); expand_55 = None view_390: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_168, [1600, 49, 32]); clone_168 = None bmm_27: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_389, view_390); view_389 = view_390 = None view_391: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_27, [100, 16, 49, 32]); bmm_27 = None permute_148: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_391, [0, 2, 1, 3]); view_391 = None clone_169: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_148, memory_format = torch.contiguous_format); permute_148 = None view_392: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_169, [100, 49, 512]); clone_169 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_431: "f16[512]" = torch.ops.prims.convert_element_type.default(arg204_1, torch.float16); arg204_1 = None convert_element_type_432: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg203_1, torch.float16); arg203_1 = None view_393: "f16[4900, 512]" = torch.ops.aten.view.default(view_392, [4900, 512]); view_392 = None permute_149: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_432, [1, 0]); convert_element_type_432 = None addmm_53: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_431, view_393, permute_149); convert_element_type_431 = view_393 = permute_149 = None view_394: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_53, [100, 49, 512]); addmm_53 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_170: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_394); view_394 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_395: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_170, [-1, 7, 7, 512]); clone_170 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_396: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_395, [1, 10, 10, 7, 7, -1]); view_395 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_150: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_396, [0, 1, 3, 2, 4, 5]); view_396 = None clone_171: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_150, memory_format = torch.contiguous_format); permute_150 = None view_397: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_171, [1, 70, 70, -1]); clone_171 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_26: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_150: "i64[70]" = torch.ops.aten.add.Tensor(iota_26, 67); iota_26 = None fmod_26: "i64[70]" = torch.ops.aten.fmod.Scalar(add_150, 70); add_150 = None slice_517: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_397, 0, 0, 9223372036854775807); view_397 = None index_40: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_517, [None, fmod_26]); slice_517 = fmod_26 = None iota_27: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_151: "i64[70]" = torch.ops.aten.add.Tensor(iota_27, 67); iota_27 = None fmod_27: "i64[70]" = torch.ops.aten.fmod.Scalar(add_151, 70); add_151 = None slice_518: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_40, 0, 0, 9223372036854775807); index_40 = None slice_519: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_518, 1, 0, 9223372036854775807); slice_518 = None index_41: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_519, [None, None, fmod_27]); slice_519 = fmod_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_520: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_41, 0, 0, 9223372036854775807); index_41 = None slice_521: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_520, 1, 0, 68); slice_520 = None slice_522: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_521, 2, 0, 68); slice_521 = None slice_523: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_522, 3, 0, 9223372036854775807); slice_522 = None clone_172: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_523, memory_format = torch.contiguous_format); slice_523 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_398: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_172, [1, 4624, 512]); clone_172 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_152: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_143, view_398); add_143 = view_398 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_436: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_152, torch.float32) var_mean_32 = torch.ops.aten.var_mean.correction(convert_element_type_436, [2], correction = 0, keepdim = True) getitem_64: "f32[1, 4624, 1]" = var_mean_32[0] getitem_65: "f32[1, 4624, 1]" = var_mean_32[1]; var_mean_32 = None add_153: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_64, 1e-05); getitem_64 = None rsqrt_32: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_153); add_153 = None sub_49: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_436, getitem_65); convert_element_type_436 = getitem_65 = None mul_123: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_49, rsqrt_32); sub_49 = rsqrt_32 = None mul_124: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_123, arg205_1); mul_123 = arg205_1 = None add_154: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_124, arg206_1); mul_124 = arg206_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_437: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg208_1, torch.float16); arg208_1 = None convert_element_type_438: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg207_1, torch.float16); arg207_1 = None convert_element_type_439: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_154, torch.float16); add_154 = None view_399: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_439, [4624, 512]); convert_element_type_439 = None permute_151: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_438, [1, 0]); convert_element_type_438 = None addmm_54: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_437, view_399, permute_151); convert_element_type_437 = view_399 = permute_151 = None view_400: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_54, [1, 4624, 2048]); addmm_54 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_443: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_400, torch.float32); view_400 = None mul_125: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_443, 0.5) mul_126: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_443, 0.7071067811865476); convert_element_type_443 = None erf_13: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_126); mul_126 = None add_155: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_13, 1); erf_13 = None mul_127: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_125, add_155); mul_125 = add_155 = None convert_element_type_444: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_127, torch.float16); mul_127 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_173: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_444); convert_element_type_444 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_445: "f16[512]" = torch.ops.prims.convert_element_type.default(arg210_1, torch.float16); arg210_1 = None convert_element_type_446: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg209_1, torch.float16); arg209_1 = None view_401: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_173, [4624, 2048]); clone_173 = None permute_152: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_446, [1, 0]); convert_element_type_446 = None addmm_55: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_445, view_401, permute_152); convert_element_type_445 = view_401 = permute_152 = None view_402: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_55, [1, 4624, 512]); addmm_55 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_174: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_402); view_402 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_156: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_152, clone_174); add_152 = clone_174 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_450: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_156, torch.float32) var_mean_33 = torch.ops.aten.var_mean.correction(convert_element_type_450, [2], correction = 0, keepdim = True) getitem_66: "f32[1, 4624, 1]" = var_mean_33[0] getitem_67: "f32[1, 4624, 1]" = var_mean_33[1]; var_mean_33 = None add_157: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_66, 1e-05); getitem_66 = None rsqrt_33: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_157); add_157 = None sub_50: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_450, getitem_67); convert_element_type_450 = getitem_67 = None mul_128: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_50, rsqrt_33); sub_50 = rsqrt_33 = None mul_129: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_128, arg211_1); mul_128 = arg211_1 = None add_158: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_129, arg212_1); mul_129 = arg212_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_403: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_158, [1, 68, 68, 512]); add_158 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_14: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_403, [0, 0, 0, 2, 0, 2], 0.0); view_403 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_404: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_14, [1, 10, 7, 10, 7, 512]); constant_pad_nd_14 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_153: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_404, [0, 1, 3, 2, 4, 5]); view_404 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_175: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_153, memory_format = torch.contiguous_format); permute_153 = None view_405: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_175, [-1, 7, 7, 512]); clone_175 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_406: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_405, [-1, 49, 512]); view_405 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_451: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg214_1, torch.float16); arg214_1 = None convert_element_type_452: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg213_1, torch.float16); arg213_1 = None convert_element_type_453: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_406, torch.float16); view_406 = None view_407: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_453, [4900, 512]); convert_element_type_453 = None permute_154: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_452, [1, 0]); convert_element_type_452 = None addmm_56: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_451, view_407, permute_154); convert_element_type_451 = view_407 = permute_154 = None view_408: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_56, [100, 49, 1536]); addmm_56 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_409: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_408, [100, 49, 3, 16, 32]); view_408 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_155: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_409, [2, 0, 3, 1, 4]); view_409 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_42: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 0) select_43: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 1) select_44: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_155, 0, 2); permute_155 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_130: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_42, 0.1767766952966369); select_42 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_156: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_43, [0, 1, 3, 2]); select_43 = None expand_56: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_130, [100, 16, 49, 32]); mul_130 = None clone_176: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_56, memory_format = torch.contiguous_format); expand_56 = None view_410: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_176, [1600, 49, 32]); clone_176 = None expand_57: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_156, [100, 16, 32, 49]); permute_156 = None clone_177: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_57, memory_format = torch.contiguous_format); expand_57 = None view_411: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_177, [1600, 32, 49]); clone_177 = None bmm_28: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_410, view_411); view_410 = view_411 = None view_412: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_28, [100, 16, 49, 49]); bmm_28 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_413: "i64[2401]" = torch.ops.aten.view.default(arg216_1, [-1]); arg216_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_42: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg215_1, [view_413]); arg215_1 = view_413 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_414: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_42, [49, 49, -1]); index_42 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_157: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_414, [2, 0, 1]); view_414 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_178: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_157, memory_format = torch.contiguous_format); permute_157 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_34: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_178, 0); clone_178 = None add_159: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_412, unsqueeze_34); view_412 = unsqueeze_34 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_14: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_159, [-1], True) sub_51: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_159, amax_14); add_159 = amax_14 = None exp_14: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_51); sub_51 = None sum_15: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_14, [-1], True) div_20: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_14, sum_15); exp_14 = sum_15 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_179: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_20); div_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_459: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_179, torch.float16); clone_179 = None expand_58: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_459, [100, 16, 49, 49]); convert_element_type_459 = None view_415: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_58, [1600, 49, 49]); expand_58 = None expand_59: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_44, [100, 16, 49, 32]); select_44 = None clone_180: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_59, memory_format = torch.contiguous_format); expand_59 = None view_416: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_180, [1600, 49, 32]); clone_180 = None bmm_29: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_415, view_416); view_415 = view_416 = None view_417: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_29, [100, 16, 49, 32]); bmm_29 = None permute_158: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_417, [0, 2, 1, 3]); view_417 = None clone_181: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_158, memory_format = torch.contiguous_format); permute_158 = None view_418: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_181, [100, 49, 512]); clone_181 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_462: "f16[512]" = torch.ops.prims.convert_element_type.default(arg218_1, torch.float16); arg218_1 = None convert_element_type_463: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg217_1, torch.float16); arg217_1 = None view_419: "f16[4900, 512]" = torch.ops.aten.view.default(view_418, [4900, 512]); view_418 = None permute_159: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_463, [1, 0]); convert_element_type_463 = None addmm_57: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_462, view_419, permute_159); convert_element_type_462 = view_419 = permute_159 = None view_420: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_57, [100, 49, 512]); addmm_57 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_182: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_420); view_420 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_421: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_182, [-1, 7, 7, 512]); clone_182 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_422: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_421, [1, 10, 10, 7, 7, -1]); view_421 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_160: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_422, [0, 1, 3, 2, 4, 5]); view_422 = None clone_183: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_160, memory_format = torch.contiguous_format); permute_160 = None view_423: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_183, [1, 70, 70, -1]); clone_183 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_524: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_423, 0, 0, 9223372036854775807); view_423 = None slice_525: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_524, 1, 0, 68); slice_524 = None slice_526: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_525, 2, 0, 68); slice_525 = None slice_527: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_526, 3, 0, 9223372036854775807); slice_526 = None clone_184: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_527, memory_format = torch.contiguous_format); slice_527 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_424: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_184, [1, 4624, 512]); clone_184 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_160: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_156, view_424); add_156 = view_424 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_467: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_160, torch.float32) var_mean_34 = torch.ops.aten.var_mean.correction(convert_element_type_467, [2], correction = 0, keepdim = True) getitem_68: "f32[1, 4624, 1]" = var_mean_34[0] getitem_69: "f32[1, 4624, 1]" = var_mean_34[1]; var_mean_34 = None add_161: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_68, 1e-05); getitem_68 = None rsqrt_34: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_161); add_161 = None sub_52: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_467, getitem_69); convert_element_type_467 = getitem_69 = None mul_131: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_52, rsqrt_34); sub_52 = rsqrt_34 = None mul_132: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_131, arg219_1); mul_131 = arg219_1 = None add_162: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_132, arg220_1); mul_132 = arg220_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_468: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg222_1, torch.float16); arg222_1 = None convert_element_type_469: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg221_1, torch.float16); arg221_1 = None convert_element_type_470: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_162, torch.float16); add_162 = None view_425: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_470, [4624, 512]); convert_element_type_470 = None permute_161: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_469, [1, 0]); convert_element_type_469 = None addmm_58: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_468, view_425, permute_161); convert_element_type_468 = view_425 = permute_161 = None view_426: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_58, [1, 4624, 2048]); addmm_58 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_474: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_426, torch.float32); view_426 = None mul_133: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_474, 0.5) mul_134: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_474, 0.7071067811865476); convert_element_type_474 = None erf_14: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_134); mul_134 = None add_163: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_14, 1); erf_14 = None mul_135: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_133, add_163); mul_133 = add_163 = None convert_element_type_475: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_135, torch.float16); mul_135 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_185: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_475); convert_element_type_475 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_476: "f16[512]" = torch.ops.prims.convert_element_type.default(arg224_1, torch.float16); arg224_1 = None convert_element_type_477: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg223_1, torch.float16); arg223_1 = None view_427: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_185, [4624, 2048]); clone_185 = None permute_162: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_477, [1, 0]); convert_element_type_477 = None addmm_59: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_476, view_427, permute_162); convert_element_type_476 = view_427 = permute_162 = None view_428: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_59, [1, 4624, 512]); addmm_59 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_186: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_428); view_428 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_164: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_160, clone_186); add_160 = clone_186 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_481: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_164, torch.float32) var_mean_35 = torch.ops.aten.var_mean.correction(convert_element_type_481, [2], correction = 0, keepdim = True) getitem_70: "f32[1, 4624, 1]" = var_mean_35[0] getitem_71: "f32[1, 4624, 1]" = var_mean_35[1]; var_mean_35 = None add_165: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_70, 1e-05); getitem_70 = None rsqrt_35: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_165); add_165 = None sub_53: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_481, getitem_71); convert_element_type_481 = getitem_71 = None mul_136: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_53, rsqrt_35); sub_53 = rsqrt_35 = None mul_137: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_136, arg225_1); mul_136 = arg225_1 = None add_166: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_137, arg226_1); mul_137 = arg226_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_429: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_166, [1, 68, 68, 512]); add_166 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_15: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_429, [0, 0, 0, 2, 0, 2], 0.0); view_429 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_28: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_167: "i64[70]" = torch.ops.aten.add.Tensor(iota_28, 3); iota_28 = None fmod_28: "i64[70]" = torch.ops.aten.fmod.Scalar(add_167, 70); add_167 = None slice_528: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_15, 0, 0, 9223372036854775807); constant_pad_nd_15 = None index_43: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_528, [None, fmod_28]); slice_528 = fmod_28 = None iota_29: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_168: "i64[70]" = torch.ops.aten.add.Tensor(iota_29, 3); iota_29 = None fmod_29: "i64[70]" = torch.ops.aten.fmod.Scalar(add_168, 70); add_168 = None slice_529: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_43, 0, 0, 9223372036854775807); index_43 = None slice_530: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_529, 1, 0, 9223372036854775807); slice_529 = None index_44: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_530, [None, None, fmod_29]); slice_530 = fmod_29 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_430: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_44, [1, 10, 7, 10, 7, 512]); index_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_163: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_430, [0, 1, 3, 2, 4, 5]); view_430 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_187: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_163, memory_format = torch.contiguous_format); permute_163 = None view_431: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_187, [-1, 7, 7, 512]); clone_187 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_432: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_431, [-1, 49, 512]); view_431 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_482: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg228_1, torch.float16); arg228_1 = None convert_element_type_483: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg227_1, torch.float16); arg227_1 = None convert_element_type_484: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_432, torch.float16); view_432 = None view_433: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_484, [4900, 512]); convert_element_type_484 = None permute_164: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_483, [1, 0]); convert_element_type_483 = None addmm_60: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_482, view_433, permute_164); convert_element_type_482 = view_433 = permute_164 = None view_434: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_60, [100, 49, 1536]); addmm_60 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_435: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_434, [100, 49, 3, 16, 32]); view_434 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_165: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_435, [2, 0, 3, 1, 4]); view_435 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_45: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 0) select_46: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 1) select_47: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_165, 0, 2); permute_165 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_138: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_45, 0.1767766952966369); select_45 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_166: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_46, [0, 1, 3, 2]); select_46 = None expand_60: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_138, [100, 16, 49, 32]); mul_138 = None clone_188: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_60, memory_format = torch.contiguous_format); expand_60 = None view_436: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_188, [1600, 49, 32]); clone_188 = None expand_61: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_166, [100, 16, 32, 49]); permute_166 = None clone_189: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_61, memory_format = torch.contiguous_format); expand_61 = None view_437: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_189, [1600, 32, 49]); clone_189 = None bmm_30: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_436, view_437); view_436 = view_437 = None view_438: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_30, [100, 16, 49, 49]); bmm_30 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_439: "i64[2401]" = torch.ops.aten.view.default(arg230_1, [-1]); arg230_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_45: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg229_1, [view_439]); arg229_1 = view_439 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_440: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_45, [49, 49, -1]); index_45 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_167: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_440, [2, 0, 1]); view_440 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_190: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_167, memory_format = torch.contiguous_format); permute_167 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_35: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_190, 0); clone_190 = None add_169: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_438, unsqueeze_35); view_438 = unsqueeze_35 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_441: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_169, [1, 100, 16, 49, 49]); add_169 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_36: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_37: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_36, 0); unsqueeze_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_170: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_441, unsqueeze_37); view_441 = unsqueeze_37 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_442: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_170, [-1, 16, 49, 49]); add_170 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_15: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_442, [-1], True) sub_54: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_442, amax_15); view_442 = amax_15 = None exp_15: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_54); sub_54 = None sum_16: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_15, [-1], True) div_21: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_15, sum_16); exp_15 = sum_16 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_191: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_21); div_21 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_490: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_191, torch.float16); clone_191 = None expand_62: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_490, [100, 16, 49, 49]); convert_element_type_490 = None view_443: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_62, [1600, 49, 49]); expand_62 = None expand_63: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_47, [100, 16, 49, 32]); select_47 = None clone_192: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_63, memory_format = torch.contiguous_format); expand_63 = None view_444: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_192, [1600, 49, 32]); clone_192 = None bmm_31: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_443, view_444); view_443 = view_444 = None view_445: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_31, [100, 16, 49, 32]); bmm_31 = None permute_168: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_445, [0, 2, 1, 3]); view_445 = None clone_193: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_168, memory_format = torch.contiguous_format); permute_168 = None view_446: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_193, [100, 49, 512]); clone_193 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_493: "f16[512]" = torch.ops.prims.convert_element_type.default(arg232_1, torch.float16); arg232_1 = None convert_element_type_494: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg231_1, torch.float16); arg231_1 = None view_447: "f16[4900, 512]" = torch.ops.aten.view.default(view_446, [4900, 512]); view_446 = None permute_169: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_494, [1, 0]); convert_element_type_494 = None addmm_61: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_493, view_447, permute_169); convert_element_type_493 = view_447 = permute_169 = None view_448: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_61, [100, 49, 512]); addmm_61 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_194: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_448); view_448 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_449: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_194, [-1, 7, 7, 512]); clone_194 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_450: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_449, [1, 10, 10, 7, 7, -1]); view_449 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_170: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_450, [0, 1, 3, 2, 4, 5]); view_450 = None clone_195: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_170, memory_format = torch.contiguous_format); permute_170 = None view_451: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_195, [1, 70, 70, -1]); clone_195 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_30: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_171: "i64[70]" = torch.ops.aten.add.Tensor(iota_30, 67); iota_30 = None fmod_30: "i64[70]" = torch.ops.aten.fmod.Scalar(add_171, 70); add_171 = None slice_531: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_451, 0, 0, 9223372036854775807); view_451 = None index_46: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_531, [None, fmod_30]); slice_531 = fmod_30 = None iota_31: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_172: "i64[70]" = torch.ops.aten.add.Tensor(iota_31, 67); iota_31 = None fmod_31: "i64[70]" = torch.ops.aten.fmod.Scalar(add_172, 70); add_172 = None slice_532: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_46, 0, 0, 9223372036854775807); index_46 = None slice_533: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_532, 1, 0, 9223372036854775807); slice_532 = None index_47: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_533, [None, None, fmod_31]); slice_533 = fmod_31 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_534: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_47, 0, 0, 9223372036854775807); index_47 = None slice_535: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_534, 1, 0, 68); slice_534 = None slice_536: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_535, 2, 0, 68); slice_535 = None slice_537: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_536, 3, 0, 9223372036854775807); slice_536 = None clone_196: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_537, memory_format = torch.contiguous_format); slice_537 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_452: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_196, [1, 4624, 512]); clone_196 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_173: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_164, view_452); add_164 = view_452 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_498: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_173, torch.float32) var_mean_36 = torch.ops.aten.var_mean.correction(convert_element_type_498, [2], correction = 0, keepdim = True) getitem_72: "f32[1, 4624, 1]" = var_mean_36[0] getitem_73: "f32[1, 4624, 1]" = var_mean_36[1]; var_mean_36 = None add_174: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_72, 1e-05); getitem_72 = None rsqrt_36: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_174); add_174 = None sub_55: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_498, getitem_73); convert_element_type_498 = getitem_73 = None mul_139: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_55, rsqrt_36); sub_55 = rsqrt_36 = None mul_140: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_139, arg233_1); mul_139 = arg233_1 = None add_175: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_140, arg234_1); mul_140 = arg234_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_499: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg236_1, torch.float16); arg236_1 = None convert_element_type_500: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg235_1, torch.float16); arg235_1 = None convert_element_type_501: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_175, torch.float16); add_175 = None view_453: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_501, [4624, 512]); convert_element_type_501 = None permute_171: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_500, [1, 0]); convert_element_type_500 = None addmm_62: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_499, view_453, permute_171); convert_element_type_499 = view_453 = permute_171 = None view_454: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_62, [1, 4624, 2048]); addmm_62 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_505: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_454, torch.float32); view_454 = None mul_141: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_505, 0.5) mul_142: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_505, 0.7071067811865476); convert_element_type_505 = None erf_15: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_142); mul_142 = None add_176: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_15, 1); erf_15 = None mul_143: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_141, add_176); mul_141 = add_176 = None convert_element_type_506: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_143, torch.float16); mul_143 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_197: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_506); convert_element_type_506 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_507: "f16[512]" = torch.ops.prims.convert_element_type.default(arg238_1, torch.float16); arg238_1 = None convert_element_type_508: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg237_1, torch.float16); arg237_1 = None view_455: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_197, [4624, 2048]); clone_197 = None permute_172: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_508, [1, 0]); convert_element_type_508 = None addmm_63: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_507, view_455, permute_172); convert_element_type_507 = view_455 = permute_172 = None view_456: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_63, [1, 4624, 512]); addmm_63 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_198: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_456); view_456 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_177: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_173, clone_198); add_173 = clone_198 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_512: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_177, torch.float32) var_mean_37 = torch.ops.aten.var_mean.correction(convert_element_type_512, [2], correction = 0, keepdim = True) getitem_74: "f32[1, 4624, 1]" = var_mean_37[0] getitem_75: "f32[1, 4624, 1]" = var_mean_37[1]; var_mean_37 = None add_178: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_74, 1e-05); getitem_74 = None rsqrt_37: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_178); add_178 = None sub_56: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_512, getitem_75); convert_element_type_512 = getitem_75 = None mul_144: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_56, rsqrt_37); sub_56 = rsqrt_37 = None mul_145: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_144, arg239_1); mul_144 = arg239_1 = None add_179: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_145, arg240_1); mul_145 = arg240_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_457: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_179, [1, 68, 68, 512]); add_179 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_16: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_457, [0, 0, 0, 2, 0, 2], 0.0); view_457 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_458: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_16, [1, 10, 7, 10, 7, 512]); constant_pad_nd_16 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_173: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_458, [0, 1, 3, 2, 4, 5]); view_458 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_199: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_173, memory_format = torch.contiguous_format); permute_173 = None view_459: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_199, [-1, 7, 7, 512]); clone_199 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_460: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_459, [-1, 49, 512]); view_459 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_513: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg242_1, torch.float16); arg242_1 = None convert_element_type_514: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg241_1, torch.float16); arg241_1 = None convert_element_type_515: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_460, torch.float16); view_460 = None view_461: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_515, [4900, 512]); convert_element_type_515 = None permute_174: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_514, [1, 0]); convert_element_type_514 = None addmm_64: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_513, view_461, permute_174); convert_element_type_513 = view_461 = permute_174 = None view_462: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_64, [100, 49, 1536]); addmm_64 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_463: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_462, [100, 49, 3, 16, 32]); view_462 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_175: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_463, [2, 0, 3, 1, 4]); view_463 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_48: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 0) select_49: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 1) select_50: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_175, 0, 2); permute_175 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_146: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_48, 0.1767766952966369); select_48 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_176: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_49, [0, 1, 3, 2]); select_49 = None expand_64: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_146, [100, 16, 49, 32]); mul_146 = None clone_200: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_64, memory_format = torch.contiguous_format); expand_64 = None view_464: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_200, [1600, 49, 32]); clone_200 = None expand_65: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_176, [100, 16, 32, 49]); permute_176 = None clone_201: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_65, memory_format = torch.contiguous_format); expand_65 = None view_465: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_201, [1600, 32, 49]); clone_201 = None bmm_32: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_464, view_465); view_464 = view_465 = None view_466: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_32, [100, 16, 49, 49]); bmm_32 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_467: "i64[2401]" = torch.ops.aten.view.default(arg244_1, [-1]); arg244_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_48: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg243_1, [view_467]); arg243_1 = view_467 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_468: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_48, [49, 49, -1]); index_48 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_177: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_468, [2, 0, 1]); view_468 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_202: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_177, memory_format = torch.contiguous_format); permute_177 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_38: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_202, 0); clone_202 = None add_180: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_466, unsqueeze_38); view_466 = unsqueeze_38 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_16: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_180, [-1], True) sub_57: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_180, amax_16); add_180 = amax_16 = None exp_16: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_57); sub_57 = None sum_17: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_16, [-1], True) div_22: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_16, sum_17); exp_16 = sum_17 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_203: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_22); div_22 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_521: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_203, torch.float16); clone_203 = None expand_66: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_521, [100, 16, 49, 49]); convert_element_type_521 = None view_469: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_66, [1600, 49, 49]); expand_66 = None expand_67: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_50, [100, 16, 49, 32]); select_50 = None clone_204: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_67, memory_format = torch.contiguous_format); expand_67 = None view_470: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_204, [1600, 49, 32]); clone_204 = None bmm_33: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_469, view_470); view_469 = view_470 = None view_471: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_33, [100, 16, 49, 32]); bmm_33 = None permute_178: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_471, [0, 2, 1, 3]); view_471 = None clone_205: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_178, memory_format = torch.contiguous_format); permute_178 = None view_472: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_205, [100, 49, 512]); clone_205 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_524: "f16[512]" = torch.ops.prims.convert_element_type.default(arg246_1, torch.float16); arg246_1 = None convert_element_type_525: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg245_1, torch.float16); arg245_1 = None view_473: "f16[4900, 512]" = torch.ops.aten.view.default(view_472, [4900, 512]); view_472 = None permute_179: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_525, [1, 0]); convert_element_type_525 = None addmm_65: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_524, view_473, permute_179); convert_element_type_524 = view_473 = permute_179 = None view_474: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_65, [100, 49, 512]); addmm_65 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_206: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_474); view_474 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_475: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_206, [-1, 7, 7, 512]); clone_206 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_476: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_475, [1, 10, 10, 7, 7, -1]); view_475 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_180: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_476, [0, 1, 3, 2, 4, 5]); view_476 = None clone_207: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_180, memory_format = torch.contiguous_format); permute_180 = None view_477: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_207, [1, 70, 70, -1]); clone_207 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_538: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_477, 0, 0, 9223372036854775807); view_477 = None slice_539: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_538, 1, 0, 68); slice_538 = None slice_540: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_539, 2, 0, 68); slice_539 = None slice_541: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_540, 3, 0, 9223372036854775807); slice_540 = None clone_208: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_541, memory_format = torch.contiguous_format); slice_541 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_478: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_208, [1, 4624, 512]); clone_208 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_181: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_177, view_478); add_177 = view_478 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_529: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_181, torch.float32) var_mean_38 = torch.ops.aten.var_mean.correction(convert_element_type_529, [2], correction = 0, keepdim = True) getitem_76: "f32[1, 4624, 1]" = var_mean_38[0] getitem_77: "f32[1, 4624, 1]" = var_mean_38[1]; var_mean_38 = None add_182: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_76, 1e-05); getitem_76 = None rsqrt_38: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_182); add_182 = None sub_58: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_529, getitem_77); convert_element_type_529 = getitem_77 = None mul_147: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_58, rsqrt_38); sub_58 = rsqrt_38 = None mul_148: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_147, arg247_1); mul_147 = arg247_1 = None add_183: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_148, arg248_1); mul_148 = arg248_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_530: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg250_1, torch.float16); arg250_1 = None convert_element_type_531: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg249_1, torch.float16); arg249_1 = None convert_element_type_532: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_183, torch.float16); add_183 = None view_479: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_532, [4624, 512]); convert_element_type_532 = None permute_181: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_531, [1, 0]); convert_element_type_531 = None addmm_66: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_530, view_479, permute_181); convert_element_type_530 = view_479 = permute_181 = None view_480: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_66, [1, 4624, 2048]); addmm_66 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_536: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_480, torch.float32); view_480 = None mul_149: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_536, 0.5) mul_150: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_536, 0.7071067811865476); convert_element_type_536 = None erf_16: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_150); mul_150 = None add_184: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_16, 1); erf_16 = None mul_151: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_149, add_184); mul_149 = add_184 = None convert_element_type_537: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_151, torch.float16); mul_151 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_209: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_537); convert_element_type_537 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_538: "f16[512]" = torch.ops.prims.convert_element_type.default(arg252_1, torch.float16); arg252_1 = None convert_element_type_539: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg251_1, torch.float16); arg251_1 = None view_481: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_209, [4624, 2048]); clone_209 = None permute_182: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_539, [1, 0]); convert_element_type_539 = None addmm_67: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_538, view_481, permute_182); convert_element_type_538 = view_481 = permute_182 = None view_482: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_67, [1, 4624, 512]); addmm_67 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_210: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_482); view_482 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_185: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_181, clone_210); add_181 = clone_210 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_543: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_185, torch.float32) var_mean_39 = torch.ops.aten.var_mean.correction(convert_element_type_543, [2], correction = 0, keepdim = True) getitem_78: "f32[1, 4624, 1]" = var_mean_39[0] getitem_79: "f32[1, 4624, 1]" = var_mean_39[1]; var_mean_39 = None add_186: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_78, 1e-05); getitem_78 = None rsqrt_39: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_186); add_186 = None sub_59: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_543, getitem_79); convert_element_type_543 = getitem_79 = None mul_152: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_59, rsqrt_39); sub_59 = rsqrt_39 = None mul_153: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_152, arg253_1); mul_152 = arg253_1 = None add_187: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_153, arg254_1); mul_153 = arg254_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_483: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_187, [1, 68, 68, 512]); add_187 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_17: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_483, [0, 0, 0, 2, 0, 2], 0.0); view_483 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_32: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_188: "i64[70]" = torch.ops.aten.add.Tensor(iota_32, 3); iota_32 = None fmod_32: "i64[70]" = torch.ops.aten.fmod.Scalar(add_188, 70); add_188 = None slice_542: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_17, 0, 0, 9223372036854775807); constant_pad_nd_17 = None index_49: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_542, [None, fmod_32]); slice_542 = fmod_32 = None iota_33: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_189: "i64[70]" = torch.ops.aten.add.Tensor(iota_33, 3); iota_33 = None fmod_33: "i64[70]" = torch.ops.aten.fmod.Scalar(add_189, 70); add_189 = None slice_543: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_49, 0, 0, 9223372036854775807); index_49 = None slice_544: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_543, 1, 0, 9223372036854775807); slice_543 = None index_50: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_544, [None, None, fmod_33]); slice_544 = fmod_33 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_484: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_50, [1, 10, 7, 10, 7, 512]); index_50 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_183: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_484, [0, 1, 3, 2, 4, 5]); view_484 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_211: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_183, memory_format = torch.contiguous_format); permute_183 = None view_485: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_211, [-1, 7, 7, 512]); clone_211 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_486: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_485, [-1, 49, 512]); view_485 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_544: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg256_1, torch.float16); arg256_1 = None convert_element_type_545: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg255_1, torch.float16); arg255_1 = None convert_element_type_546: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_486, torch.float16); view_486 = None view_487: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_546, [4900, 512]); convert_element_type_546 = None permute_184: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_545, [1, 0]); convert_element_type_545 = None addmm_68: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_544, view_487, permute_184); convert_element_type_544 = view_487 = permute_184 = None view_488: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_68, [100, 49, 1536]); addmm_68 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_489: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_488, [100, 49, 3, 16, 32]); view_488 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_185: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_489, [2, 0, 3, 1, 4]); view_489 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_51: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 0) select_52: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 1) select_53: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_185, 0, 2); permute_185 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_154: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_51, 0.1767766952966369); select_51 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_186: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_52, [0, 1, 3, 2]); select_52 = None expand_68: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_154, [100, 16, 49, 32]); mul_154 = None clone_212: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_68, memory_format = torch.contiguous_format); expand_68 = None view_490: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_212, [1600, 49, 32]); clone_212 = None expand_69: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_186, [100, 16, 32, 49]); permute_186 = None clone_213: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_69, memory_format = torch.contiguous_format); expand_69 = None view_491: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_213, [1600, 32, 49]); clone_213 = None bmm_34: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_490, view_491); view_490 = view_491 = None view_492: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_34, [100, 16, 49, 49]); bmm_34 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_493: "i64[2401]" = torch.ops.aten.view.default(arg258_1, [-1]); arg258_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_51: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg257_1, [view_493]); arg257_1 = view_493 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_494: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_51, [49, 49, -1]); index_51 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_187: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_494, [2, 0, 1]); view_494 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_214: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_187, memory_format = torch.contiguous_format); permute_187 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_39: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_214, 0); clone_214 = None add_190: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_492, unsqueeze_39); view_492 = unsqueeze_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_495: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_190, [1, 100, 16, 49, 49]); add_190 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_40: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_41: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_40, 0); unsqueeze_40 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_191: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_495, unsqueeze_41); view_495 = unsqueeze_41 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_496: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_191, [-1, 16, 49, 49]); add_191 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_17: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_496, [-1], True) sub_60: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_496, amax_17); view_496 = amax_17 = None exp_17: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_60); sub_60 = None sum_18: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_17, [-1], True) div_23: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_17, sum_18); exp_17 = sum_18 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_215: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_23); div_23 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_552: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_215, torch.float16); clone_215 = None expand_70: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_552, [100, 16, 49, 49]); convert_element_type_552 = None view_497: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_70, [1600, 49, 49]); expand_70 = None expand_71: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_53, [100, 16, 49, 32]); select_53 = None clone_216: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_71, memory_format = torch.contiguous_format); expand_71 = None view_498: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_216, [1600, 49, 32]); clone_216 = None bmm_35: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_497, view_498); view_497 = view_498 = None view_499: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_35, [100, 16, 49, 32]); bmm_35 = None permute_188: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_499, [0, 2, 1, 3]); view_499 = None clone_217: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_188, memory_format = torch.contiguous_format); permute_188 = None view_500: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_217, [100, 49, 512]); clone_217 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_555: "f16[512]" = torch.ops.prims.convert_element_type.default(arg260_1, torch.float16); arg260_1 = None convert_element_type_556: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg259_1, torch.float16); arg259_1 = None view_501: "f16[4900, 512]" = torch.ops.aten.view.default(view_500, [4900, 512]); view_500 = None permute_189: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_556, [1, 0]); convert_element_type_556 = None addmm_69: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_555, view_501, permute_189); convert_element_type_555 = view_501 = permute_189 = None view_502: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_69, [100, 49, 512]); addmm_69 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_218: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_502); view_502 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_503: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_218, [-1, 7, 7, 512]); clone_218 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_504: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_503, [1, 10, 10, 7, 7, -1]); view_503 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_190: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_504, [0, 1, 3, 2, 4, 5]); view_504 = None clone_219: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_190, memory_format = torch.contiguous_format); permute_190 = None view_505: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_219, [1, 70, 70, -1]); clone_219 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_34: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_192: "i64[70]" = torch.ops.aten.add.Tensor(iota_34, 67); iota_34 = None fmod_34: "i64[70]" = torch.ops.aten.fmod.Scalar(add_192, 70); add_192 = None slice_545: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_505, 0, 0, 9223372036854775807); view_505 = None index_52: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_545, [None, fmod_34]); slice_545 = fmod_34 = None iota_35: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_193: "i64[70]" = torch.ops.aten.add.Tensor(iota_35, 67); iota_35 = None fmod_35: "i64[70]" = torch.ops.aten.fmod.Scalar(add_193, 70); add_193 = None slice_546: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_52, 0, 0, 9223372036854775807); index_52 = None slice_547: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_546, 1, 0, 9223372036854775807); slice_546 = None index_53: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_547, [None, None, fmod_35]); slice_547 = fmod_35 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_548: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_53, 0, 0, 9223372036854775807); index_53 = None slice_549: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_548, 1, 0, 68); slice_548 = None slice_550: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_549, 2, 0, 68); slice_549 = None slice_551: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_550, 3, 0, 9223372036854775807); slice_550 = None clone_220: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_551, memory_format = torch.contiguous_format); slice_551 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_506: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_220, [1, 4624, 512]); clone_220 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_194: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_185, view_506); add_185 = view_506 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_560: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_194, torch.float32) var_mean_40 = torch.ops.aten.var_mean.correction(convert_element_type_560, [2], correction = 0, keepdim = True) getitem_80: "f32[1, 4624, 1]" = var_mean_40[0] getitem_81: "f32[1, 4624, 1]" = var_mean_40[1]; var_mean_40 = None add_195: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_80, 1e-05); getitem_80 = None rsqrt_40: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_195); add_195 = None sub_61: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_560, getitem_81); convert_element_type_560 = getitem_81 = None mul_155: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_61, rsqrt_40); sub_61 = rsqrt_40 = None mul_156: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_155, arg261_1); mul_155 = arg261_1 = None add_196: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_156, arg262_1); mul_156 = arg262_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_561: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg264_1, torch.float16); arg264_1 = None convert_element_type_562: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg263_1, torch.float16); arg263_1 = None convert_element_type_563: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_196, torch.float16); add_196 = None view_507: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_563, [4624, 512]); convert_element_type_563 = None permute_191: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_562, [1, 0]); convert_element_type_562 = None addmm_70: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_561, view_507, permute_191); convert_element_type_561 = view_507 = permute_191 = None view_508: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_70, [1, 4624, 2048]); addmm_70 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_567: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_508, torch.float32); view_508 = None mul_157: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_567, 0.5) mul_158: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_567, 0.7071067811865476); convert_element_type_567 = None erf_17: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_158); mul_158 = None add_197: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_17, 1); erf_17 = None mul_159: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_157, add_197); mul_157 = add_197 = None convert_element_type_568: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_159, torch.float16); mul_159 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_221: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_568); convert_element_type_568 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_569: "f16[512]" = torch.ops.prims.convert_element_type.default(arg266_1, torch.float16); arg266_1 = None convert_element_type_570: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg265_1, torch.float16); arg265_1 = None view_509: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_221, [4624, 2048]); clone_221 = None permute_192: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_570, [1, 0]); convert_element_type_570 = None addmm_71: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_569, view_509, permute_192); convert_element_type_569 = view_509 = permute_192 = None view_510: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_71, [1, 4624, 512]); addmm_71 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_222: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_510); view_510 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_198: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_194, clone_222); add_194 = clone_222 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_574: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_198, torch.float32) var_mean_41 = torch.ops.aten.var_mean.correction(convert_element_type_574, [2], correction = 0, keepdim = True) getitem_82: "f32[1, 4624, 1]" = var_mean_41[0] getitem_83: "f32[1, 4624, 1]" = var_mean_41[1]; var_mean_41 = None add_199: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_82, 1e-05); getitem_82 = None rsqrt_41: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_199); add_199 = None sub_62: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_574, getitem_83); convert_element_type_574 = getitem_83 = None mul_160: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_62, rsqrt_41); sub_62 = rsqrt_41 = None mul_161: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_160, arg267_1); mul_160 = arg267_1 = None add_200: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_161, arg268_1); mul_161 = arg268_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_511: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_200, [1, 68, 68, 512]); add_200 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_18: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_511, [0, 0, 0, 2, 0, 2], 0.0); view_511 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_512: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_18, [1, 10, 7, 10, 7, 512]); constant_pad_nd_18 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_193: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_512, [0, 1, 3, 2, 4, 5]); view_512 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_223: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_193, memory_format = torch.contiguous_format); permute_193 = None view_513: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_223, [-1, 7, 7, 512]); clone_223 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_514: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_513, [-1, 49, 512]); view_513 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_575: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg270_1, torch.float16); arg270_1 = None convert_element_type_576: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg269_1, torch.float16); arg269_1 = None convert_element_type_577: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_514, torch.float16); view_514 = None view_515: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_577, [4900, 512]); convert_element_type_577 = None permute_194: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_576, [1, 0]); convert_element_type_576 = None addmm_72: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_575, view_515, permute_194); convert_element_type_575 = view_515 = permute_194 = None view_516: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_72, [100, 49, 1536]); addmm_72 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_517: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_516, [100, 49, 3, 16, 32]); view_516 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_195: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_517, [2, 0, 3, 1, 4]); view_517 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_54: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 0) select_55: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 1) select_56: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_195, 0, 2); permute_195 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_162: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_54, 0.1767766952966369); select_54 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_196: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_55, [0, 1, 3, 2]); select_55 = None expand_72: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_162, [100, 16, 49, 32]); mul_162 = None clone_224: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_72, memory_format = torch.contiguous_format); expand_72 = None view_518: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_224, [1600, 49, 32]); clone_224 = None expand_73: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_196, [100, 16, 32, 49]); permute_196 = None clone_225: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_73, memory_format = torch.contiguous_format); expand_73 = None view_519: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_225, [1600, 32, 49]); clone_225 = None bmm_36: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_518, view_519); view_518 = view_519 = None view_520: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_36, [100, 16, 49, 49]); bmm_36 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_521: "i64[2401]" = torch.ops.aten.view.default(arg272_1, [-1]); arg272_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_54: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg271_1, [view_521]); arg271_1 = view_521 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_522: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_54, [49, 49, -1]); index_54 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_197: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_522, [2, 0, 1]); view_522 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_226: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_197, memory_format = torch.contiguous_format); permute_197 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_42: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_226, 0); clone_226 = None add_201: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_520, unsqueeze_42); view_520 = unsqueeze_42 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_18: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_201, [-1], True) sub_63: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_201, amax_18); add_201 = amax_18 = None exp_18: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_63); sub_63 = None sum_19: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_18, [-1], True) div_24: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_18, sum_19); exp_18 = sum_19 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_227: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_24); div_24 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_583: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_227, torch.float16); clone_227 = None expand_74: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_583, [100, 16, 49, 49]); convert_element_type_583 = None view_523: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_74, [1600, 49, 49]); expand_74 = None expand_75: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_56, [100, 16, 49, 32]); select_56 = None clone_228: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_75, memory_format = torch.contiguous_format); expand_75 = None view_524: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_228, [1600, 49, 32]); clone_228 = None bmm_37: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_523, view_524); view_523 = view_524 = None view_525: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_37, [100, 16, 49, 32]); bmm_37 = None permute_198: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_525, [0, 2, 1, 3]); view_525 = None clone_229: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_198, memory_format = torch.contiguous_format); permute_198 = None view_526: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_229, [100, 49, 512]); clone_229 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_586: "f16[512]" = torch.ops.prims.convert_element_type.default(arg274_1, torch.float16); arg274_1 = None convert_element_type_587: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg273_1, torch.float16); arg273_1 = None view_527: "f16[4900, 512]" = torch.ops.aten.view.default(view_526, [4900, 512]); view_526 = None permute_199: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_587, [1, 0]); convert_element_type_587 = None addmm_73: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_586, view_527, permute_199); convert_element_type_586 = view_527 = permute_199 = None view_528: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_73, [100, 49, 512]); addmm_73 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_230: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_528); view_528 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_529: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_230, [-1, 7, 7, 512]); clone_230 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_530: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_529, [1, 10, 10, 7, 7, -1]); view_529 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_200: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_530, [0, 1, 3, 2, 4, 5]); view_530 = None clone_231: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_200, memory_format = torch.contiguous_format); permute_200 = None view_531: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_231, [1, 70, 70, -1]); clone_231 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_552: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_531, 0, 0, 9223372036854775807); view_531 = None slice_553: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_552, 1, 0, 68); slice_552 = None slice_554: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_553, 2, 0, 68); slice_553 = None slice_555: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_554, 3, 0, 9223372036854775807); slice_554 = None clone_232: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_555, memory_format = torch.contiguous_format); slice_555 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_532: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_232, [1, 4624, 512]); clone_232 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_202: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_198, view_532); add_198 = view_532 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_591: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_202, torch.float32) var_mean_42 = torch.ops.aten.var_mean.correction(convert_element_type_591, [2], correction = 0, keepdim = True) getitem_84: "f32[1, 4624, 1]" = var_mean_42[0] getitem_85: "f32[1, 4624, 1]" = var_mean_42[1]; var_mean_42 = None add_203: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_84, 1e-05); getitem_84 = None rsqrt_42: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_203); add_203 = None sub_64: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_591, getitem_85); convert_element_type_591 = getitem_85 = None mul_163: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_64, rsqrt_42); sub_64 = rsqrt_42 = None mul_164: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_163, arg275_1); mul_163 = arg275_1 = None add_204: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_164, arg276_1); mul_164 = arg276_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_592: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg278_1, torch.float16); arg278_1 = None convert_element_type_593: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg277_1, torch.float16); arg277_1 = None convert_element_type_594: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_204, torch.float16); add_204 = None view_533: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_594, [4624, 512]); convert_element_type_594 = None permute_201: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_593, [1, 0]); convert_element_type_593 = None addmm_74: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_592, view_533, permute_201); convert_element_type_592 = view_533 = permute_201 = None view_534: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_74, [1, 4624, 2048]); addmm_74 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_598: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_534, torch.float32); view_534 = None mul_165: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_598, 0.5) mul_166: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_598, 0.7071067811865476); convert_element_type_598 = None erf_18: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_166); mul_166 = None add_205: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_18, 1); erf_18 = None mul_167: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_165, add_205); mul_165 = add_205 = None convert_element_type_599: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_167, torch.float16); mul_167 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_233: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_599); convert_element_type_599 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_600: "f16[512]" = torch.ops.prims.convert_element_type.default(arg280_1, torch.float16); arg280_1 = None convert_element_type_601: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg279_1, torch.float16); arg279_1 = None view_535: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_233, [4624, 2048]); clone_233 = None permute_202: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_601, [1, 0]); convert_element_type_601 = None addmm_75: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_600, view_535, permute_202); convert_element_type_600 = view_535 = permute_202 = None view_536: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_75, [1, 4624, 512]); addmm_75 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_234: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_536); view_536 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_206: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_202, clone_234); add_202 = clone_234 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_605: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_206, torch.float32) var_mean_43 = torch.ops.aten.var_mean.correction(convert_element_type_605, [2], correction = 0, keepdim = True) getitem_86: "f32[1, 4624, 1]" = var_mean_43[0] getitem_87: "f32[1, 4624, 1]" = var_mean_43[1]; var_mean_43 = None add_207: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_86, 1e-05); getitem_86 = None rsqrt_43: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_207); add_207 = None sub_65: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_605, getitem_87); convert_element_type_605 = getitem_87 = None mul_168: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_65, rsqrt_43); sub_65 = rsqrt_43 = None mul_169: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_168, arg281_1); mul_168 = arg281_1 = None add_208: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_169, arg282_1); mul_169 = arg282_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_537: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_208, [1, 68, 68, 512]); add_208 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_19: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_537, [0, 0, 0, 2, 0, 2], 0.0); view_537 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_36: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_209: "i64[70]" = torch.ops.aten.add.Tensor(iota_36, 3); iota_36 = None fmod_36: "i64[70]" = torch.ops.aten.fmod.Scalar(add_209, 70); add_209 = None slice_556: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_19, 0, 0, 9223372036854775807); constant_pad_nd_19 = None index_55: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_556, [None, fmod_36]); slice_556 = fmod_36 = None iota_37: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_210: "i64[70]" = torch.ops.aten.add.Tensor(iota_37, 3); iota_37 = None fmod_37: "i64[70]" = torch.ops.aten.fmod.Scalar(add_210, 70); add_210 = None slice_557: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_55, 0, 0, 9223372036854775807); index_55 = None slice_558: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_557, 1, 0, 9223372036854775807); slice_557 = None index_56: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_558, [None, None, fmod_37]); slice_558 = fmod_37 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_538: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_56, [1, 10, 7, 10, 7, 512]); index_56 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_203: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_538, [0, 1, 3, 2, 4, 5]); view_538 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_235: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_203, memory_format = torch.contiguous_format); permute_203 = None view_539: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_235, [-1, 7, 7, 512]); clone_235 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_540: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_539, [-1, 49, 512]); view_539 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_606: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg284_1, torch.float16); arg284_1 = None convert_element_type_607: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg283_1, torch.float16); arg283_1 = None convert_element_type_608: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_540, torch.float16); view_540 = None view_541: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_608, [4900, 512]); convert_element_type_608 = None permute_204: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_607, [1, 0]); convert_element_type_607 = None addmm_76: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_606, view_541, permute_204); convert_element_type_606 = view_541 = permute_204 = None view_542: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_76, [100, 49, 1536]); addmm_76 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_543: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_542, [100, 49, 3, 16, 32]); view_542 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_205: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_543, [2, 0, 3, 1, 4]); view_543 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_57: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 0) select_58: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 1) select_59: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_205, 0, 2); permute_205 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_170: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_57, 0.1767766952966369); select_57 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_206: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_58, [0, 1, 3, 2]); select_58 = None expand_76: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_170, [100, 16, 49, 32]); mul_170 = None clone_236: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_76, memory_format = torch.contiguous_format); expand_76 = None view_544: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_236, [1600, 49, 32]); clone_236 = None expand_77: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_206, [100, 16, 32, 49]); permute_206 = None clone_237: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_77, memory_format = torch.contiguous_format); expand_77 = None view_545: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_237, [1600, 32, 49]); clone_237 = None bmm_38: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_544, view_545); view_544 = view_545 = None view_546: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_38, [100, 16, 49, 49]); bmm_38 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_547: "i64[2401]" = torch.ops.aten.view.default(arg286_1, [-1]); arg286_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_57: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg285_1, [view_547]); arg285_1 = view_547 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_548: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_57, [49, 49, -1]); index_57 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_207: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_548, [2, 0, 1]); view_548 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_238: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_207, memory_format = torch.contiguous_format); permute_207 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_43: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_238, 0); clone_238 = None add_211: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_546, unsqueeze_43); view_546 = unsqueeze_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_549: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_211, [1, 100, 16, 49, 49]); add_211 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_44: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1) unsqueeze_45: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_44, 0); unsqueeze_44 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_212: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_549, unsqueeze_45); view_549 = unsqueeze_45 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_550: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_212, [-1, 16, 49, 49]); add_212 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_19: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_550, [-1], True) sub_66: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_550, amax_19); view_550 = amax_19 = None exp_19: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_66); sub_66 = None sum_20: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_19, [-1], True) div_25: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_19, sum_20); exp_19 = sum_20 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_239: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_25); div_25 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_614: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_239, torch.float16); clone_239 = None expand_78: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_614, [100, 16, 49, 49]); convert_element_type_614 = None view_551: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_78, [1600, 49, 49]); expand_78 = None expand_79: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_59, [100, 16, 49, 32]); select_59 = None clone_240: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_79, memory_format = torch.contiguous_format); expand_79 = None view_552: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_240, [1600, 49, 32]); clone_240 = None bmm_39: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_551, view_552); view_551 = view_552 = None view_553: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_39, [100, 16, 49, 32]); bmm_39 = None permute_208: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_553, [0, 2, 1, 3]); view_553 = None clone_241: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_208, memory_format = torch.contiguous_format); permute_208 = None view_554: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_241, [100, 49, 512]); clone_241 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_617: "f16[512]" = torch.ops.prims.convert_element_type.default(arg288_1, torch.float16); arg288_1 = None convert_element_type_618: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg287_1, torch.float16); arg287_1 = None view_555: "f16[4900, 512]" = torch.ops.aten.view.default(view_554, [4900, 512]); view_554 = None permute_209: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_618, [1, 0]); convert_element_type_618 = None addmm_77: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_617, view_555, permute_209); convert_element_type_617 = view_555 = permute_209 = None view_556: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_77, [100, 49, 512]); addmm_77 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_242: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_556); view_556 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_557: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_242, [-1, 7, 7, 512]); clone_242 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_558: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_557, [1, 10, 10, 7, 7, -1]); view_557 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_210: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_558, [0, 1, 3, 2, 4, 5]); view_558 = None clone_243: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_210, memory_format = torch.contiguous_format); permute_210 = None view_559: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_243, [1, 70, 70, -1]); clone_243 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_38: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_213: "i64[70]" = torch.ops.aten.add.Tensor(iota_38, 67); iota_38 = None fmod_38: "i64[70]" = torch.ops.aten.fmod.Scalar(add_213, 70); add_213 = None slice_559: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_559, 0, 0, 9223372036854775807); view_559 = None index_58: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_559, [None, fmod_38]); slice_559 = fmod_38 = None iota_39: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_214: "i64[70]" = torch.ops.aten.add.Tensor(iota_39, 67); iota_39 = None fmod_39: "i64[70]" = torch.ops.aten.fmod.Scalar(add_214, 70); add_214 = None slice_560: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_58, 0, 0, 9223372036854775807); index_58 = None slice_561: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_560, 1, 0, 9223372036854775807); slice_560 = None index_59: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_561, [None, None, fmod_39]); slice_561 = fmod_39 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_562: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_59, 0, 0, 9223372036854775807); index_59 = None slice_563: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_562, 1, 0, 68); slice_562 = None slice_564: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_563, 2, 0, 68); slice_563 = None slice_565: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_564, 3, 0, 9223372036854775807); slice_564 = None clone_244: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_565, memory_format = torch.contiguous_format); slice_565 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_560: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_244, [1, 4624, 512]); clone_244 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_215: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_206, view_560); add_206 = view_560 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_622: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_215, torch.float32) var_mean_44 = torch.ops.aten.var_mean.correction(convert_element_type_622, [2], correction = 0, keepdim = True) getitem_88: "f32[1, 4624, 1]" = var_mean_44[0] getitem_89: "f32[1, 4624, 1]" = var_mean_44[1]; var_mean_44 = None add_216: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_88, 1e-05); getitem_88 = None rsqrt_44: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_216); add_216 = None sub_67: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_622, getitem_89); convert_element_type_622 = getitem_89 = None mul_171: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_67, rsqrt_44); sub_67 = rsqrt_44 = None mul_172: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_171, arg289_1); mul_171 = arg289_1 = None add_217: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_172, arg290_1); mul_172 = arg290_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_623: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg292_1, torch.float16); arg292_1 = None convert_element_type_624: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg291_1, torch.float16); arg291_1 = None convert_element_type_625: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_217, torch.float16); add_217 = None view_561: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_625, [4624, 512]); convert_element_type_625 = None permute_211: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_624, [1, 0]); convert_element_type_624 = None addmm_78: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_623, view_561, permute_211); convert_element_type_623 = view_561 = permute_211 = None view_562: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_78, [1, 4624, 2048]); addmm_78 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_629: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_562, torch.float32); view_562 = None mul_173: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_629, 0.5) mul_174: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_629, 0.7071067811865476); convert_element_type_629 = None erf_19: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_174); mul_174 = None add_218: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_19, 1); erf_19 = None mul_175: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_173, add_218); mul_173 = add_218 = None convert_element_type_630: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_175, torch.float16); mul_175 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_245: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_630); convert_element_type_630 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_631: "f16[512]" = torch.ops.prims.convert_element_type.default(arg294_1, torch.float16); arg294_1 = None convert_element_type_632: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg293_1, torch.float16); arg293_1 = None view_563: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_245, [4624, 2048]); clone_245 = None permute_212: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_632, [1, 0]); convert_element_type_632 = None addmm_79: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_631, view_563, permute_212); convert_element_type_631 = view_563 = permute_212 = None view_564: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_79, [1, 4624, 512]); addmm_79 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_246: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_564); view_564 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_219: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_215, clone_246); add_215 = clone_246 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_636: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_219, torch.float32) var_mean_45 = torch.ops.aten.var_mean.correction(convert_element_type_636, [2], correction = 0, keepdim = True) getitem_90: "f32[1, 4624, 1]" = var_mean_45[0] getitem_91: "f32[1, 4624, 1]" = var_mean_45[1]; var_mean_45 = None add_220: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_90, 1e-05); getitem_90 = None rsqrt_45: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_220); add_220 = None sub_68: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_636, getitem_91); convert_element_type_636 = getitem_91 = None mul_176: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_68, rsqrt_45); sub_68 = rsqrt_45 = None mul_177: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_176, arg295_1); mul_176 = arg295_1 = None add_221: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_177, arg296_1); mul_177 = arg296_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_565: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_221, [1, 68, 68, 512]); add_221 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_20: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_565, [0, 0, 0, 2, 0, 2], 0.0); view_565 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_566: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(constant_pad_nd_20, [1, 10, 7, 10, 7, 512]); constant_pad_nd_20 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_213: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_566, [0, 1, 3, 2, 4, 5]); view_566 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_247: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_213, memory_format = torch.contiguous_format); permute_213 = None view_567: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_247, [-1, 7, 7, 512]); clone_247 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_568: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_567, [-1, 49, 512]); view_567 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_637: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg298_1, torch.float16); arg298_1 = None convert_element_type_638: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg297_1, torch.float16); arg297_1 = None convert_element_type_639: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_568, torch.float16); view_568 = None view_569: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_639, [4900, 512]); convert_element_type_639 = None permute_214: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_638, [1, 0]); convert_element_type_638 = None addmm_80: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_637, view_569, permute_214); convert_element_type_637 = view_569 = permute_214 = None view_570: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_80, [100, 49, 1536]); addmm_80 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_571: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_570, [100, 49, 3, 16, 32]); view_570 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_215: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_571, [2, 0, 3, 1, 4]); view_571 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_60: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 0) select_61: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 1) select_62: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_215, 0, 2); permute_215 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_178: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_60, 0.1767766952966369); select_60 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_216: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_61, [0, 1, 3, 2]); select_61 = None expand_80: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_178, [100, 16, 49, 32]); mul_178 = None clone_248: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_80, memory_format = torch.contiguous_format); expand_80 = None view_572: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_248, [1600, 49, 32]); clone_248 = None expand_81: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_216, [100, 16, 32, 49]); permute_216 = None clone_249: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_81, memory_format = torch.contiguous_format); expand_81 = None view_573: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_249, [1600, 32, 49]); clone_249 = None bmm_40: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_572, view_573); view_572 = view_573 = None view_574: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_40, [100, 16, 49, 49]); bmm_40 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_575: "i64[2401]" = torch.ops.aten.view.default(arg300_1, [-1]); arg300_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_60: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg299_1, [view_575]); arg299_1 = view_575 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_576: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_60, [49, 49, -1]); index_60 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_217: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_576, [2, 0, 1]); view_576 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_250: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_217, memory_format = torch.contiguous_format); permute_217 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_46: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_250, 0); clone_250 = None add_222: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_574, unsqueeze_46); view_574 = unsqueeze_46 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_20: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(add_222, [-1], True) sub_69: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(add_222, amax_20); add_222 = amax_20 = None exp_20: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_69); sub_69 = None sum_21: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_20, [-1], True) div_26: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_20, sum_21); exp_20 = sum_21 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_251: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_26); div_26 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_645: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_251, torch.float16); clone_251 = None expand_82: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_645, [100, 16, 49, 49]); convert_element_type_645 = None view_577: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_82, [1600, 49, 49]); expand_82 = None expand_83: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_62, [100, 16, 49, 32]); select_62 = None clone_252: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_83, memory_format = torch.contiguous_format); expand_83 = None view_578: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_252, [1600, 49, 32]); clone_252 = None bmm_41: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_577, view_578); view_577 = view_578 = None view_579: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_41, [100, 16, 49, 32]); bmm_41 = None permute_218: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_579, [0, 2, 1, 3]); view_579 = None clone_253: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_218, memory_format = torch.contiguous_format); permute_218 = None view_580: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_253, [100, 49, 512]); clone_253 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_648: "f16[512]" = torch.ops.prims.convert_element_type.default(arg302_1, torch.float16); arg302_1 = None convert_element_type_649: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg301_1, torch.float16); arg301_1 = None view_581: "f16[4900, 512]" = torch.ops.aten.view.default(view_580, [4900, 512]); view_580 = None permute_219: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_649, [1, 0]); convert_element_type_649 = None addmm_81: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_648, view_581, permute_219); convert_element_type_648 = view_581 = permute_219 = None view_582: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_81, [100, 49, 512]); addmm_81 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_254: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_582); view_582 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_583: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_254, [-1, 7, 7, 512]); clone_254 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_584: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_583, [1, 10, 10, 7, 7, -1]); view_583 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_220: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_584, [0, 1, 3, 2, 4, 5]); view_584 = None clone_255: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_220, memory_format = torch.contiguous_format); permute_220 = None view_585: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_255, [1, 70, 70, -1]); clone_255 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_566: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_585, 0, 0, 9223372036854775807); view_585 = None slice_567: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_566, 1, 0, 68); slice_566 = None slice_568: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_567, 2, 0, 68); slice_567 = None slice_569: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_568, 3, 0, 9223372036854775807); slice_568 = None clone_256: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_569, memory_format = torch.contiguous_format); slice_569 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_586: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_256, [1, 4624, 512]); clone_256 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_223: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_219, view_586); add_219 = view_586 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_653: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_223, torch.float32) var_mean_46 = torch.ops.aten.var_mean.correction(convert_element_type_653, [2], correction = 0, keepdim = True) getitem_92: "f32[1, 4624, 1]" = var_mean_46[0] getitem_93: "f32[1, 4624, 1]" = var_mean_46[1]; var_mean_46 = None add_224: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_92, 1e-05); getitem_92 = None rsqrt_46: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_224); add_224 = None sub_70: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_653, getitem_93); convert_element_type_653 = getitem_93 = None mul_179: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_70, rsqrt_46); sub_70 = rsqrt_46 = None mul_180: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_179, arg303_1); mul_179 = arg303_1 = None add_225: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_180, arg304_1); mul_180 = arg304_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_654: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg306_1, torch.float16); arg306_1 = None convert_element_type_655: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg305_1, torch.float16); arg305_1 = None convert_element_type_656: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_225, torch.float16); add_225 = None view_587: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_656, [4624, 512]); convert_element_type_656 = None permute_221: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_655, [1, 0]); convert_element_type_655 = None addmm_82: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_654, view_587, permute_221); convert_element_type_654 = view_587 = permute_221 = None view_588: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_82, [1, 4624, 2048]); addmm_82 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_660: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_588, torch.float32); view_588 = None mul_181: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_660, 0.5) mul_182: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_660, 0.7071067811865476); convert_element_type_660 = None erf_20: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_182); mul_182 = None add_226: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_20, 1); erf_20 = None mul_183: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_181, add_226); mul_181 = add_226 = None convert_element_type_661: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_183, torch.float16); mul_183 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_257: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_661); convert_element_type_661 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_662: "f16[512]" = torch.ops.prims.convert_element_type.default(arg308_1, torch.float16); arg308_1 = None convert_element_type_663: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg307_1, torch.float16); arg307_1 = None view_589: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_257, [4624, 2048]); clone_257 = None permute_222: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_663, [1, 0]); convert_element_type_663 = None addmm_83: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_662, view_589, permute_222); convert_element_type_662 = view_589 = permute_222 = None view_590: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_83, [1, 4624, 512]); addmm_83 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_258: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_590); view_590 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_227: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_223, clone_258); add_223 = clone_258 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_667: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_227, torch.float32) var_mean_47 = torch.ops.aten.var_mean.correction(convert_element_type_667, [2], correction = 0, keepdim = True) getitem_94: "f32[1, 4624, 1]" = var_mean_47[0] getitem_95: "f32[1, 4624, 1]" = var_mean_47[1]; var_mean_47 = None add_228: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_94, 1e-05); getitem_94 = None rsqrt_47: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_228); add_228 = None sub_71: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_667, getitem_95); convert_element_type_667 = getitem_95 = None mul_184: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_71, rsqrt_47); sub_71 = rsqrt_47 = None mul_185: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_184, arg309_1); mul_184 = arg309_1 = None add_229: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_185, arg310_1); mul_185 = arg310_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:269 in forward, code: x = x.view(B, H, W, C) view_591: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_229, [1, 68, 68, 512]); add_229 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value) constant_pad_nd_21: "f32[1, 70, 70, 512]" = torch.ops.aten.constant_pad_nd.default(view_591, [0, 0, 0, 2, 0, 2], 0.0); view_591 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:280 in forward, code: shifted_x = torch.roll(x, iota_40: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_230: "i64[70]" = torch.ops.aten.add.Tensor(iota_40, 3); iota_40 = None fmod_40: "i64[70]" = torch.ops.aten.fmod.Scalar(add_230, 70); add_230 = None slice_570: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(constant_pad_nd_21, 0, 0, 9223372036854775807); constant_pad_nd_21 = None index_61: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_570, [None, fmod_40]); slice_570 = fmod_40 = None iota_41: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_231: "i64[70]" = torch.ops.aten.add.Tensor(iota_41, 3); iota_41 = None fmod_41: "i64[70]" = torch.ops.aten.fmod.Scalar(add_231, 70); add_231 = None slice_571: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_61, 0, 0, 9223372036854775807); index_61 = None slice_572: "f32[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_571, 1, 0, 9223372036854775807); slice_571 = None index_62: "f32[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_572, [None, None, fmod_41]); slice_572 = fmod_41 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:75 in window_partition, code: x = x.view(B, H // window_size, window_size, W // window_size, window_size, view_592: "f32[1, 10, 7, 10, 7, 512]" = torch.ops.aten.view.default(index_62, [1, 10, 7, 10, 7, 512]); index_62 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:77 in window_partition, code: windows = x.permute(0, 1, 3, 2, 4, permute_223: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.permute.default(view_592, [0, 1, 3, 2, 4, 5]); view_592 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:78 in window_partition, code: 5).contiguous().view(-1, window_size, window_size, C) clone_259: "f32[1, 10, 10, 7, 7, 512]" = torch.ops.aten.clone.default(permute_223, memory_format = torch.contiguous_format); permute_223 = None view_593: "f32[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_259, [-1, 7, 7, 512]); clone_259 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:291 in forward, code: x_windows = x_windows.view(-1, self.window_size * self.window_size, view_594: "f32[100, 49, 512]" = torch.ops.aten.view.default(view_593, [-1, 49, 512]); view_593 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_668: "f16[1536]" = torch.ops.prims.convert_element_type.default(arg312_1, torch.float16); arg312_1 = None convert_element_type_669: "f16[1536, 512]" = torch.ops.prims.convert_element_type.default(arg311_1, torch.float16); arg311_1 = None convert_element_type_670: "f16[100, 49, 512]" = torch.ops.prims.convert_element_type.default(view_594, torch.float16); view_594 = None view_595: "f16[4900, 512]" = torch.ops.aten.view.default(convert_element_type_670, [4900, 512]); convert_element_type_670 = None permute_224: "f16[512, 1536]" = torch.ops.aten.permute.default(convert_element_type_669, [1, 0]); convert_element_type_669 = None addmm_84: "f16[4900, 1536]" = torch.ops.aten.addmm.default(convert_element_type_668, view_595, permute_224); convert_element_type_668 = view_595 = permute_224 = None view_596: "f16[100, 49, 1536]" = torch.ops.aten.view.default(addmm_84, [100, 49, 1536]); addmm_84 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:164 in forward, code: qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, view_597: "f16[100, 49, 3, 16, 32]" = torch.ops.aten.view.default(view_596, [100, 49, 3, 16, 32]); view_596 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:165 in forward, code: C // self.num_heads).permute(2, 0, 3, 1, 4) permute_225: "f16[3, 100, 16, 49, 32]" = torch.ops.aten.permute.default(view_597, [2, 0, 3, 1, 4]); view_597 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:166 in forward, code: q, k, v = qkv[0], qkv[1], qkv[ select_63: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 0) select_64: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 1) select_65: "f16[100, 16, 49, 32]" = torch.ops.aten.select.int(permute_225, 0, 2); permute_225 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:169 in forward, code: q = q * self.scale mul_186: "f16[100, 16, 49, 32]" = torch.ops.aten.mul.Tensor(select_63, 0.1767766952966369); select_63 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:170 in forward, code: attn = (q @ k.transpose(-2, -1)) permute_226: "f16[100, 16, 32, 49]" = torch.ops.aten.permute.default(select_64, [0, 1, 3, 2]); select_64 = None expand_84: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(mul_186, [100, 16, 49, 32]); mul_186 = None clone_260: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_84, memory_format = torch.contiguous_format); expand_84 = None view_598: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_260, [1600, 49, 32]); clone_260 = None expand_85: "f16[100, 16, 32, 49]" = torch.ops.aten.expand.default(permute_226, [100, 16, 32, 49]); permute_226 = None clone_261: "f16[100, 16, 32, 49]" = torch.ops.aten.clone.default(expand_85, memory_format = torch.contiguous_format); expand_85 = None view_599: "f16[1600, 32, 49]" = torch.ops.aten.view.default(clone_261, [1600, 32, 49]); clone_261 = None bmm_42: "f16[1600, 49, 49]" = torch.ops.aten.bmm.default(view_598, view_599); view_598 = view_599 = None view_600: "f16[100, 16, 49, 49]" = torch.ops.aten.view.default(bmm_42, [100, 16, 49, 49]); bmm_42 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_601: "i64[2401]" = torch.ops.aten.view.default(arg314_1, [-1]); arg314_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:172 in forward, code: relative_position_bias = self.relative_position_bias_table[ index_63: "f32[2401, 16]" = torch.ops.aten.index.Tensor(arg313_1, [view_601]); arg313_1 = view_601 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:173 in forward, code: self.relative_position_index.view(-1)].view( view_602: "f32[49, 49, 16]" = torch.ops.aten.view.default(index_63, [49, 49, -1]); index_63 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:177 in forward, code: relative_position_bias = relative_position_bias.permute( permute_227: "f32[16, 49, 49]" = torch.ops.aten.permute.default(view_602, [2, 0, 1]); view_602 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:178 in forward, code: 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww clone_262: "f32[16, 49, 49]" = torch.ops.aten.clone.default(permute_227, memory_format = torch.contiguous_format); permute_227 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:179 in forward, code: attn = attn + relative_position_bias.unsqueeze(0) unsqueeze_47: "f32[1, 16, 49, 49]" = torch.ops.aten.unsqueeze.default(clone_262, 0); clone_262 = None add_232: "f32[100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_600, unsqueeze_47); view_600 = unsqueeze_47 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, view_603: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.view.default(add_232, [1, 100, 16, 49, 49]); add_232 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:184 in forward, code: N) + mask.unsqueeze(1).unsqueeze(0) unsqueeze_48: "f32[100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(where_5, 1); where_5 = None unsqueeze_49: "f32[1, 100, 1, 49, 49]" = torch.ops.aten.unsqueeze.default(unsqueeze_48, 0); unsqueeze_48 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:183 in forward, code: attn = attn.view(B_ // nW, nW, self.num_heads, N, add_233: "f32[1, 100, 16, 49, 49]" = torch.ops.aten.add.Tensor(view_603, unsqueeze_49); view_603 = unsqueeze_49 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:185 in forward, code: attn = attn.view(-1, self.num_heads, N, N) view_604: "f32[100, 16, 49, 49]" = torch.ops.aten.view.default(add_233, [-1, 16, 49, 49]); add_233 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:1553 in forward, code: return F.softmax(input, self.dim, _stacklevel=5) amax_21: "f32[100, 16, 49, 1]" = torch.ops.aten.amax.default(view_604, [-1], True) sub_72: "f32[100, 16, 49, 49]" = torch.ops.aten.sub.Tensor(view_604, amax_21); view_604 = amax_21 = None exp_21: "f32[100, 16, 49, 49]" = torch.ops.aten.exp.default(sub_72); sub_72 = None sum_22: "f32[100, 16, 49, 1]" = torch.ops.aten.sum.dim_IntList(exp_21, [-1], True) div_27: "f32[100, 16, 49, 49]" = torch.ops.aten.div.Tensor(exp_21, sum_22); exp_21 = sum_22 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_263: "f32[100, 16, 49, 49]" = torch.ops.aten.clone.default(div_27); div_27 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:192 in forward, code: x = (attn @ v).transpose(1, 2).reshape(B_, N, C) convert_element_type_676: "f16[100, 16, 49, 49]" = torch.ops.prims.convert_element_type.default(clone_263, torch.float16); clone_263 = None expand_86: "f16[100, 16, 49, 49]" = torch.ops.aten.expand.default(convert_element_type_676, [100, 16, 49, 49]); convert_element_type_676 = None view_605: "f16[1600, 49, 49]" = torch.ops.aten.view.default(expand_86, [1600, 49, 49]); expand_86 = None expand_87: "f16[100, 16, 49, 32]" = torch.ops.aten.expand.default(select_65, [100, 16, 49, 32]); select_65 = None clone_264: "f16[100, 16, 49, 32]" = torch.ops.aten.clone.default(expand_87, memory_format = torch.contiguous_format); expand_87 = None view_606: "f16[1600, 49, 32]" = torch.ops.aten.view.default(clone_264, [1600, 49, 32]); clone_264 = None bmm_43: "f16[1600, 49, 32]" = torch.ops.aten.bmm.default(view_605, view_606); view_605 = view_606 = None view_607: "f16[100, 16, 49, 32]" = torch.ops.aten.view.default(bmm_43, [100, 16, 49, 32]); bmm_43 = None permute_228: "f16[100, 49, 16, 32]" = torch.ops.aten.permute.default(view_607, [0, 2, 1, 3]); view_607 = None clone_265: "f16[100, 49, 16, 32]" = torch.ops.aten.clone.default(permute_228, memory_format = torch.contiguous_format); permute_228 = None view_608: "f16[100, 49, 512]" = torch.ops.aten.view.default(clone_265, [100, 49, 512]); clone_265 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_679: "f16[512]" = torch.ops.prims.convert_element_type.default(arg316_1, torch.float16); arg316_1 = None convert_element_type_680: "f16[512, 512]" = torch.ops.prims.convert_element_type.default(arg315_1, torch.float16); arg315_1 = None view_609: "f16[4900, 512]" = torch.ops.aten.view.default(view_608, [4900, 512]); view_608 = None permute_229: "f16[512, 512]" = torch.ops.aten.permute.default(convert_element_type_680, [1, 0]); convert_element_type_680 = None addmm_85: "f16[4900, 512]" = torch.ops.aten.addmm.default(convert_element_type_679, view_609, permute_229); convert_element_type_679 = view_609 = permute_229 = None view_610: "f16[100, 49, 512]" = torch.ops.aten.view.default(addmm_85, [100, 49, 512]); addmm_85 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_266: "f16[100, 49, 512]" = torch.ops.aten.clone.default(view_610); view_610 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:299 in forward, code: attn_windows = attn_windows.view(-1, self.window_size, view_611: "f16[100, 7, 7, 512]" = torch.ops.aten.view.default(clone_266, [-1, 7, 7, 512]); clone_266 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:93 in window_reverse, code: x = windows.view(B, H // window_size, W // window_size, window_size, view_612: "f16[1, 10, 10, 7, 7, 512]" = torch.ops.aten.view.default(view_611, [1, 10, 10, 7, 7, -1]); view_611 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:95 in window_reverse, code: x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) permute_230: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.permute.default(view_612, [0, 1, 3, 2, 4, 5]); view_612 = None clone_267: "f16[1, 10, 7, 10, 7, 512]" = torch.ops.aten.clone.default(permute_230, memory_format = torch.contiguous_format); permute_230 = None view_613: "f16[1, 70, 70, 512]" = torch.ops.aten.view.default(clone_267, [1, 70, 70, -1]); clone_267 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:306 in forward, code: x = torch.roll(shifted_x, iota_42: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_234: "i64[70]" = torch.ops.aten.add.Tensor(iota_42, 67); iota_42 = None fmod_42: "i64[70]" = torch.ops.aten.fmod.Scalar(add_234, 70); add_234 = None slice_573: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(view_613, 0, 0, 9223372036854775807); view_613 = None index_64: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_573, [None, fmod_42]); slice_573 = fmod_42 = None iota_43: "i64[70]" = torch.ops.prims.iota.default(70, start = 0, step = 1, dtype = torch.int64, device = device(type='cuda', index=0), requires_grad = False) add_235: "i64[70]" = torch.ops.aten.add.Tensor(iota_43, 67); iota_43 = None fmod_43: "i64[70]" = torch.ops.aten.fmod.Scalar(add_235, 70); add_235 = None slice_574: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_64, 0, 0, 9223372036854775807); index_64 = None slice_575: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(slice_574, 1, 0, 9223372036854775807); slice_574 = None index_65: "f16[1, 70, 70, 512]" = torch.ops.aten.index.Tensor(slice_575, [None, None, fmod_43]); slice_575 = fmod_43 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:313 in forward, code: x = x[:, :H, :W, :].contiguous() slice_576: "f16[1, 70, 70, 512]" = torch.ops.aten.slice.Tensor(index_65, 0, 0, 9223372036854775807); index_65 = None slice_577: "f16[1, 68, 70, 512]" = torch.ops.aten.slice.Tensor(slice_576, 1, 0, 68); slice_576 = None slice_578: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_577, 2, 0, 68); slice_577 = None slice_579: "f16[1, 68, 68, 512]" = torch.ops.aten.slice.Tensor(slice_578, 3, 0, 9223372036854775807); slice_578 = None clone_268: "f16[1, 68, 68, 512]" = torch.ops.aten.clone.default(slice_579, memory_format = torch.contiguous_format); slice_579 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:315 in forward, code: x = x.view(B, H * W, C) view_614: "f16[1, 4624, 512]" = torch.ops.aten.view.default(clone_268, [1, 4624, 512]); clone_268 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:318 in forward, code: x = shortcut + self.drop_path(x) add_236: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_227, view_614); add_227 = view_614 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_684: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_236, torch.float32) var_mean_48 = torch.ops.aten.var_mean.correction(convert_element_type_684, [2], correction = 0, keepdim = True) getitem_96: "f32[1, 4624, 1]" = var_mean_48[0] getitem_97: "f32[1, 4624, 1]" = var_mean_48[1]; var_mean_48 = None add_237: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_96, 1e-05); getitem_96 = None rsqrt_48: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_237); add_237 = None sub_73: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_684, getitem_97); convert_element_type_684 = getitem_97 = None mul_187: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_73, rsqrt_48); sub_73 = rsqrt_48 = None mul_188: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_187, arg317_1); mul_187 = arg317_1 = None add_238: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_188, arg318_1); mul_188 = arg318_1 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_685: "f16[2048]" = torch.ops.prims.convert_element_type.default(arg320_1, torch.float16); arg320_1 = None convert_element_type_686: "f16[2048, 512]" = torch.ops.prims.convert_element_type.default(arg319_1, torch.float16); arg319_1 = None convert_element_type_687: "f16[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_238, torch.float16); add_238 = None view_615: "f16[4624, 512]" = torch.ops.aten.view.default(convert_element_type_687, [4624, 512]); convert_element_type_687 = None permute_231: "f16[512, 2048]" = torch.ops.aten.permute.default(convert_element_type_686, [1, 0]); convert_element_type_686 = None addmm_86: "f16[4624, 2048]" = torch.ops.aten.addmm.default(convert_element_type_685, view_615, permute_231); convert_element_type_685 = view_615 = permute_231 = None view_616: "f16[1, 4624, 2048]" = torch.ops.aten.view.default(addmm_86, [1, 4624, 2048]); addmm_86 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/activation.py:704 in forward, code: return F.gelu(input, approximate=self.approximate) convert_element_type_691: "f32[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(view_616, torch.float32); view_616 = None mul_189: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_691, 0.5) mul_190: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(convert_element_type_691, 0.7071067811865476); convert_element_type_691 = None erf_21: "f32[1, 4624, 2048]" = torch.ops.aten.erf.default(mul_190); mul_190 = None add_239: "f32[1, 4624, 2048]" = torch.ops.aten.add.Tensor(erf_21, 1); erf_21 = None mul_191: "f32[1, 4624, 2048]" = torch.ops.aten.mul.Tensor(mul_189, add_239); mul_189 = add_239 = None convert_element_type_692: "f16[1, 4624, 2048]" = torch.ops.prims.convert_element_type.default(mul_191, torch.float16); mul_191 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_269: "f16[1, 4624, 2048]" = torch.ops.aten.clone.default(convert_element_type_692); convert_element_type_692 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias) convert_element_type_693: "f16[512]" = torch.ops.prims.convert_element_type.default(arg322_1, torch.float16); arg322_1 = None convert_element_type_694: "f16[512, 2048]" = torch.ops.prims.convert_element_type.default(arg321_1, torch.float16); arg321_1 = None view_617: "f16[4624, 2048]" = torch.ops.aten.view.default(clone_269, [4624, 2048]); clone_269 = None permute_232: "f16[2048, 512]" = torch.ops.aten.permute.default(convert_element_type_694, [1, 0]); convert_element_type_694 = None addmm_87: "f16[4624, 512]" = torch.ops.aten.addmm.default(convert_element_type_693, view_617, permute_232); convert_element_type_693 = view_617 = permute_232 = None view_618: "f16[1, 4624, 512]" = torch.ops.aten.view.default(addmm_87, [1, 4624, 512]); addmm_87 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace) clone_270: "f16[1, 4624, 512]" = torch.ops.aten.clone.default(view_618); view_618 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:319 in forward, code: x = x + self.drop_path(self.mlp(self.norm2(x))) add_240: "f16[1, 4624, 512]" = torch.ops.aten.add.Tensor(add_236, clone_270); add_236 = clone_270 = None # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm( convert_element_type_698: "f32[1, 4624, 512]" = torch.ops.prims.convert_element_type.default(add_240, torch.float32); add_240 = None var_mean_49 = torch.ops.aten.var_mean.correction(convert_element_type_698, [2], correction = 0, keepdim = True) getitem_98: "f32[1, 4624, 1]" = var_mean_49[0] getitem_99: "f32[1, 4624, 1]" = var_mean_49[1]; var_mean_49 = None add_241: "f32[1, 4624, 1]" = torch.ops.aten.add.Tensor(getitem_98, 1e-05); getitem_98 = None rsqrt_49: "f32[1, 4624, 1]" = torch.ops.aten.rsqrt.default(add_241); add_241 = None sub_74: "f32[1, 4624, 512]" = torch.ops.aten.sub.Tensor(convert_element_type_698, getitem_99); convert_element_type_698 = getitem_99 = None mul_192: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(sub_74, rsqrt_49); sub_74 = rsqrt_49 = None mul_193: "f32[1, 4624, 512]" = torch.ops.aten.mul.Tensor(mul_192, arg323_1); mul_192 = arg323_1 = None add_242: "f32[1, 4624, 512]" = torch.ops.aten.add.Tensor(mul_193, arg324_1); mul_193 = arg324_1 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:715 in forward, code: out = x_out.view(-1, H, W, view_619: "f32[1, 68, 68, 512]" = torch.ops.aten.view.default(add_242, [-1, 68, 68, 512]); add_242 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:716 in forward, code: self.num_features[i]).permute(0, 3, 1, permute_233: "f32[1, 512, 68, 68]" = torch.ops.aten.permute.default(view_619, [0, 3, 1, 2]); view_619 = None # File: /workspace/networks/encoders/swin/swin_transformer.py:717 in forward, code: 2).contiguous() clone_271: "f32[1, 512, 68, 68]" = torch.ops.aten.clone.default(permute_233, memory_format = torch.contiguous_format); permute_233 = None return (clone_27, clone_53, clone_271)