class GraphModule(torch.nn.Module):
    def forward(self, s0: "Sym(s0)", L_stack0_0_: "f16[s0, 1, 256][256, 256, 1]cuda:0", L_tgt2_: "f16[s0, 1, 256][256, 256, 1]cuda:0", L_tgt_: "f16[s0, 1, 256][1, 256*s0, s0]cuda:0", L_self_modules_norm3_parameters_weight_: "f32[256][1]cuda:0", L_self_modules_norm3_parameters_bias_: "f32[256][1]cuda:0", L_self_modules_linear1_parameters_weight_: "f32[1024, 256][256, 1]cuda:0", L_self_modules_linear1_parameters_bias_: "f32[1024][1]cuda:0", L_size_2d_0_: "Sym(s3)", L_size_2d_1_: "Sym(s4)", L_self_modules_activation_modules_gn_parameters_weight_: "f32[1024][1]cuda:0", L_self_modules_activation_modules_gn_parameters_bias_: "f32[1024][1]cuda:0", L_self_modules_activation_modules_conv_parameters_weight_: "f32[1024, 1, 5, 5][25, 25, 5, 1]cuda:0", L_self_modules_linear2_parameters_weight_: "f32[256, 1024][1024, 1]cuda:0", L_self_modules_linear2_parameters_bias_: "f32[256][1]cuda:0"):
        l_stack0_0_ = L_stack0_0_
        l_tgt2_ = L_tgt2_
        l_tgt_ = L_tgt_
        l_self_modules_norm3_parameters_weight_ = L_self_modules_norm3_parameters_weight_
        l_self_modules_norm3_parameters_bias_ = L_self_modules_norm3_parameters_bias_
        l_self_modules_linear1_parameters_weight_ = L_self_modules_linear1_parameters_weight_
        l_self_modules_linear1_parameters_bias_ = L_self_modules_linear1_parameters_bias_
        l_size_2d_0_ = L_size_2d_0_
        l_size_2d_1_ = L_size_2d_1_
        l_self_modules_activation_modules_gn_parameters_weight_ = L_self_modules_activation_modules_gn_parameters_weight_
        l_self_modules_activation_modules_gn_parameters_bias_ = L_self_modules_activation_modules_gn_parameters_bias_
        l_self_modules_activation_modules_conv_parameters_weight_ = L_self_modules_activation_modules_conv_parameters_weight_
        l_self_modules_linear2_parameters_weight_ = L_self_modules_linear2_parameters_weight_
        l_self_modules_linear2_parameters_bias_ = L_self_modules_linear2_parameters_bias_

        # File: /workspace/networks/layers/transformer.py:841 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.lst_dropout(tgt2 + tgt3)
        add: "f16[s0, 1, 256][256, 256, 1]cuda:0" = l_tgt2_ + l_stack0_0_;  l_tgt2_ = l_stack0_0_ = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace)
        dropout: "f16[s0, 1, 256][256, 256, 1]cuda:0" = torch.nn.functional.dropout(add, 0.0, False, True);  add = None

        # File: /workspace/networks/layers/transformer.py:841 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.lst_dropout(tgt2 + tgt3)
        tgt: "f16[s0, 1, 256][1, 256*s0, s0]cuda:0" = l_tgt_ + dropout;  l_tgt_ = dropout = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        _tgt: "f32[s0, 1, 256][256, 256, 1]cuda:0" = torch.nn.functional.layer_norm(tgt, (256,), l_self_modules_norm3_parameters_weight_, l_self_modules_norm3_parameters_bias_, 1e-05);  l_self_modules_norm3_parameters_weight_ = l_self_modules_norm3_parameters_bias_ = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        linear: "f16[s0, 1, 1024][1024, 1024, 1]cuda:0" = torch._C._nn.linear(_tgt, l_self_modules_linear1_parameters_weight_, l_self_modules_linear1_parameters_bias_);  _tgt = l_self_modules_linear1_parameters_weight_ = l_self_modules_linear1_parameters_bias_ = None

        # File: /workspace/networks/layers/basic.py:30 in forward, code: x = x.view(h, w, bs, c).permute(2, 3, 0, 1)
        view: "f16[s3, (s0//s3), 1, 1024][1024*((s0//s3)), 1024, 1024, 1]cuda:0" = linear.view(l_size_2d_0_, l_size_2d_1_, 1, 1024);  linear = None
        x: "f16[1, 1024, s3, (s0//s3)][1024, 1, 1024*((s0//s3)), 1024]cuda:0" = view.permute(2, 3, 0, 1);  view = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:287 in forward, code: return F.group_norm(
        x_1: "f32[1, 1024, s3, (s0//s3)][1024*s3*((s0//s3)), s3*((s0//s3)), (s0//s3), 1]cuda:0" = torch.nn.functional.group_norm(x, 32, l_self_modules_activation_modules_gn_parameters_weight_, l_self_modules_activation_modules_gn_parameters_bias_, 1e-05);  x = l_self_modules_activation_modules_gn_parameters_weight_ = l_self_modules_activation_modules_gn_parameters_bias_ = None

        # File: /workspace/networks/layers/basic.py:32 in forward, code: x = F.gelu(x)
        x_2: "f32[1, 1024, s3, (s0//s3)][1024*s3*((s0//s3)), s3*((s0//s3)), (s0//s3), 1]cuda:0" = torch._C._nn.gelu(x_1);  x_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/conv.py:453 in _conv_forward, code: return F.conv2d(input, weight, bias, self.stride,
        x_3: "f16[1, 1024, s3, (s0//s3)][1024*s3*((s0//s3)), s3*((s0//s3)), (s0//s3), 1]cuda:0" = torch.conv2d(x_2, l_self_modules_activation_modules_conv_parameters_weight_, None, (1, 1), (2, 2), (1, 1), 1024);  x_2 = l_self_modules_activation_modules_conv_parameters_weight_ = None

        # File: /workspace/networks/layers/basic.py:34 in forward, code: x = x.view(bs, c, h * w).permute(2, 0, 1)
        mul: "Sym(s3*s4)" = l_size_2d_0_ * l_size_2d_1_;  l_size_2d_0_ = l_size_2d_1_ = None
        view_1: "f16[1, 1024, s3*((s0//s3))][1024*s3*((s0//s3)), s3*((s0//s3)), 1]cuda:0" = x_3.view(1, 1024, mul);  x_3 = mul = None
        x_4: "f16[s3*((s0//s3)), 1, 1024][1, 1024*s3*((s0//s3)), s3*((s0//s3))]cuda:0" = view_1.permute(2, 0, 1);  view_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        tgt2: "f16[s3*((s0//s3)), 1, 256][256, 256, 1]cuda:0" = torch._C._nn.linear(x_4, l_self_modules_linear2_parameters_weight_, l_self_modules_linear2_parameters_bias_);  x_4 = l_self_modules_linear2_parameters_weight_ = l_self_modules_linear2_parameters_bias_ = None

        # File: /workspace/networks/layers/transformer.py:848 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.droppath(tgt2)
        tgt_1: "f16[s0, 1, 256][1, 256*s0, s0]cuda:0" = tgt + tgt2;  tgt = tgt2 = None
        return (tgt_1,)
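In these annotations, a type like "f16[s0, 1, 256][256, 256, 1]cuda:0" reads as dtype (float16), logical shape, strides, and device; s0, s3, and s4 are symbolic sizes introduced by dynamic-shape tracing. Note that the printed shapes already show (s0//s3) where s4 was passed in: the view from [s0, 1, 1024] to (s3, s4, 1, 1024) forces the guard s3*s4 == s0, so the shape environment substitutes s4 with s0//s3. As a minimal sketch of how a dump in this format can be produced, assuming PyTorch 2.x (the Toy module below is a hypothetical stand-in, not the original network):

import torch
import torch._logging

# Print the Dynamo-generated GraphModule source (the format shown above)
# each time a frame is compiled. Equivalent to running under
# TORCH_LOGS="graph_code".
torch._logging.set_logs(graph_code=True)

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(256, 1024)

    def forward(self, x):
        return torch.nn.functional.gelu(self.linear(x))

# dynamic=True asks Dynamo to trace sizes symbolically, which is what
# yields Sym(s0)-style arguments instead of hard-coded batch sizes.
compiled = torch.compile(Toy(), dynamic=True)
compiled(torch.randn(8, 1, 256))  # first call triggers tracing and logs the generated forward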