class GraphModule(torch.nn.Module):
    def forward(self, L_stack0_0_: "f16[4624, 1, 256][256, 256, 1]cuda:0", L_tgt2_: "f16[4624, 1, 256][256, 256, 1]cuda:0", L_tgt_: "f32[4624, 1, 256][1, 1183744, 4624]cuda:0", L_self_modules_norm3_parameters_weight_: "f32[256][1]cuda:0", L_self_modules_norm3_parameters_bias_: "f32[256][1]cuda:0", L_self_modules_linear1_parameters_weight_: "f32[1024, 256][256, 1]cuda:0", L_self_modules_linear1_parameters_bias_: "f32[1024][1]cuda:0", L_self_modules_activation_modules_gn_parameters_weight_: "f32[1024][1]cuda:0", L_self_modules_activation_modules_gn_parameters_bias_: "f32[1024][1]cuda:0", L_self_modules_activation_modules_conv_parameters_weight_: "f32[1024, 1, 5, 5][25, 25, 5, 1]cuda:0", L_self_modules_linear2_parameters_weight_: "f32[256, 1024][1024, 1]cuda:0", L_self_modules_linear2_parameters_bias_: "f32[256][1]cuda:0"):
        l_stack0_0_ = L_stack0_0_
        l_tgt2_ = L_tgt2_
        l_tgt_ = L_tgt_
        l_self_modules_norm3_parameters_weight_ = L_self_modules_norm3_parameters_weight_
        l_self_modules_norm3_parameters_bias_ = L_self_modules_norm3_parameters_bias_
        l_self_modules_linear1_parameters_weight_ = L_self_modules_linear1_parameters_weight_
        l_self_modules_linear1_parameters_bias_ = L_self_modules_linear1_parameters_bias_
        l_self_modules_activation_modules_gn_parameters_weight_ = L_self_modules_activation_modules_gn_parameters_weight_
        l_self_modules_activation_modules_gn_parameters_bias_ = L_self_modules_activation_modules_gn_parameters_bias_
        l_self_modules_activation_modules_conv_parameters_weight_ = L_self_modules_activation_modules_conv_parameters_weight_
        l_self_modules_linear2_parameters_weight_ = L_self_modules_linear2_parameters_weight_
        l_self_modules_linear2_parameters_bias_ = L_self_modules_linear2_parameters_bias_

        # File: /workspace/networks/layers/transformer.py:841 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.lst_dropout(tgt2 + tgt3)
        add: "f16[4624, 1, 256][256, 256, 1]cuda:0" = l_tgt2_ + l_stack0_0_; l_tgt2_ = l_stack0_0_ = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/dropout.py:59 in forward, code: return F.dropout(input, self.p, self.training, self.inplace)
        dropout: "f16[4624, 1, 256][256, 256, 1]cuda:0" = torch.nn.functional.dropout(add, 0.0, False, True); add = None

        # File: /workspace/networks/layers/transformer.py:841 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.lst_dropout(tgt2 + tgt3)
        tgt: "f32[4624, 1, 256][1, 1183744, 4624]cuda:0" = l_tgt_ + dropout; l_tgt_ = dropout = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:201 in forward, code: return F.layer_norm(
        _tgt: "f32[4624, 1, 256][256, 256, 1]cuda:0" = torch.nn.functional.layer_norm(tgt, (256,), l_self_modules_norm3_parameters_weight_, l_self_modules_norm3_parameters_bias_, 1e-05); l_self_modules_norm3_parameters_weight_ = l_self_modules_norm3_parameters_bias_ = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        linear: "f16[4624, 1, 1024][1024, 1024, 1]cuda:0" = torch._C._nn.linear(_tgt, l_self_modules_linear1_parameters_weight_, l_self_modules_linear1_parameters_bias_); _tgt = l_self_modules_linear1_parameters_weight_ = l_self_modules_linear1_parameters_bias_ = None

        # File: /workspace/networks/layers/basic.py:30 in forward, code: x = x.view(h, w, bs, c).permute(2, 3, 0, 1)
        view: "f16[68, 68, 1, 1024][69632, 1024, 1024, 1]cuda:0" = linear.view(68, 68, 1, 1024); linear = None
        x: "f16[1, 1024, 68, 68][1024, 1, 69632, 1024]cuda:0" = view.permute(2, 3, 0, 1); view = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/normalization.py:287 in forward, code: return F.group_norm(
        x_1: "f32[1, 1024, 68, 68][4734976, 4624, 68, 1]cuda:0" = torch.nn.functional.group_norm(x, 32, l_self_modules_activation_modules_gn_parameters_weight_, l_self_modules_activation_modules_gn_parameters_bias_, 1e-05); x = l_self_modules_activation_modules_gn_parameters_weight_ = l_self_modules_activation_modules_gn_parameters_bias_ = None

        # File: /workspace/networks/layers/basic.py:32 in forward, code: x = F.gelu(x)
        x_2: "f32[1, 1024, 68, 68][4734976, 4624, 68, 1]cuda:0" = torch._C._nn.gelu(x_1); x_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/conv.py:453 in _conv_forward, code: return F.conv2d(input, weight, bias, self.stride,
        x_3: "f16[1, 1024, 68, 68][4734976, 4624, 68, 1]cuda:0" = torch.conv2d(x_2, l_self_modules_activation_modules_conv_parameters_weight_, None, (1, 1), (2, 2), (1, 1), 1024); x_2 = l_self_modules_activation_modules_conv_parameters_weight_ = None

        # File: /workspace/networks/layers/basic.py:34 in forward, code: x = x.view(bs, c, h * w).permute(2, 0, 1)
        view_1: "f16[1, 1024, 4624][4734976, 4624, 1]cuda:0" = x_3.view(1, 1024, 4624); x_3 = None
        x_4: "f16[4624, 1, 1024][1, 4734976, 4624]cuda:0" = view_1.permute(2, 0, 1); view_1 = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/linear.py:116 in forward, code: return F.linear(input, self.weight, self.bias)
        tgt2: "f16[4624, 1, 256][256, 256, 1]cuda:0" = torch._C._nn.linear(x_4, l_self_modules_linear2_parameters_weight_, l_self_modules_linear2_parameters_bias_); x_4 = l_self_modules_linear2_parameters_weight_ = l_self_modules_linear2_parameters_bias_ = None

        # File: /workspace/networks/layers/transformer.py:848 in torch_dynamo_resume_in_forward_at_836, code: tgt = tgt + self.droppath(tgt2)
        tgt_1: "f32[4624, 1, 256][1, 1183744, 4624]cuda:0" = tgt + tgt2; tgt = tgt2 = None
        return (tgt_1,)