class GraphModule(torch.nn.Module):
    def forward(self, L_v_: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0", L_self_modules_relative_emb_k_parameters_weight_: "f32[225, 256, 1, 1][256, 1, 1, 1]cuda:0", L_self_modules_relative_emb_k_parameters_bias_: "f32[225][1]cuda:0", L_q_: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0", L_k_: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0"):
        l_v_ = L_v_
        l_self_modules_relative_emb_k_parameters_weight_ = L_self_modules_relative_emb_k_parameters_weight_
        l_self_modules_relative_emb_k_parameters_bias_ = L_self_modules_relative_emb_k_parameters_bias_
        l_q_ = L_q_
        l_k_ = L_k_

        # File: /workspace/networks/layers/attention.py:327 in forward, code: memory_mask = torch.ones((1, 1, h, w), device=v.device).float()
        ones: "f32[1, 1, 68, 68][4624, 4624, 68, 1]cuda:0" = torch.ones((1, 1, 68, 68), device = device(type='cuda', index=0))
        memory_mask: "f32[1, 1, 68, 68][4624, 4624, 68, 1]cuda:0" = ones.float(); ones = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/functional.py:4552 in pad, code: return torch._C._nn.pad(input, pad, mode, value)
        x: "f32[1, 1, 82, 82][6724, 6724, 82, 1]cuda:0" = torch._C._nn.pad(memory_mask, (7, 7, 7, 7), 'constant', 0); memory_mask = None

        # File: /workspace/networks/layers/attention.py:434 in pad_and_unfold, code: x = F.unfold(x,
        x_1: "f32[1, 225, 4624][1040400, 4624, 1]cuda:0" = torch.nn.functional.unfold(x, kernel_size = (15, 15), stride = (1, 1), dilation = 1); x = None

        # File: /workspace/networks/layers/attention.py:328 in forward, code: unfolded_k_mask = self.pad_and_unfold(memory_mask).view(
        unfolded_k_mask: "f32[1, 1, 225, 4624][1040400, 1040400, 4624, 1]cuda:0" = x_1.view(1, 1, 225, 4624); x_1 = None

        # File: /workspace/networks/layers/attention.py:330 in forward, code: qk_mask = 1 - unfolded_k_mask
        qk_mask: "f32[1, 1, 225, 4624][1040400, 1040400, 4624, 1]cuda:0" = 1 - unfolded_k_mask; unfolded_k_mask = None

        # File: /opt/conda/lib/python3.11/site-packages/torch/nn/modules/conv.py:453 in _conv_forward, code: return F.conv2d(input, weight, bias, self.stride,
        relative_emb: "f16[1, 225, 68, 68][1040400, 4624, 68, 1]cuda:0" = torch.conv2d(l_q_, l_self_modules_relative_emb_k_parameters_weight_, l_self_modules_relative_emb_k_parameters_bias_, (1, 1), (0, 0), (1, 1), 1); l_self_modules_relative_emb_k_parameters_weight_ = l_self_modules_relative_emb_k_parameters_bias_ = None

        # File: /workspace/networks/layers/attention.py:335 in forward, code: q = q / self.T
        q: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0" = l_q_ / 16.0; l_q_ = None

        # File: /workspace/networks/layers/attention.py:337 in forward, code: q = q.view(-1, self.d_att, h, w)
        q_1: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0" = q.view(-1, 256, 68, 68); q = None

        # File: /workspace/networks/layers/attention.py:338 in forward, code: k = k.view(-1, self.d_att, h, w)
        k: "f16[1, 256, 68, 68][1183744, 4624, 68, 1]cuda:0" = l_k_.view(-1, 256, 68, 68); l_k_ = None

        # File: /workspace/networks/layers/attention.py:339 in forward, code: v = v.view(-1, self.num_head, hidden_dim, h * w)
        v: "f16[1, 1, 256, 4624][1183744, 1183744, 4624, 1]cuda:0" = l_v_.view(-1, 1, 256, 4624); l_v_ = None

        # File: /workspace/networks/layers/attention.py:341 in forward, code: relative_emb = relative_emb.view(n, self.num_head,
        relative_emb_1: "f16[1, 1, 225, 4624][1040400, 1040400, 4624, 1]cuda:0" = relative_emb.view(1, 1, 4624).view(1, 1, 225, 4624) if False else relative_emb.view(1, 1, 225, 4624); relative_emb = None
        return (q_1, k, v, qk_mask, relative_emb_1)
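A listing in this "class GraphModule" form can be reproduced by compiling the module with a custom Dynamo backend that prints the captured graph before handing it back for execution. The sketch below is a minimal assumption-laden example: `attention`, `v`, `q`, and `k` are placeholders standing in for the real module in /workspace/networks/layers/attention.py and its inputs, not names from this dump.

import torch

def print_graph_backend(gm: torch.fx.GraphModule, example_inputs):
    # Dump the captured graph in the same annotated form as the listing above.
    gm.print_readable()
    # Return the unmodified forward so the traced graph still runs as-is.
    return gm.forward

# Hypothetical usage: `attention` stands in for the traced attention module.
# compiled = torch.compile(attention, backend=print_graph_backend)
# compiled(v, q, k)  # first call triggers tracing and prints the graph

On recent PyTorch 2.x versions, setting the environment variable TORCH_LOGS="graph_code" before running torch.compile emits equivalent output without a custom backend.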