diff --git a/graph_net/test/swin_t_extract_test.py b/graph_net/test/swin_t_extract_test.py
new file mode 100644
index 000000000..2da44e33f
--- /dev/null
+++ b/graph_net/test/swin_t_extract_test.py
@@ -0,0 +1,56 @@
+import argparse
+import os
+
+import torch
+from torchvision import transforms
+from torchvision.models import get_model, get_model_weights
+
+import graph_net
+
+
+def extract_swin_t_graph(model_name: str, model_path: str):
+    normalize = transforms.Normalize(
+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+    )
+
+    batch_size = 1
+    height, width = 224, 224
+    num_channels = 3
+    random_input = torch.rand(batch_size, num_channels, height, width)
+    normalized_input = normalize(random_input)
+
+    weights = None
+    try:
+        w = get_model_weights(model_path)
+        weights = w.DEFAULT
+    except Exception:
+        pass
+
+    model = get_model(model_path, weights=weights)
+    model.eval()
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.to(device)
+    normalized_input = normalized_input.to(device)
+
+    model = graph_net.torch.extract(name=model_name, dynamic=False)(model)
+
+    print("Running inference...")
+    print("Input shape:", normalized_input.shape)
+    with torch.no_grad():
+        output = model(normalized_input)
+    print("Inference finished. Output shape:", output.shape)
+
+
+if __name__ == "__main__":
+    workspace_default = os.environ.get("GRAPH_NET_EXTRACT_WORKSPACE", "workspace")
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_name", type=str, default="swin_t")
+    parser.add_argument("--model_path", type=str, default="swin_t")
+    parser.add_argument("--workspace", type=str, default=workspace_default)
+    args = parser.parse_args()
+
+    os.environ["GRAPH_NET_EXTRACT_WORKSPACE"] = args.workspace
+
+    extract_swin_t_graph(args.model_name, args.model_path)
diff --git a/samples/torchvision/swin_t/graph_hash.txt b/samples/torchvision/swin_t/graph_hash.txt
new file mode 100644
index 000000000..0132c13dd
--- /dev/null
+++ b/samples/torchvision/swin_t/graph_hash.txt
@@ -0,0 +1 @@
+7d17ac7dfc2ce690ae08f3a44dc915b308b8995d29b685a1d4e1a51741d03ffc
\ No newline at end of file
diff --git a/samples/torchvision/swin_t/graph_net.json b/samples/torchvision/swin_t/graph_net.json
new file mode 100644
index 000000000..b2ad1ba83
--- /dev/null
+++ b/samples/torchvision/swin_t/graph_net.json
@@ -0,0 +1,7 @@
+{
+    "framework": "torch",
+    "num_devices_required": 1,
+    "num_nodes_required": 1,
+    "dynamic": false,
+    "model_name": "swin_t"
+}
\ No newline at end of file
diff --git a/samples/torchvision/swin_t/input_meta.py b/samples/torchvision/swin_t/input_meta.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/samples/torchvision/swin_t/input_tensor_constraints.py b/samples/torchvision/swin_t/input_tensor_constraints.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/samples/torchvision/swin_t/model.py b/samples/torchvision/swin_t/model.py
new file mode 100644
index 000000000..b84b6bb3e
--- /dev/null
+++ b/samples/torchvision/swin_t/model.py
@@ -0,0 +1,866 @@
+import torch
+
+class GraphModule(torch.nn.Module):
+
+
+
+    def forward(self, L_self_modules_features_modules_0_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_0_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_x_ : torch.Tensor, L_self_modules_features_modules_0_modules_2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_0_modules_2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_ : 
torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_2_modules_norm_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_2_modules_norm_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_2_modules_reduction_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_ : torch.Tensor, 
L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_4_modules_norm_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_4_modules_norm_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_4_modules_reduction_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_ : 
torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_6_modules_norm_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_6_modules_norm_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_6_modules_reduction_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_ : 
torch.Tensor, L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_ : torch.Tensor, L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_ : torch.nn.parameter.Parameter, 
L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_norm_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_norm_parameters_bias_ : torch.nn.parameter.Parameter, L_self_modules_head_parameters_weight_ : torch.nn.parameter.Parameter, L_self_modules_head_parameters_bias_ : torch.nn.parameter.Parameter): + l_self_modules_features_modules_0_modules_0_parameters_weight_ = L_self_modules_features_modules_0_modules_0_parameters_weight_ + l_self_modules_features_modules_0_modules_0_parameters_bias_ = L_self_modules_features_modules_0_modules_0_parameters_bias_ + l_x_ = L_x_ + l_self_modules_features_modules_0_modules_2_parameters_weight_ = L_self_modules_features_modules_0_modules_2_parameters_weight_ + l_self_modules_features_modules_0_modules_2_parameters_bias_ = L_self_modules_features_modules_0_modules_2_parameters_bias_ + l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_ + l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_ + 
l_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_ + l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_ + l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_ + 
l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_ = L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_ = L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_ = 
L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_2_modules_norm_parameters_weight_ = L_self_modules_features_modules_2_modules_norm_parameters_weight_ + l_self_modules_features_modules_2_modules_norm_parameters_bias_ = L_self_modules_features_modules_2_modules_norm_parameters_bias_ + l_self_modules_features_modules_2_modules_reduction_parameters_weight_ = L_self_modules_features_modules_2_modules_reduction_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_ = L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_ + l_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_ = 
L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_ = L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_ + l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_ + 
l_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_4_modules_norm_parameters_weight_ = 
L_self_modules_features_modules_4_modules_norm_parameters_weight_ + l_self_modules_features_modules_4_modules_norm_parameters_bias_ = L_self_modules_features_modules_4_modules_norm_parameters_bias_ + l_self_modules_features_modules_4_modules_reduction_parameters_weight_ = L_self_modules_features_modules_4_modules_reduction_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_ + l_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_ = 
L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_ + l_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_ + 
l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_ = 
L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_ + 
l_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_ + 
l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_ = 
L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_ = L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_ = 
L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_ = L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_ + l_self_modules_features_modules_6_modules_norm_parameters_weight_ = L_self_modules_features_modules_6_modules_norm_parameters_weight_ + l_self_modules_features_modules_6_modules_norm_parameters_bias_ = L_self_modules_features_modules_6_modules_norm_parameters_bias_ + l_self_modules_features_modules_6_modules_reduction_parameters_weight_ = L_self_modules_features_modules_6_modules_reduction_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_ + l_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_ = 
L_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_ + l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_ + 
l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_ = L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_ + l_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_ = L_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_ + l_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_ = L_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_ + l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_ = L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_ + l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_ = L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_ + l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_ = L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_ + l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_ = 
L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_ + l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_ = L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_ + l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_ = L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_ + l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_ + l_self_modules_norm_parameters_bias_ = L_self_modules_norm_parameters_bias_ + l_self_modules_head_parameters_weight_ = L_self_modules_head_parameters_weight_ + l_self_modules_head_parameters_bias_ = L_self_modules_head_parameters_bias_ + input_1 = torch.conv2d(l_x_, l_self_modules_features_modules_0_modules_0_parameters_weight_, l_self_modules_features_modules_0_modules_0_parameters_bias_, (4, 4), (0, 0), (1, 1), 1); l_x_ = l_self_modules_features_modules_0_modules_0_parameters_weight_ = l_self_modules_features_modules_0_modules_0_parameters_bias_ = None + input_2 = torch.permute(input_1, [0, 2, 3, 1]); input_1 = None + input_3 = torch.nn.functional.layer_norm(input_2, (96,), l_self_modules_features_modules_0_modules_2_parameters_weight_, l_self_modules_features_modules_0_modules_2_parameters_bias_, 1e-05); input_2 = l_self_modules_features_modules_0_modules_2_parameters_weight_ = l_self_modules_features_modules_0_modules_2_parameters_bias_ = None + layer_norm_1 = torch.nn.functional.layer_norm(input_3, (96,), l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_ = None + relative_position_bias = 
l_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_1 = relative_position_bias.view(49, 49, -1); relative_position_bias = None + permute_1 = relative_position_bias_1.permute(2, 0, 1); relative_position_bias_1 = None + contiguous = permute_1.contiguous(); permute_1 = None + relative_position_bias_2 = contiguous.unsqueeze(0); contiguous = None + x = torch.nn.functional.pad(layer_norm_1, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_1 = None + x_1 = x.view(1, 8, 7, 8, 7, 96); x = None + permute_2 = x_1.permute(0, 1, 3, 2, 4, 5); x_1 = None + x_2 = permute_2.reshape(64, 49, 96); permute_2 = None + qkv = torch.nn.functional.linear(x_2, l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_); x_2 = l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_ = None + reshape_1 = qkv.reshape(64, 49, 3, 3, 32); qkv = None + qkv_1 = reshape_1.permute(2, 0, 3, 1, 4); reshape_1 = None + q = qkv_1[0] + k = qkv_1[1] + v = qkv_1[2]; qkv_1 = None + q_1 = q * 0.1767766952966369; q = None + transpose = k.transpose(-2, -1); k = None + attn = q_1.matmul(transpose); q_1 = transpose = None + attn_1 = attn + relative_position_bias_2; attn = relative_position_bias_2 = None + attn_2 = torch.nn.functional.softmax(attn_1, dim = -1); attn_1 = None + attn_3 = torch.nn.functional.dropout(attn_2, p = 0.0, training = False); attn_2 = None + matmul_1 = attn_3.matmul(v); attn_3 = v = None + transpose_1 = 
matmul_1.transpose(1, 2); matmul_1 = None + x_3 = transpose_1.reshape(64, 49, 96); transpose_1 = None + x_4 = torch.nn.functional.linear(x_3, l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_); x_3 = l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_ = None + x_5 = torch.nn.functional.dropout(x_4, p = 0.0, training = False); x_4 = None + x_6 = x_5.view(1, 8, 8, 7, 7, 96); x_5 = None + permute_4 = x_6.permute(0, 1, 3, 2, 4, 5); x_6 = None + x_7 = permute_4.reshape(1, 56, 56, 96); permute_4 = None + getitem_4 = x_7[(slice(None, None, None), slice(None, 56, None), slice(None, 56, None), slice(None, None, None))]; x_7 = None + x_8 = getitem_4.contiguous(); getitem_4 = None + _log_api_usage_once = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once = None + x_9 = input_3 + x_8; input_3 = x_8 = None + layer_norm_2 = torch.nn.functional.layer_norm(x_9, (96,), l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_ = None + input_4 = torch.nn.functional.linear(layer_norm_2, l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_); layer_norm_2 = l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_ = None + input_5 = torch.nn.functional.gelu(input_4, approximate = 'none'); input_4 = None + 
input_6 = torch.nn.functional.dropout(input_5, 0.0, False, False); input_5 = None + input_7 = torch.nn.functional.linear(input_6, l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_); input_6 = l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_ = None + input_8 = torch.nn.functional.dropout(input_7, 0.0, False, False); input_7 = None + _log_api_usage_once_1 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_1 = None + x_10 = x_9 + input_8; x_9 = input_8 = None + layer_norm_3 = torch.nn.functional.layer_norm(x_10, (96,), l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_ = None + relative_position_bias_3 = l_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_4 = relative_position_bias_3.view(49, 49, -1); relative_position_bias_3 = None + permute_5 = relative_position_bias_4.permute(2, 0, 1); relative_position_bias_4 = None + contiguous_2 = permute_5.contiguous(); permute_5 = None + relative_position_bias_5 = contiguous_2.unsqueeze(0); contiguous_2 = None + x_11 = torch.nn.functional.pad(layer_norm_3, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_3 = None + x_12 = 
torch.roll(x_11, shifts = (-3, -3), dims = (1, 2)); x_11 = None + x_13 = x_12.view(1, 8, 7, 8, 7, 96); x_12 = None + permute_6 = x_13.permute(0, 1, 3, 2, 4, 5); x_13 = None + x_14 = permute_6.reshape(64, 49, 96); permute_6 = None + qkv_2 = torch.nn.functional.linear(x_14, l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_); l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_ = None + reshape_5 = qkv_2.reshape(64, 49, 3, 3, 32); qkv_2 = None + qkv_3 = reshape_5.permute(2, 0, 3, 1, 4); reshape_5 = None + q_2 = qkv_3[0] + k_1 = qkv_3[1] + v_1 = qkv_3[2]; qkv_3 = None + q_3 = q_2 * 0.1767766952966369; q_2 = None + transpose_2 = k_1.transpose(-2, -1); k_1 = None + attn_4 = q_3.matmul(transpose_2); q_3 = transpose_2 = None + attn_5 = attn_4 + relative_position_bias_5; attn_4 = relative_position_bias_5 = None + attn_mask = x_14.new_zeros((56, 56)); x_14 = None + attn_mask[(slice(0, -7, None), slice(0, -7, None))] = 0; setitem = attn_mask; setitem = None + attn_mask[(slice(0, -7, None), slice(-7, -3, None))] = 1; setitem_1 = attn_mask; setitem_1 = None + attn_mask[(slice(0, -7, None), slice(-3, None, None))] = 2; setitem_2 = attn_mask; setitem_2 = None + attn_mask[(slice(-7, -3, None), slice(0, -7, None))] = 3; setitem_3 = attn_mask; setitem_3 = None + attn_mask[(slice(-7, -3, None), slice(-7, -3, None))] = 4; setitem_4 = attn_mask; setitem_4 = None + attn_mask[(slice(-7, -3, None), slice(-3, None, None))] = 5; setitem_5 = attn_mask; setitem_5 = None + attn_mask[(slice(-3, None, None), slice(0, -7, None))] = 6; setitem_6 = attn_mask; setitem_6 = None + attn_mask[(slice(-3, None, None), slice(-7, -3, None))] = 7; setitem_7 = attn_mask; setitem_7 = None + attn_mask[(slice(-3, None, None), slice(-3, None, None))] = 8; setitem_8 = 
attn_mask; setitem_8 = None + attn_mask_1 = attn_mask.view(8, 7, 8, 7); attn_mask = None + permute_8 = attn_mask_1.permute(0, 2, 1, 3); attn_mask_1 = None + attn_mask_2 = permute_8.reshape(64, 49); permute_8 = None + unsqueeze_2 = attn_mask_2.unsqueeze(1) + unsqueeze_3 = attn_mask_2.unsqueeze(2); attn_mask_2 = None + attn_mask_3 = unsqueeze_2 - unsqueeze_3; unsqueeze_2 = unsqueeze_3 = None + ne = attn_mask_3 != 0 + masked_fill = attn_mask_3.masked_fill(ne, -100.0); ne = None + eq = attn_mask_3.__eq__(0); attn_mask_3 = None + attn_mask_4 = masked_fill.masked_fill(eq, 0.0); masked_fill = eq = None + attn_6 = attn_5.view(1, 64, 3, 49, 49); attn_5 = None + unsqueeze_4 = attn_mask_4.unsqueeze(1); attn_mask_4 = None + unsqueeze_5 = unsqueeze_4.unsqueeze(0); unsqueeze_4 = None + attn_7 = attn_6 + unsqueeze_5; attn_6 = unsqueeze_5 = None + attn_8 = attn_7.view(-1, 3, 49, 49); attn_7 = None + attn_9 = torch.nn.functional.softmax(attn_8, dim = -1); attn_8 = None + attn_10 = torch.nn.functional.dropout(attn_9, p = 0.0, training = False); attn_9 = None + matmul_3 = attn_10.matmul(v_1); attn_10 = v_1 = None + transpose_3 = matmul_3.transpose(1, 2); matmul_3 = None + x_15 = transpose_3.reshape(64, 49, 96); transpose_3 = None + x_16 = torch.nn.functional.linear(x_15, l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_); x_15 = l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_ = None + x_17 = torch.nn.functional.dropout(x_16, p = 0.0, training = False); x_16 = None + x_18 = x_17.view(1, 8, 8, 7, 7, 96); x_17 = None + permute_9 = x_18.permute(0, 1, 3, 2, 4, 5); x_18 = None + x_19 = permute_9.reshape(1, 56, 56, 96); permute_9 = None + x_20 = torch.roll(x_19, shifts = (3, 3), dims = (1, 2)); x_19 = None + getitem_9 = x_20[(slice(None, 
None, None), slice(None, 56, None), slice(None, 56, None), slice(None, None, None))]; x_20 = None + x_21 = getitem_9.contiguous(); getitem_9 = None + _log_api_usage_once_2 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_2 = None + x_22 = x_10 + x_21; x_10 = x_21 = None + layer_norm_4 = torch.nn.functional.layer_norm(x_22, (96,), l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_ = None + input_9 = torch.nn.functional.linear(layer_norm_4, l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_); layer_norm_4 = l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_ = None + input_10 = torch.nn.functional.gelu(input_9, approximate = 'none'); input_9 = None + input_11 = torch.nn.functional.dropout(input_10, 0.0, False, False); input_10 = None + input_12 = torch.nn.functional.linear(input_11, l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_); input_11 = l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_ = None + input_13 = torch.nn.functional.dropout(input_12, 0.0, False, False); input_12 = None + _log_api_usage_once_3 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_3 = None + x_23 = x_22 + input_13; x_22 = input_13 = None + x_24 = 
torch.nn.functional.pad(x_23, (0, 0, 0, 0, 0, 0), 'constant', None); x_23 = None + x0 = x_24[(Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None, None, None))] + x1 = x_24[(Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None, None, None))] + x2 = x_24[(Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None, None, None))] + x3 = x_24[(Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None, None, None))]; x_24 = None + x_25 = torch.cat([x0, x1, x2, x3], -1); x0 = x1 = x2 = x3 = None + x_26 = torch.nn.functional.layer_norm(x_25, (384,), l_self_modules_features_modules_2_modules_norm_parameters_weight_, l_self_modules_features_modules_2_modules_norm_parameters_bias_, 1e-05); x_25 = l_self_modules_features_modules_2_modules_norm_parameters_weight_ = l_self_modules_features_modules_2_modules_norm_parameters_bias_ = None + x_27 = torch.nn.functional.linear(x_26, l_self_modules_features_modules_2_modules_reduction_parameters_weight_, None); x_26 = l_self_modules_features_modules_2_modules_reduction_parameters_weight_ = None + layer_norm_6 = torch.nn.functional.layer_norm(x_27, (192,), l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_ = l_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_ = None + relative_position_bias_6 = l_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_7 = relative_position_bias_6.view(49, 49, -1); relative_position_bias_6 = None + permute_10 = 
relative_position_bias_7.permute(2, 0, 1); relative_position_bias_7 = None + contiguous_4 = permute_10.contiguous(); permute_10 = None + relative_position_bias_8 = contiguous_4.unsqueeze(0); contiguous_4 = None + x_28 = torch.nn.functional.pad(layer_norm_6, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_6 = None + x_29 = x_28.view(1, 4, 7, 4, 7, 192); x_28 = None + permute_11 = x_29.permute(0, 1, 3, 2, 4, 5); x_29 = None + x_30 = permute_11.reshape(16, 49, 192); permute_11 = None + qkv_4 = torch.nn.functional.linear(x_30, l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_); x_30 = l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_ = None + reshape_10 = qkv_4.reshape(16, 49, 3, 6, 32); qkv_4 = None + qkv_5 = reshape_10.permute(2, 0, 3, 1, 4); reshape_10 = None + q_4 = qkv_5[0] + k_2 = qkv_5[1] + v_2 = qkv_5[2]; qkv_5 = None + q_5 = q_4 * 0.1767766952966369; q_4 = None + transpose_4 = k_2.transpose(-2, -1); k_2 = None + attn_11 = q_5.matmul(transpose_4); q_5 = transpose_4 = None + attn_12 = attn_11 + relative_position_bias_8; attn_11 = relative_position_bias_8 = None + attn_13 = torch.nn.functional.softmax(attn_12, dim = -1); attn_12 = None + attn_14 = torch.nn.functional.dropout(attn_13, p = 0.0, training = False); attn_13 = None + matmul_5 = attn_14.matmul(v_2); attn_14 = v_2 = None + transpose_5 = matmul_5.transpose(1, 2); matmul_5 = None + x_31 = transpose_5.reshape(16, 49, 192); transpose_5 = None + x_32 = torch.nn.functional.linear(x_31, l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_); x_31 = l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_ = 
l_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_ = None + x_33 = torch.nn.functional.dropout(x_32, p = 0.0, training = False); x_32 = None + x_34 = x_33.view(1, 4, 4, 7, 7, 192); x_33 = None + permute_13 = x_34.permute(0, 1, 3, 2, 4, 5); x_34 = None + x_35 = permute_13.reshape(1, 28, 28, 192); permute_13 = None + getitem_18 = x_35[(slice(None, None, None), slice(None, 28, None), slice(None, 28, None), slice(None, None, None))]; x_35 = None + x_36 = getitem_18.contiguous(); getitem_18 = None + _log_api_usage_once_4 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_4 = None + x_37 = x_27 + x_36; x_27 = x_36 = None + layer_norm_7 = torch.nn.functional.layer_norm(x_37, (192,), l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_ = l_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_ = None + input_14 = torch.nn.functional.linear(layer_norm_7, l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_); layer_norm_7 = l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_ = None + input_15 = torch.nn.functional.gelu(input_14, approximate = 'none'); input_14 = None + input_16 = torch.nn.functional.dropout(input_15, 0.0, False, False); input_15 = None + input_17 = torch.nn.functional.linear(input_16, l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_); input_16 = 
l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_ = None + input_18 = torch.nn.functional.dropout(input_17, 0.0, False, False); input_17 = None + _log_api_usage_once_5 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_5 = None + x_38 = x_37 + input_18; x_37 = input_18 = None + layer_norm_8 = torch.nn.functional.layer_norm(x_38, (192,), l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_ = None + relative_position_bias_9 = l_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_10 = relative_position_bias_9.view(49, 49, -1); relative_position_bias_9 = None + permute_14 = relative_position_bias_10.permute(2, 0, 1); relative_position_bias_10 = None + contiguous_6 = permute_14.contiguous(); permute_14 = None + relative_position_bias_11 = contiguous_6.unsqueeze(0); contiguous_6 = None + x_39 = torch.nn.functional.pad(layer_norm_8, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_8 = None + x_40 = torch.roll(x_39, shifts = (-3, -3), dims = (1, 2)); x_39 = None + x_41 = x_40.view(1, 4, 7, 4, 7, 192); x_40 = None + permute_15 = x_41.permute(0, 1, 3, 2, 4, 5); x_41 = None + x_42 = permute_15.reshape(16, 49, 192); permute_15 = None + qkv_6 = torch.nn.functional.linear(x_42, 
l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_); l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_ = None + reshape_14 = qkv_6.reshape(16, 49, 3, 6, 32); qkv_6 = None + qkv_7 = reshape_14.permute(2, 0, 3, 1, 4); reshape_14 = None + q_6 = qkv_7[0] + k_3 = qkv_7[1] + v_3 = qkv_7[2]; qkv_7 = None + q_7 = q_6 * 0.1767766952966369; q_6 = None + transpose_6 = k_3.transpose(-2, -1); k_3 = None + attn_15 = q_7.matmul(transpose_6); q_7 = transpose_6 = None + attn_16 = attn_15 + relative_position_bias_11; attn_15 = relative_position_bias_11 = None + attn_mask_5 = x_42.new_zeros((28, 28)); x_42 = None + attn_mask_5[(slice(0, -7, None), slice(0, -7, None))] = 0; setitem_9 = attn_mask_5; setitem_9 = None + attn_mask_5[(slice(0, -7, None), slice(-7, -3, None))] = 1; setitem_10 = attn_mask_5; setitem_10 = None + attn_mask_5[(slice(0, -7, None), slice(-3, None, None))] = 2; setitem_11 = attn_mask_5; setitem_11 = None + attn_mask_5[(slice(-7, -3, None), slice(0, -7, None))] = 3; setitem_12 = attn_mask_5; setitem_12 = None + attn_mask_5[(slice(-7, -3, None), slice(-7, -3, None))] = 4; setitem_13 = attn_mask_5; setitem_13 = None + attn_mask_5[(slice(-7, -3, None), slice(-3, None, None))] = 5; setitem_14 = attn_mask_5; setitem_14 = None + attn_mask_5[(slice(-3, None, None), slice(0, -7, None))] = 6; setitem_15 = attn_mask_5; setitem_15 = None + attn_mask_5[(slice(-3, None, None), slice(-7, -3, None))] = 7; setitem_16 = attn_mask_5; setitem_16 = None + attn_mask_5[(slice(-3, None, None), slice(-3, None, None))] = 8; setitem_17 = attn_mask_5; setitem_17 = None + attn_mask_6 = attn_mask_5.view(4, 7, 4, 7); attn_mask_5 = None + permute_17 = attn_mask_6.permute(0, 2, 1, 3); attn_mask_6 = None + attn_mask_7 = permute_17.reshape(16, 49); 
permute_17 = None + unsqueeze_8 = attn_mask_7.unsqueeze(1) + unsqueeze_9 = attn_mask_7.unsqueeze(2); attn_mask_7 = None + attn_mask_8 = unsqueeze_8 - unsqueeze_9; unsqueeze_8 = unsqueeze_9 = None + ne_1 = attn_mask_8 != 0 + masked_fill_2 = attn_mask_8.masked_fill(ne_1, -100.0); ne_1 = None + eq_1 = attn_mask_8.__eq__(0); attn_mask_8 = None + attn_mask_9 = masked_fill_2.masked_fill(eq_1, 0.0); masked_fill_2 = eq_1 = None + attn_17 = attn_16.view(1, 16, 6, 49, 49); attn_16 = None + unsqueeze_10 = attn_mask_9.unsqueeze(1); attn_mask_9 = None + unsqueeze_11 = unsqueeze_10.unsqueeze(0); unsqueeze_10 = None + attn_18 = attn_17 + unsqueeze_11; attn_17 = unsqueeze_11 = None + attn_19 = attn_18.view(-1, 6, 49, 49); attn_18 = None + attn_20 = torch.nn.functional.softmax(attn_19, dim = -1); attn_19 = None + attn_21 = torch.nn.functional.dropout(attn_20, p = 0.0, training = False); attn_20 = None + matmul_7 = attn_21.matmul(v_3); attn_21 = v_3 = None + transpose_7 = matmul_7.transpose(1, 2); matmul_7 = None + x_43 = transpose_7.reshape(16, 49, 192); transpose_7 = None + x_44 = torch.nn.functional.linear(x_43, l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_); x_43 = l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_ = None + x_45 = torch.nn.functional.dropout(x_44, p = 0.0, training = False); x_44 = None + x_46 = x_45.view(1, 4, 4, 7, 7, 192); x_45 = None + permute_18 = x_46.permute(0, 1, 3, 2, 4, 5); x_46 = None + x_47 = permute_18.reshape(1, 28, 28, 192); permute_18 = None + x_48 = torch.roll(x_47, shifts = (3, 3), dims = (1, 2)); x_47 = None + getitem_23 = x_48[(slice(None, None, None), slice(None, 28, None), slice(None, 28, None), slice(None, None, None))]; x_48 = None + x_49 = getitem_23.contiguous(); getitem_23 = None + 
_log_api_usage_once_6 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_6 = None + x_50 = x_38 + x_49; x_38 = x_49 = None + layer_norm_9 = torch.nn.functional.layer_norm(x_50, (192,), l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_ = None + input_19 = torch.nn.functional.linear(layer_norm_9, l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_); layer_norm_9 = l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_ = None + input_20 = torch.nn.functional.gelu(input_19, approximate = 'none'); input_19 = None + input_21 = torch.nn.functional.dropout(input_20, 0.0, False, False); input_20 = None + input_22 = torch.nn.functional.linear(input_21, l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_); input_21 = l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_ = None + input_23 = torch.nn.functional.dropout(input_22, 0.0, False, False); input_22 = None + _log_api_usage_once_7 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_7 = None + x_51 = x_50 + input_23; x_50 = input_23 = None + x_52 = torch.nn.functional.pad(x_51, (0, 0, 0, 0, 0, 0), 'constant', None); x_51 = None + x0_1 = x_52[(Ellipsis, slice(0, None, 2), slice(0, None, 2), 
slice(None, None, None))] + x1_1 = x_52[(Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None, None, None))] + x2_1 = x_52[(Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None, None, None))] + x3_1 = x_52[(Ellipsis, slice(1, None, 2), slice(1, None, 2), slice(None, None, None))]; x_52 = None + x_53 = torch.cat([x0_1, x1_1, x2_1, x3_1], -1); x0_1 = x1_1 = x2_1 = x3_1 = None + x_54 = torch.nn.functional.layer_norm(x_53, (768,), l_self_modules_features_modules_4_modules_norm_parameters_weight_, l_self_modules_features_modules_4_modules_norm_parameters_bias_, 1e-05); x_53 = l_self_modules_features_modules_4_modules_norm_parameters_weight_ = l_self_modules_features_modules_4_modules_norm_parameters_bias_ = None + x_55 = torch.nn.functional.linear(x_54, l_self_modules_features_modules_4_modules_reduction_parameters_weight_, None); x_54 = l_self_modules_features_modules_4_modules_reduction_parameters_weight_ = None + layer_norm_11 = torch.nn.functional.layer_norm(x_55, (384,), l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_ = None + relative_position_bias_12 = l_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_13 = relative_position_bias_12.view(49, 49, -1); relative_position_bias_12 = None + permute_19 = relative_position_bias_13.permute(2, 0, 1); relative_position_bias_13 = None + contiguous_8 = permute_19.contiguous(); permute_19 = None + 
relative_position_bias_14 = contiguous_8.unsqueeze(0); contiguous_8 = None + x_56 = torch.nn.functional.pad(layer_norm_11, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_11 = None + x_57 = x_56.view(1, 2, 7, 2, 7, 384); x_56 = None + permute_20 = x_57.permute(0, 1, 3, 2, 4, 5); x_57 = None + x_58 = permute_20.reshape(4, 49, 384); permute_20 = None + qkv_8 = torch.nn.functional.linear(x_58, l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_); x_58 = l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_ = None + reshape_19 = qkv_8.reshape(4, 49, 3, 12, 32); qkv_8 = None + qkv_9 = reshape_19.permute(2, 0, 3, 1, 4); reshape_19 = None + q_8 = qkv_9[0] + k_4 = qkv_9[1] + v_4 = qkv_9[2]; qkv_9 = None + q_9 = q_8 * 0.1767766952966369; q_8 = None + transpose_8 = k_4.transpose(-2, -1); k_4 = None + attn_22 = q_9.matmul(transpose_8); q_9 = transpose_8 = None + attn_23 = attn_22 + relative_position_bias_14; attn_22 = relative_position_bias_14 = None + attn_24 = torch.nn.functional.softmax(attn_23, dim = -1); attn_23 = None + attn_25 = torch.nn.functional.dropout(attn_24, p = 0.0, training = False); attn_24 = None + matmul_9 = attn_25.matmul(v_4); attn_25 = v_4 = None + transpose_9 = matmul_9.transpose(1, 2); matmul_9 = None + x_59 = transpose_9.reshape(4, 49, 384); transpose_9 = None + x_60 = torch.nn.functional.linear(x_59, l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_); x_59 = l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_ = None + x_61 = torch.nn.functional.dropout(x_60, p 
= 0.0, training = False); x_60 = None + x_62 = x_61.view(1, 2, 2, 7, 7, 384); x_61 = None + permute_22 = x_62.permute(0, 1, 3, 2, 4, 5); x_62 = None + x_63 = permute_22.reshape(1, 14, 14, 384); permute_22 = None + getitem_32 = x_63[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_63 = None + x_64 = getitem_32.contiguous(); getitem_32 = None + _log_api_usage_once_8 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_8 = None + x_65 = x_55 + x_64; x_55 = x_64 = None + layer_norm_12 = torch.nn.functional.layer_norm(x_65, (384,), l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_ = None + input_24 = torch.nn.functional.linear(layer_norm_12, l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_); layer_norm_12 = l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_ = None + input_25 = torch.nn.functional.gelu(input_24, approximate = 'none'); input_24 = None + input_26 = torch.nn.functional.dropout(input_25, 0.0, False, False); input_25 = None + input_27 = torch.nn.functional.linear(input_26, l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_); input_26 = l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_ = None + input_28 = 
torch.nn.functional.dropout(input_27, 0.0, False, False); input_27 = None + _log_api_usage_once_9 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_9 = None + x_66 = x_65 + input_28; x_65 = input_28 = None + layer_norm_13 = torch.nn.functional.layer_norm(x_66, (384,), l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_ = None + relative_position_bias_15 = l_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_16 = relative_position_bias_15.view(49, 49, -1); relative_position_bias_15 = None + permute_23 = relative_position_bias_16.permute(2, 0, 1); relative_position_bias_16 = None + contiguous_10 = permute_23.contiguous(); permute_23 = None + relative_position_bias_17 = contiguous_10.unsqueeze(0); contiguous_10 = None + x_67 = torch.nn.functional.pad(layer_norm_13, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_13 = None + x_68 = torch.roll(x_67, shifts = (-3, -3), dims = (1, 2)); x_67 = None + x_69 = x_68.view(1, 2, 7, 2, 7, 384); x_68 = None + permute_24 = x_69.permute(0, 1, 3, 2, 4, 5); x_69 = None + x_70 = permute_24.reshape(4, 49, 384); permute_24 = None + qkv_10 = torch.nn.functional.linear(x_70, l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_); 
l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_ = None + reshape_23 = qkv_10.reshape(4, 49, 3, 12, 32); qkv_10 = None + qkv_11 = reshape_23.permute(2, 0, 3, 1, 4); reshape_23 = None + q_10 = qkv_11[0] + k_5 = qkv_11[1] + v_5 = qkv_11[2]; qkv_11 = None + q_11 = q_10 * 0.1767766952966369; q_10 = None + transpose_10 = k_5.transpose(-2, -1); k_5 = None + attn_26 = q_11.matmul(transpose_10); q_11 = transpose_10 = None + attn_27 = attn_26 + relative_position_bias_17; attn_26 = relative_position_bias_17 = None + attn_mask_10 = x_70.new_zeros((14, 14)); x_70 = None + attn_mask_10[(slice(0, -7, None), slice(0, -7, None))] = 0; setitem_18 = attn_mask_10; setitem_18 = None + attn_mask_10[(slice(0, -7, None), slice(-7, -3, None))] = 1; setitem_19 = attn_mask_10; setitem_19 = None + attn_mask_10[(slice(0, -7, None), slice(-3, None, None))] = 2; setitem_20 = attn_mask_10; setitem_20 = None + attn_mask_10[(slice(-7, -3, None), slice(0, -7, None))] = 3; setitem_21 = attn_mask_10; setitem_21 = None + attn_mask_10[(slice(-7, -3, None), slice(-7, -3, None))] = 4; setitem_22 = attn_mask_10; setitem_22 = None + attn_mask_10[(slice(-7, -3, None), slice(-3, None, None))] = 5; setitem_23 = attn_mask_10; setitem_23 = None + attn_mask_10[(slice(-3, None, None), slice(0, -7, None))] = 6; setitem_24 = attn_mask_10; setitem_24 = None + attn_mask_10[(slice(-3, None, None), slice(-7, -3, None))] = 7; setitem_25 = attn_mask_10; setitem_25 = None + attn_mask_10[(slice(-3, None, None), slice(-3, None, None))] = 8; setitem_26 = attn_mask_10; setitem_26 = None + attn_mask_11 = attn_mask_10.view(2, 7, 2, 7); attn_mask_10 = None + permute_26 = attn_mask_11.permute(0, 2, 1, 3); attn_mask_11 = None + attn_mask_12 = permute_26.reshape(4, 49); permute_26 = None + unsqueeze_14 = attn_mask_12.unsqueeze(1) + unsqueeze_15 = attn_mask_12.unsqueeze(2); attn_mask_12 = None + attn_mask_13 
= unsqueeze_14 - unsqueeze_15; unsqueeze_14 = unsqueeze_15 = None + ne_2 = attn_mask_13 != 0 + masked_fill_4 = attn_mask_13.masked_fill(ne_2, -100.0); ne_2 = None + eq_2 = attn_mask_13.__eq__(0); attn_mask_13 = None + attn_mask_14 = masked_fill_4.masked_fill(eq_2, 0.0); masked_fill_4 = eq_2 = None + attn_28 = attn_27.view(1, 4, 12, 49, 49); attn_27 = None + unsqueeze_16 = attn_mask_14.unsqueeze(1); attn_mask_14 = None + unsqueeze_17 = unsqueeze_16.unsqueeze(0); unsqueeze_16 = None + attn_29 = attn_28 + unsqueeze_17; attn_28 = unsqueeze_17 = None + attn_30 = attn_29.view(-1, 12, 49, 49); attn_29 = None + attn_31 = torch.nn.functional.softmax(attn_30, dim = -1); attn_30 = None + attn_32 = torch.nn.functional.dropout(attn_31, p = 0.0, training = False); attn_31 = None + matmul_11 = attn_32.matmul(v_5); attn_32 = v_5 = None + transpose_11 = matmul_11.transpose(1, 2); matmul_11 = None + x_71 = transpose_11.reshape(4, 49, 384); transpose_11 = None + x_72 = torch.nn.functional.linear(x_71, l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_); x_71 = l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_ = None + x_73 = torch.nn.functional.dropout(x_72, p = 0.0, training = False); x_72 = None + x_74 = x_73.view(1, 2, 2, 7, 7, 384); x_73 = None + permute_27 = x_74.permute(0, 1, 3, 2, 4, 5); x_74 = None + x_75 = permute_27.reshape(1, 14, 14, 384); permute_27 = None + x_76 = torch.roll(x_75, shifts = (3, 3), dims = (1, 2)); x_75 = None + getitem_37 = x_76[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_76 = None + x_77 = getitem_37.contiguous(); getitem_37 = None + _log_api_usage_once_10 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); 
_log_api_usage_once_10 = None + x_78 = x_66 + x_77; x_66 = x_77 = None + layer_norm_14 = torch.nn.functional.layer_norm(x_78, (384,), l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_ = None + input_29 = torch.nn.functional.linear(layer_norm_14, l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_); layer_norm_14 = l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_ = None + input_30 = torch.nn.functional.gelu(input_29, approximate = 'none'); input_29 = None + input_31 = torch.nn.functional.dropout(input_30, 0.0, False, False); input_30 = None + input_32 = torch.nn.functional.linear(input_31, l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_); input_31 = l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_ = None + input_33 = torch.nn.functional.dropout(input_32, 0.0, False, False); input_32 = None + _log_api_usage_once_11 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_11 = None + x_79 = x_78 + input_33; x_78 = input_33 = None + layer_norm_15 = torch.nn.functional.layer_norm(x_79, (384,), l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_, 1e-05); 
l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_ = None + relative_position_bias_18 = l_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_19 = relative_position_bias_18.view(49, 49, -1); relative_position_bias_18 = None + permute_28 = relative_position_bias_19.permute(2, 0, 1); relative_position_bias_19 = None + contiguous_12 = permute_28.contiguous(); permute_28 = None + relative_position_bias_20 = contiguous_12.unsqueeze(0); contiguous_12 = None + x_80 = torch.nn.functional.pad(layer_norm_15, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_15 = None + x_81 = x_80.view(1, 2, 7, 2, 7, 384); x_80 = None + permute_29 = x_81.permute(0, 1, 3, 2, 4, 5); x_81 = None + x_82 = permute_29.reshape(4, 49, 384); permute_29 = None + qkv_12 = torch.nn.functional.linear(x_82, l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_); x_82 = l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_ = None + reshape_28 = qkv_12.reshape(4, 49, 3, 12, 32); qkv_12 = None + qkv_13 = reshape_28.permute(2, 0, 3, 1, 4); reshape_28 = None + q_12 = qkv_13[0] + k_6 = qkv_13[1] + v_6 = qkv_13[2]; qkv_13 = None + q_13 = q_12 * 0.1767766952966369; q_12 = None + transpose_12 = k_6.transpose(-2, -1); k_6 = None + attn_33 = q_13.matmul(transpose_12); q_13 = transpose_12 = None + attn_34 = attn_33 + 
relative_position_bias_20; attn_33 = relative_position_bias_20 = None + attn_35 = torch.nn.functional.softmax(attn_34, dim = -1); attn_34 = None + attn_36 = torch.nn.functional.dropout(attn_35, p = 0.0, training = False); attn_35 = None + matmul_13 = attn_36.matmul(v_6); attn_36 = v_6 = None + transpose_13 = matmul_13.transpose(1, 2); matmul_13 = None + x_83 = transpose_13.reshape(4, 49, 384); transpose_13 = None + x_84 = torch.nn.functional.linear(x_83, l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_); x_83 = l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_ = None + x_85 = torch.nn.functional.dropout(x_84, p = 0.0, training = False); x_84 = None + x_86 = x_85.view(1, 2, 2, 7, 7, 384); x_85 = None + permute_31 = x_86.permute(0, 1, 3, 2, 4, 5); x_86 = None + x_87 = permute_31.reshape(1, 14, 14, 384); permute_31 = None + getitem_42 = x_87[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_87 = None + x_88 = getitem_42.contiguous(); getitem_42 = None + _log_api_usage_once_12 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_12 = None + x_89 = x_79 + x_88; x_79 = x_88 = None + layer_norm_16 = torch.nn.functional.layer_norm(x_89, (384,), l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_ = None + input_34 = torch.nn.functional.linear(layer_norm_16, l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_, 
l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_); layer_norm_16 = l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_ = None + input_35 = torch.nn.functional.gelu(input_34, approximate = 'none'); input_34 = None + input_36 = torch.nn.functional.dropout(input_35, 0.0, False, False); input_35 = None + input_37 = torch.nn.functional.linear(input_36, l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_); input_36 = l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_ = None + input_38 = torch.nn.functional.dropout(input_37, 0.0, False, False); input_37 = None + _log_api_usage_once_13 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_13 = None + x_90 = x_89 + input_38; x_89 = input_38 = None + layer_norm_17 = torch.nn.functional.layer_norm(x_90, (384,), l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_ = None + relative_position_bias_21 = l_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_22 = 
relative_position_bias_21.view(49, 49, -1); relative_position_bias_21 = None + permute_32 = relative_position_bias_22.permute(2, 0, 1); relative_position_bias_22 = None + contiguous_14 = permute_32.contiguous(); permute_32 = None + relative_position_bias_23 = contiguous_14.unsqueeze(0); contiguous_14 = None + x_91 = torch.nn.functional.pad(layer_norm_17, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_17 = None + x_92 = torch.roll(x_91, shifts = (-3, -3), dims = (1, 2)); x_91 = None + x_93 = x_92.view(1, 2, 7, 2, 7, 384); x_92 = None + permute_33 = x_93.permute(0, 1, 3, 2, 4, 5); x_93 = None + x_94 = permute_33.reshape(4, 49, 384); permute_33 = None + qkv_14 = torch.nn.functional.linear(x_94, l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_); l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_ = None + reshape_32 = qkv_14.reshape(4, 49, 3, 12, 32); qkv_14 = None + qkv_15 = reshape_32.permute(2, 0, 3, 1, 4); reshape_32 = None + q_14 = qkv_15[0] + k_7 = qkv_15[1] + v_7 = qkv_15[2]; qkv_15 = None + q_15 = q_14 * 0.1767766952966369; q_14 = None + transpose_14 = k_7.transpose(-2, -1); k_7 = None + attn_37 = q_15.matmul(transpose_14); q_15 = transpose_14 = None + attn_38 = attn_37 + relative_position_bias_23; attn_37 = relative_position_bias_23 = None + attn_mask_15 = x_94.new_zeros((14, 14)); x_94 = None + attn_mask_15[(slice(0, -7, None), slice(0, -7, None))] = 0; setitem_27 = attn_mask_15; setitem_27 = None + attn_mask_15[(slice(0, -7, None), slice(-7, -3, None))] = 1; setitem_28 = attn_mask_15; setitem_28 = None + attn_mask_15[(slice(0, -7, None), slice(-3, None, None))] = 2; setitem_29 = attn_mask_15; setitem_29 = None + attn_mask_15[(slice(-7, -3, None), slice(0, -7, None))] = 3; setitem_30 = attn_mask_15; setitem_30 
= None + attn_mask_15[(slice(-7, -3, None), slice(-7, -3, None))] = 4; setitem_31 = attn_mask_15; setitem_31 = None + attn_mask_15[(slice(-7, -3, None), slice(-3, None, None))] = 5; setitem_32 = attn_mask_15; setitem_32 = None + attn_mask_15[(slice(-3, None, None), slice(0, -7, None))] = 6; setitem_33 = attn_mask_15; setitem_33 = None + attn_mask_15[(slice(-3, None, None), slice(-7, -3, None))] = 7; setitem_34 = attn_mask_15; setitem_34 = None + attn_mask_15[(slice(-3, None, None), slice(-3, None, None))] = 8; setitem_35 = attn_mask_15; setitem_35 = None + attn_mask_16 = attn_mask_15.view(2, 7, 2, 7); attn_mask_15 = None + permute_35 = attn_mask_16.permute(0, 2, 1, 3); attn_mask_16 = None + attn_mask_17 = permute_35.reshape(4, 49); permute_35 = None + unsqueeze_20 = attn_mask_17.unsqueeze(1) + unsqueeze_21 = attn_mask_17.unsqueeze(2); attn_mask_17 = None + attn_mask_18 = unsqueeze_20 - unsqueeze_21; unsqueeze_20 = unsqueeze_21 = None + ne_3 = attn_mask_18 != 0 + masked_fill_6 = attn_mask_18.masked_fill(ne_3, -100.0); ne_3 = None + eq_3 = attn_mask_18.__eq__(0); attn_mask_18 = None + attn_mask_19 = masked_fill_6.masked_fill(eq_3, 0.0); masked_fill_6 = eq_3 = None + attn_39 = attn_38.view(1, 4, 12, 49, 49); attn_38 = None + unsqueeze_22 = attn_mask_19.unsqueeze(1); attn_mask_19 = None + unsqueeze_23 = unsqueeze_22.unsqueeze(0); unsqueeze_22 = None + attn_40 = attn_39 + unsqueeze_23; attn_39 = unsqueeze_23 = None + attn_41 = attn_40.view(-1, 12, 49, 49); attn_40 = None + attn_42 = torch.nn.functional.softmax(attn_41, dim = -1); attn_41 = None + attn_43 = torch.nn.functional.dropout(attn_42, p = 0.0, training = False); attn_42 = None + matmul_15 = attn_43.matmul(v_7); attn_43 = v_7 = None + transpose_15 = matmul_15.transpose(1, 2); matmul_15 = None + x_95 = transpose_15.reshape(4, 49, 384); transpose_15 = None + x_96 = torch.nn.functional.linear(x_95, l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_, 
l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_); x_95 = l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_ = None + x_97 = torch.nn.functional.dropout(x_96, p = 0.0, training = False); x_96 = None + x_98 = x_97.view(1, 2, 2, 7, 7, 384); x_97 = None + permute_36 = x_98.permute(0, 1, 3, 2, 4, 5); x_98 = None + x_99 = permute_36.reshape(1, 14, 14, 384); permute_36 = None + x_100 = torch.roll(x_99, shifts = (3, 3), dims = (1, 2)); x_99 = None + getitem_47 = x_100[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_100 = None + x_101 = getitem_47.contiguous(); getitem_47 = None + _log_api_usage_once_14 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_14 = None + x_102 = x_90 + x_101; x_90 = x_101 = None + layer_norm_18 = torch.nn.functional.layer_norm(x_102, (384,), l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_ = None + input_39 = torch.nn.functional.linear(layer_norm_18, l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_); layer_norm_18 = l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_ = None + input_40 = torch.nn.functional.gelu(input_39, approximate = 'none'); input_39 = None + input_41 = torch.nn.functional.dropout(input_40, 0.0, False, False); input_40 = None + input_42 = 
torch.nn.functional.linear(input_41, l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_); input_41 = l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_ = None + input_43 = torch.nn.functional.dropout(input_42, 0.0, False, False); input_42 = None + _log_api_usage_once_15 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_15 = None + x_103 = x_102 + input_43; x_102 = input_43 = None + layer_norm_19 = torch.nn.functional.layer_norm(x_103, (384,), l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_ = None + relative_position_bias_24 = l_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_25 = relative_position_bias_24.view(49, 49, -1); relative_position_bias_24 = None + permute_37 = relative_position_bias_25.permute(2, 0, 1); relative_position_bias_25 = None + contiguous_16 = permute_37.contiguous(); permute_37 = None + relative_position_bias_26 = contiguous_16.unsqueeze(0); contiguous_16 = None + x_104 = torch.nn.functional.pad(layer_norm_19, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_19 = None + x_105 = x_104.view(1, 2, 7, 2, 7, 384); x_104 = None + permute_38 = 
x_105.permute(0, 1, 3, 2, 4, 5); x_105 = None + x_106 = permute_38.reshape(4, 49, 384); permute_38 = None + qkv_16 = torch.nn.functional.linear(x_106, l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_); x_106 = l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_ = None + reshape_37 = qkv_16.reshape(4, 49, 3, 12, 32); qkv_16 = None + qkv_17 = reshape_37.permute(2, 0, 3, 1, 4); reshape_37 = None + q_16 = qkv_17[0] + k_8 = qkv_17[1] + v_8 = qkv_17[2]; qkv_17 = None + q_17 = q_16 * 0.1767766952966369; q_16 = None + transpose_16 = k_8.transpose(-2, -1); k_8 = None + attn_44 = q_17.matmul(transpose_16); q_17 = transpose_16 = None + attn_45 = attn_44 + relative_position_bias_26; attn_44 = relative_position_bias_26 = None + attn_46 = torch.nn.functional.softmax(attn_45, dim = -1); attn_45 = None + attn_47 = torch.nn.functional.dropout(attn_46, p = 0.0, training = False); attn_46 = None + matmul_17 = attn_47.matmul(v_8); attn_47 = v_8 = None + transpose_17 = matmul_17.transpose(1, 2); matmul_17 = None + x_107 = transpose_17.reshape(4, 49, 384); transpose_17 = None + x_108 = torch.nn.functional.linear(x_107, l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_); x_107 = l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_ = None + x_109 = torch.nn.functional.dropout(x_108, p = 0.0, training = False); x_108 = None + x_110 = x_109.view(1, 2, 2, 7, 7, 384); x_109 = None + permute_40 = x_110.permute(0, 1, 3, 2, 4, 5); x_110 = None + x_111 = permute_40.reshape(1, 14, 14, 384); permute_40 = 
None + getitem_52 = x_111[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_111 = None + x_112 = getitem_52.contiguous(); getitem_52 = None + _log_api_usage_once_16 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_16 = None + x_113 = x_103 + x_112; x_103 = x_112 = None + layer_norm_20 = torch.nn.functional.layer_norm(x_113, (384,), l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_ = None + input_44 = torch.nn.functional.linear(layer_norm_20, l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_); layer_norm_20 = l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_ = None + input_45 = torch.nn.functional.gelu(input_44, approximate = 'none'); input_44 = None + input_46 = torch.nn.functional.dropout(input_45, 0.0, False, False); input_45 = None + input_47 = torch.nn.functional.linear(input_46, l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_); input_46 = l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_ = None + input_48 = torch.nn.functional.dropout(input_47, 0.0, False, False); input_47 = None + _log_api_usage_once_17 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_17 = None + 
x_114 = x_113 + input_48; x_113 = input_48 = None + layer_norm_21 = torch.nn.functional.layer_norm(x_114, (384,), l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_ = None + relative_position_bias_27 = l_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_28 = relative_position_bias_27.view(49, 49, -1); relative_position_bias_27 = None + permute_41 = relative_position_bias_28.permute(2, 0, 1); relative_position_bias_28 = None + contiguous_18 = permute_41.contiguous(); permute_41 = None + relative_position_bias_29 = contiguous_18.unsqueeze(0); contiguous_18 = None + x_115 = torch.nn.functional.pad(layer_norm_21, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_21 = None + x_116 = torch.roll(x_115, shifts = (-3, -3), dims = (1, 2)); x_115 = None + x_117 = x_116.view(1, 2, 7, 2, 7, 384); x_116 = None + permute_42 = x_117.permute(0, 1, 3, 2, 4, 5); x_117 = None + x_118 = permute_42.reshape(4, 49, 384); permute_42 = None + qkv_18 = torch.nn.functional.linear(x_118, l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_); l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_ = None + reshape_41 = qkv_18.reshape(4, 49, 3, 
12, 32); qkv_18 = None + qkv_19 = reshape_41.permute(2, 0, 3, 1, 4); reshape_41 = None + q_18 = qkv_19[0] + k_9 = qkv_19[1] + v_9 = qkv_19[2]; qkv_19 = None + q_19 = q_18 * 0.1767766952966369; q_18 = None + transpose_18 = k_9.transpose(-2, -1); k_9 = None + attn_48 = q_19.matmul(transpose_18); q_19 = transpose_18 = None + attn_49 = attn_48 + relative_position_bias_29; attn_48 = relative_position_bias_29 = None + attn_mask_20 = x_118.new_zeros((14, 14)); x_118 = None + attn_mask_20[(slice(0, -7, None), slice(0, -7, None))] = 0; setitem_36 = attn_mask_20; setitem_36 = None + attn_mask_20[(slice(0, -7, None), slice(-7, -3, None))] = 1; setitem_37 = attn_mask_20; setitem_37 = None + attn_mask_20[(slice(0, -7, None), slice(-3, None, None))] = 2; setitem_38 = attn_mask_20; setitem_38 = None + attn_mask_20[(slice(-7, -3, None), slice(0, -7, None))] = 3; setitem_39 = attn_mask_20; setitem_39 = None + attn_mask_20[(slice(-7, -3, None), slice(-7, -3, None))] = 4; setitem_40 = attn_mask_20; setitem_40 = None + attn_mask_20[(slice(-7, -3, None), slice(-3, None, None))] = 5; setitem_41 = attn_mask_20; setitem_41 = None + attn_mask_20[(slice(-3, None, None), slice(0, -7, None))] = 6; setitem_42 = attn_mask_20; setitem_42 = None + attn_mask_20[(slice(-3, None, None), slice(-7, -3, None))] = 7; setitem_43 = attn_mask_20; setitem_43 = None + attn_mask_20[(slice(-3, None, None), slice(-3, None, None))] = 8; setitem_44 = attn_mask_20; setitem_44 = None + attn_mask_21 = attn_mask_20.view(2, 7, 2, 7); attn_mask_20 = None + permute_44 = attn_mask_21.permute(0, 2, 1, 3); attn_mask_21 = None + attn_mask_22 = permute_44.reshape(4, 49); permute_44 = None + unsqueeze_26 = attn_mask_22.unsqueeze(1) + unsqueeze_27 = attn_mask_22.unsqueeze(2); attn_mask_22 = None + attn_mask_23 = unsqueeze_26 - unsqueeze_27; unsqueeze_26 = unsqueeze_27 = None + ne_4 = attn_mask_23 != 0 + masked_fill_8 = attn_mask_23.masked_fill(ne_4, -100.0); ne_4 = None + eq_4 = attn_mask_23.__eq__(0); attn_mask_23 = None + 
attn_mask_24 = masked_fill_8.masked_fill(eq_4, 0.0); masked_fill_8 = eq_4 = None + attn_50 = attn_49.view(1, 4, 12, 49, 49); attn_49 = None + unsqueeze_28 = attn_mask_24.unsqueeze(1); attn_mask_24 = None + unsqueeze_29 = unsqueeze_28.unsqueeze(0); unsqueeze_28 = None + attn_51 = attn_50 + unsqueeze_29; attn_50 = unsqueeze_29 = None + attn_52 = attn_51.view(-1, 12, 49, 49); attn_51 = None + attn_53 = torch.nn.functional.softmax(attn_52, dim = -1); attn_52 = None + attn_54 = torch.nn.functional.dropout(attn_53, p = 0.0, training = False); attn_53 = None + matmul_19 = attn_54.matmul(v_9); attn_54 = v_9 = None + transpose_19 = matmul_19.transpose(1, 2); matmul_19 = None + x_119 = transpose_19.reshape(4, 49, 384); transpose_19 = None + x_120 = torch.nn.functional.linear(x_119, l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_); x_119 = l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_ = None + x_121 = torch.nn.functional.dropout(x_120, p = 0.0, training = False); x_120 = None + x_122 = x_121.view(1, 2, 2, 7, 7, 384); x_121 = None + permute_45 = x_122.permute(0, 1, 3, 2, 4, 5); x_122 = None + x_123 = permute_45.reshape(1, 14, 14, 384); permute_45 = None + x_124 = torch.roll(x_123, shifts = (3, 3), dims = (1, 2)); x_123 = None + getitem_57 = x_124[(slice(None, None, None), slice(None, 14, None), slice(None, 14, None), slice(None, None, None))]; x_124 = None + x_125 = getitem_57.contiguous(); getitem_57 = None + _log_api_usage_once_18 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_18 = None + x_126 = x_114 + x_125; x_114 = x_125 = None + layer_norm_22 = torch.nn.functional.layer_norm(x_126, (384,), 
l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_ = None + input_49 = torch.nn.functional.linear(layer_norm_22, l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_); layer_norm_22 = l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_ = None + input_50 = torch.nn.functional.gelu(input_49, approximate = 'none'); input_49 = None + input_51 = torch.nn.functional.dropout(input_50, 0.0, False, False); input_50 = None + input_52 = torch.nn.functional.linear(input_51, l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_); input_51 = l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_ = None + input_53 = torch.nn.functional.dropout(input_52, 0.0, False, False); input_52 = None + _log_api_usage_once_19 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_19 = None + x_127 = x_126 + input_53; x_126 = input_53 = None + x_128 = torch.nn.functional.pad(x_127, (0, 0, 0, 0, 0, 0), 'constant', None); x_127 = None + x0_2 = x_128[(Ellipsis, slice(0, None, 2), slice(0, None, 2), slice(None, None, None))] + x1_2 = x_128[(Ellipsis, slice(1, None, 2), slice(0, None, 2), slice(None, None, None))] + x2_2 = x_128[(Ellipsis, slice(0, None, 2), slice(1, None, 2), slice(None, None, None))] + x3_2 = x_128[(Ellipsis, 
slice(1, None, 2), slice(1, None, 2), slice(None, None, None))]; x_128 = None + x_129 = torch.cat([x0_2, x1_2, x2_2, x3_2], -1); x0_2 = x1_2 = x2_2 = x3_2 = None + x_130 = torch.nn.functional.layer_norm(x_129, (1536,), l_self_modules_features_modules_6_modules_norm_parameters_weight_, l_self_modules_features_modules_6_modules_norm_parameters_bias_, 1e-05); x_129 = l_self_modules_features_modules_6_modules_norm_parameters_weight_ = l_self_modules_features_modules_6_modules_norm_parameters_bias_ = None + x_131 = torch.nn.functional.linear(x_130, l_self_modules_features_modules_6_modules_reduction_parameters_weight_, None); x_130 = l_self_modules_features_modules_6_modules_reduction_parameters_weight_ = None + layer_norm_24 = torch.nn.functional.layer_norm(x_131, (768,), l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_ = None + relative_position_bias_30 = l_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_31 = relative_position_bias_30.view(49, 49, -1); relative_position_bias_30 = None + permute_46 = relative_position_bias_31.permute(2, 0, 1); relative_position_bias_31 = None + contiguous_20 = permute_46.contiguous(); permute_46 = None + relative_position_bias_32 = contiguous_20.unsqueeze(0); contiguous_20 = None + x_132 = torch.nn.functional.pad(layer_norm_24, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_24 = None + x_133 = x_132.view(1, 1, 7, 1, 
7, 768); x_132 = None + permute_47 = x_133.permute(0, 1, 3, 2, 4, 5); x_133 = None + x_134 = permute_47.reshape(1, 49, 768); permute_47 = None + qkv_20 = torch.nn.functional.linear(x_134, l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_); x_134 = l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_ = None + reshape_46 = qkv_20.reshape(1, 49, 3, 24, 32); qkv_20 = None + qkv_21 = reshape_46.permute(2, 0, 3, 1, 4); reshape_46 = None + q_20 = qkv_21[0] + k_10 = qkv_21[1] + v_10 = qkv_21[2]; qkv_21 = None + q_21 = q_20 * 0.1767766952966369; q_20 = None + transpose_20 = k_10.transpose(-2, -1); k_10 = None + attn_55 = q_21.matmul(transpose_20); q_21 = transpose_20 = None + attn_56 = attn_55 + relative_position_bias_32; attn_55 = relative_position_bias_32 = None + attn_57 = torch.nn.functional.softmax(attn_56, dim = -1); attn_56 = None + attn_58 = torch.nn.functional.dropout(attn_57, p = 0.0, training = False); attn_57 = None + matmul_21 = attn_58.matmul(v_10); attn_58 = v_10 = None + transpose_21 = matmul_21.transpose(1, 2); matmul_21 = None + x_135 = transpose_21.reshape(1, 49, 768); transpose_21 = None + x_136 = torch.nn.functional.linear(x_135, l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_); x_135 = l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_ = None + x_137 = torch.nn.functional.dropout(x_136, p = 0.0, training = False); x_136 = None + x_138 = x_137.view(1, 1, 1, 7, 7, 768); x_137 = None + permute_49 = x_138.permute(0, 1, 3, 2, 4, 5); x_138 = None + x_139 = 
permute_49.reshape(1, 7, 7, 768); permute_49 = None + getitem_66 = x_139[(slice(None, None, None), slice(None, 7, None), slice(None, 7, None), slice(None, None, None))]; x_139 = None + x_140 = getitem_66.contiguous(); getitem_66 = None + _log_api_usage_once_20 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_20 = None + x_141 = x_131 + x_140; x_131 = x_140 = None + layer_norm_25 = torch.nn.functional.layer_norm(x_141, (768,), l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_ = None + input_54 = torch.nn.functional.linear(layer_norm_25, l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_); layer_norm_25 = l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_ = None + input_55 = torch.nn.functional.gelu(input_54, approximate = 'none'); input_54 = None + input_56 = torch.nn.functional.dropout(input_55, 0.0, False, False); input_55 = None + input_57 = torch.nn.functional.linear(input_56, l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_); input_56 = l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_ = None + input_58 = torch.nn.functional.dropout(input_57, 0.0, False, False); input_57 = None + _log_api_usage_once_21 = 
torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_21 = None + x_142 = x_141 + input_58; x_141 = input_58 = None + layer_norm_26 = torch.nn.functional.layer_norm(x_142, (768,), l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_, 1e-05); l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_ = None + relative_position_bias_33 = l_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_[l_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_]; l_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_ = l_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_ = None + relative_position_bias_34 = relative_position_bias_33.view(49, 49, -1); relative_position_bias_33 = None + permute_50 = relative_position_bias_34.permute(2, 0, 1); relative_position_bias_34 = None + contiguous_22 = permute_50.contiguous(); permute_50 = None + relative_position_bias_35 = contiguous_22.unsqueeze(0); contiguous_22 = None + x_143 = torch.nn.functional.pad(layer_norm_26, (0, 0, 0, 0, 0, 0), 'constant', None); layer_norm_26 = None + x_144 = x_143.view(1, 1, 7, 1, 7, 768); x_143 = None + permute_51 = x_144.permute(0, 1, 3, 2, 4, 5); x_144 = None + x_145 = permute_51.reshape(1, 49, 768); permute_51 = None + qkv_22 = torch.nn.functional.linear(x_145, l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_); x_145 = l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_ 
= None + reshape_50 = qkv_22.reshape(1, 49, 3, 24, 32); qkv_22 = None + qkv_23 = reshape_50.permute(2, 0, 3, 1, 4); reshape_50 = None + q_22 = qkv_23[0] + k_11 = qkv_23[1] + v_11 = qkv_23[2]; qkv_23 = None + q_23 = q_22 * 0.1767766952966369; q_22 = None + transpose_22 = k_11.transpose(-2, -1); k_11 = None + attn_59 = q_23.matmul(transpose_22); q_23 = transpose_22 = None + attn_60 = attn_59 + relative_position_bias_35; attn_59 = relative_position_bias_35 = None + attn_61 = torch.nn.functional.softmax(attn_60, dim = -1); attn_60 = None + attn_62 = torch.nn.functional.dropout(attn_61, p = 0.0, training = False); attn_61 = None + matmul_23 = attn_62.matmul(v_11); attn_62 = v_11 = None + transpose_23 = matmul_23.transpose(1, 2); matmul_23 = None + x_146 = transpose_23.reshape(1, 49, 768); transpose_23 = None + x_147 = torch.nn.functional.linear(x_146, l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_); x_146 = l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_ = None + x_148 = torch.nn.functional.dropout(x_147, p = 0.0, training = False); x_147 = None + x_149 = x_148.view(1, 1, 1, 7, 7, 768); x_148 = None + permute_53 = x_149.permute(0, 1, 3, 2, 4, 5); x_149 = None + x_150 = permute_53.reshape(1, 7, 7, 768); permute_53 = None + getitem_71 = x_150[(slice(None, None, None), slice(None, 7, None), slice(None, 7, None), slice(None, None, None))]; x_150 = None + x_151 = getitem_71.contiguous(); getitem_71 = None + _log_api_usage_once_22 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_22 = None + x_152 = x_142 + x_151; x_142 = x_151 = None + layer_norm_27 = torch.nn.functional.layer_norm(x_152, (768,), 
l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_, 1e-05); l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_ = None + input_59 = torch.nn.functional.linear(layer_norm_27, l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_); layer_norm_27 = l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_ = None + input_60 = torch.nn.functional.gelu(input_59, approximate = 'none'); input_59 = None + input_61 = torch.nn.functional.dropout(input_60, 0.0, False, False); input_60 = None + input_62 = torch.nn.functional.linear(input_61, l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_, l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_); input_61 = l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_ = l_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_ = None + input_63 = torch.nn.functional.dropout(input_62, 0.0, False, False); input_62 = None + _log_api_usage_once_23 = torch._C._log_api_usage_once('torchvision.ops.stochastic_depth.stochastic_depth'); _log_api_usage_once_23 = None + x_153 = x_152 + input_63; x_152 = input_63 = None + x_154 = torch.nn.functional.layer_norm(x_153, (768,), l_self_modules_norm_parameters_weight_, l_self_modules_norm_parameters_bias_, 1e-05); x_153 = l_self_modules_norm_parameters_weight_ = l_self_modules_norm_parameters_bias_ = None + x_155 = torch.permute(x_154, [0, 3, 1, 2]); x_154 = None + x_156 = torch.nn.functional.adaptive_avg_pool2d(x_155, 1); x_155 = None + x_157 = x_156.flatten(1, 
-1); x_156 = None + x_158 = torch.nn.functional.linear(x_157, l_self_modules_head_parameters_weight_, l_self_modules_head_parameters_bias_); x_157 = l_self_modules_head_parameters_weight_ = l_self_modules_head_parameters_bias_ = None + return (x_158,) + \ No newline at end of file diff --git a/samples/torchvision/swin_t/weight_meta.py b/samples/torchvision/swin_t/weight_meta.py new file mode 100644 index 000000000..d23a6a67c --- /dev/null +++ b/samples/torchvision/swin_t/weight_meta.py @@ -0,0 +1,1685 @@ +class Program_weight_tensor_meta_L_self_modules_features_modules_0_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_0_modules_0_parameters_weight_" + shape = [96, 3, 4, 4] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.056 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_0_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_0_modules_0_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 0.024 + std = 0.294 + data = None + +class Program_weight_tensor_meta_L_x_: + name = "L_x_" + shape = [1, 3, 224, 224] + dtype = "torch.float32" + device = "cpu" + mean = 0.226 + std = 1.286 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_0_modules_2_parameters_weight_: + name = "L_self_modules_features_modules_0_modules_2_parameters_weight_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 1.440 + std = 1.340 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_0_modules_2_parameters_bias_: + name = "L_self_modules_features_modules_0_modules_2_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.003 + std = 0.489 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_weight_" + shape = [96] + 
dtype = "torch.float32" + device = "cpu" + mean = 0.717 + std = 0.413 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_norm1_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.030 + std = 0.380 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 3] + dtype = "torch.float32" + device = "cpu" + mean = -0.614 + std = 1.215 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [288, 96] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.073 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_weight_" + shape = [96, 96] + dtype = "torch.float32" + device = "cpu" + mean = -0.002 + std = 0.050 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_modules_qkv_parameters_bias_" + shape = [288] + dtype = 
"torch.float32" + device = "cpu" + mean = 0.001 + std = 0.541 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_attn_modules_proj_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.178 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_weight_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 1.720 + std = 0.737 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_norm2_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.064 + std = 0.507 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_weight_" + shape = [384, 96] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.052 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_mlp_modules_0_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.328 + std = 0.386 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_weight_" + shape = [96, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.043 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_0_modules_mlp_modules_3_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.002 + std = 0.284 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_weight_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 1.232 + std = 0.262 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_norm1_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.039 + std = 0.387 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 3] + dtype = "torch.float32" + device = "cpu" + mean = -0.928 + std = 1.642 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [288, 96] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.070 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_weight_" + shape = [96, 96] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.049 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_modules_qkv_parameters_bias_" + shape = [288] + dtype = "torch.float32" + device = "cpu" + mean = -0.021 + std = 0.433 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_attn_modules_proj_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.176 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_weight_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = 2.111 + std = 0.495 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_norm2_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.047 + std = 0.330 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_weight_" + shape = [384, 96] + dtype = "torch.float32" + device = "cpu" + mean = -0.002 + std = 0.050 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_mlp_modules_0_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.499 + std = 0.361 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_weight_" + shape = [96, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.051 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_1_modules_1_modules_mlp_modules_3_parameters_bias_" + shape = [96] + dtype = "torch.float32" + device = "cpu" + mean = -0.006 + std = 0.235 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_2_modules_norm_parameters_weight_: + name = "L_self_modules_features_modules_2_modules_norm_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.737 + std = 0.211 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_2_modules_norm_parameters_bias_: + name = "L_self_modules_features_modules_2_modules_norm_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.040 + std = 0.273 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_2_modules_reduction_parameters_weight_: + name = "L_self_modules_features_modules_2_modules_reduction_parameters_weight_" + shape = [192, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.042 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_: + name = 
"L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_weight_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 0.664 + std = 0.128 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_0_modules_norm1_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.184 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_3_modules_0_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 6] + dtype = "torch.float32" + device = "cpu" + mean = -0.852 + std = 1.255 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_3_modules_0_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [576, 192] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.060 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_weight_" + shape = [192, 192] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.044 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_: + name = 
"L_self_modules_features_modules_3_modules_0_modules_attn_modules_qkv_parameters_bias_" + shape = [576] + dtype = "torch.float32" + device = "cpu" + mean = 0.006 + std = 0.384 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_0_modules_attn_modules_proj_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.005 + std = 0.229 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_weight_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 1.272 + std = 0.326 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_0_modules_norm2_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.005 + std = 0.183 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_weight_" + shape = [768, 192] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.050 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_0_modules_mlp_modules_0_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.669 + std = 0.263 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_weight_" + shape = [192, 
768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.048 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_0_modules_mlp_modules_3_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.289 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_weight_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 0.963 + std = 0.124 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_norm1_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.008 + std = 0.277 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 6] + dtype = "torch.float32" + device = "cpu" + mean = -0.743 + std = 1.505 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [576, 192] + dtype = "torch.float32" + device = "cpu" + mean 
= -0.000 + std = 0.058 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_weight_" + shape = [192, 192] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.046 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_modules_qkv_parameters_bias_" + shape = [576] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.359 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_attn_modules_proj_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.253 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_weight_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = 1.429 + std = 0.219 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_norm2_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.012 + std = 0.330 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_weight_" + shape = [768, 192] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.051 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_mlp_modules_0_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.642 + std = 0.203 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_weight_" + shape = [192, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.055 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_3_modules_1_modules_mlp_modules_3_parameters_bias_" + shape = [192] + dtype = "torch.float32" + device = "cpu" + mean = -0.005 + std = 0.337 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_4_modules_norm_parameters_weight_: + name = "L_self_modules_features_modules_4_modules_norm_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.362 + std = 0.087 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_4_modules_norm_parameters_bias_: + name = "L_self_modules_features_modules_4_modules_norm_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.177 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_4_modules_reduction_parameters_weight_: + name = "L_self_modules_features_modules_4_modules_reduction_parameters_weight_" + shape = [384, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.042 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_: + name = 
"L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.426 + std = 0.162 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_0_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.082 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_0_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.925 + std = 2.201 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_0_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.054 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.042 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_: + name = 
"L_self_modules_features_modules_5_modules_0_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = 0.002 + std = 0.338 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_0_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.006 + std = 0.245 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.756 + std = 0.204 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_0_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.215 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.048 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_0_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.723 + std = 0.238 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_weight_" + shape = [384, 
1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.039 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_0_modules_mlp_modules_3_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.233 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.543 + std = 0.116 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.116 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.793 + std = 1.902 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + 
mean = -0.000 + std = 0.058 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.040 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = 0.013 + std = 0.319 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.004 + std = 0.184 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.805 + std = 0.098 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.212 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.049 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.692 + std = 0.224 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_weight_" + shape = [384, 1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.043 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_1_modules_mlp_modules_3_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.002 + std = 0.197 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.683 + std = 0.154 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.142 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.355 + std = 1.539 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.053 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.043 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.318 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.254 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.869 + std = 0.073 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.237 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.049 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.670 + std = 0.204 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_weight_" + shape = [384, 1536] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.047 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_2_modules_mlp_modules_3_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.002 + std = 0.192 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.659 + std = 0.128 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_: + 
name = "L_self_modules_features_modules_5_modules_3_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.158 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_3_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.482 + std = 1.562 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_3_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.056 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.042 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_3_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = 0.005 + std = 0.326 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_: + name 
= "L_self_modules_features_modules_5_modules_3_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.191 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.949 + std = 0.087 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_3_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.006 + std = 0.248 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.049 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_3_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.653 + std = 0.210 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_weight_" + shape = [384, 1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.054 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_3_modules_mlp_modules_3_parameters_bias_" + shape = [384] 
+ dtype = "torch.float32" + device = "cpu" + mean = 0.002 + std = 0.175 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.819 + std = 0.166 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = -0.001 + std = 0.211 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.121 + std = 0.915 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.052 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device 
= "cpu" + mean = 0.000 + std = 0.048 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = -0.006 + std = 0.359 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.004 + std = 0.311 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 1.065 + std = 0.114 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.005 + std = 0.274 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.050 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.641 + std = 0.238 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_weight_" + shape = [384, 1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.062 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_4_modules_mlp_modules_3_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.004 + std = 0.227 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.858 + std = 0.154 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_norm1_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.243 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 12] + dtype = "torch.float32" + device = "cpu" + mean = -0.439 + std = 1.099 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_weight_" + shape = [1152, 384] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.055 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_weight_" + shape = [384, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.046 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_modules_qkv_parameters_bias_" + shape = [1152] + dtype = "torch.float32" + device = "cpu" + mean = -0.008 + std = 0.349 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_attn_modules_proj_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.245 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_weight_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 1.170 + std = 0.124 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_norm2_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.006 + std = 0.294 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_weight_" + shape = [1536, 384] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.053 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_mlp_modules_0_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.644 + std = 0.295 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_weight_" + shape = [384, 1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.067 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_5_modules_5_modules_mlp_modules_3_parameters_bias_" + shape = [384] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.210 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_6_modules_norm_parameters_weight_: + name = "L_self_modules_features_modules_6_modules_norm_parameters_weight_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = 0.309 + std = 0.027 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_6_modules_norm_parameters_bias_: + name = "L_self_modules_features_modules_6_modules_norm_parameters_bias_" + shape = [1536] + dtype = "torch.float32" + device = "cpu" + mean = 0.002 + std = 0.134 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_6_modules_reduction_parameters_weight_: + name = 
"L_self_modules_features_modules_6_modules_reduction_parameters_weight_" + shape = [768, 1536] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.041 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.361 + std = 0.129 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_norm1_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.004 + std = 0.116 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_7_modules_0_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 24] + dtype = "torch.float32" + device = "cpu" + mean = 0.035 + std = 1.271 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_7_modules_0_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std = None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_weight_" + shape = [2304, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.050 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_: + name = 
"L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_weight_" + shape = [768, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.044 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_attn_modules_qkv_parameters_bias_" + shape = [2304] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.482 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_attn_modules_proj_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.401 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 2.165 + std = 0.427 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_norm2_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.036 + std = 0.750 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_weight_" + shape = [3072, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.001 + std = 0.049 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_mlp_modules_0_parameters_bias_" + shape 
= [3072] + dtype = "torch.float32" + device = "cpu" + mean = -0.622 + std = 0.164 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_weight_" + shape = [768, 3072] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.053 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_0_modules_mlp_modules_3_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.003 + std = 0.374 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.560 + std = 0.148 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_norm1_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.010 + std = 0.241 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_parameters_relative_position_bias_table_" + shape = [169, 24] + dtype = "torch.float32" + device = "cpu" + mean = 0.074 + std = 1.137 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_buffers_relative_position_index_" + shape = [2401] + dtype = "torch.int64" + device = "cpu" + mean = None + std 
= None + min_val = 0 + max_val = 168 + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_weight_" + shape = [2304, 768] + dtype = "torch.float32" + device = "cpu" + mean = -0.000 + std = 0.051 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_weight_" + shape = [768, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.046 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_modules_qkv_parameters_bias_" + shape = [2304] + dtype = "torch.float32" + device = "cpu" + mean = -0.009 + std = 0.441 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_attn_modules_proj_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.010 + std = 0.836 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 2.356 + std = 0.491 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_norm2_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = -0.005 + std = 0.528 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_weight_" + shape = [3072, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.049 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_mlp_modules_0_parameters_bias_" + shape = [3072] + dtype = "torch.float32" + device = "cpu" + mean = -0.625 + std = 0.229 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_: + name = "L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_weight_" + shape = [768, 3072] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.055 + data = None + +class Program_weight_tensor_meta_L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_: + name = "L_self_modules_features_modules_7_modules_1_modules_mlp_modules_3_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.007 + std = 0.540 + data = None + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_weight_: + name = "L_self_modules_norm_parameters_weight_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 2.007 + std = 0.267 + data = None + +class Program_weight_tensor_meta_L_self_modules_norm_parameters_bias_: + name = "L_self_modules_norm_parameters_bias_" + shape = [768] + dtype = "torch.float32" + device = "cpu" + mean = 0.007 + std = 0.160 + data = None + +class Program_weight_tensor_meta_L_self_modules_head_parameters_weight_: + name = "L_self_modules_head_parameters_weight_" + shape = [1000, 768] + dtype = "torch.float32" + device = "cpu" + mean = 0.000 + std = 0.050 + data = None + +class 
Program_weight_tensor_meta_L_self_modules_head_parameters_bias_: + name = "L_self_modules_head_parameters_bias_" + shape = [1000] + dtype = "torch.float32" + device = "cpu" + mean = -0.059 + std = 0.182 + data = None