SinyaWoo committed on
Commit
7e904b5
·
verified ·
1 Parent(s): f26d090

Training in progress, epoch 1

Browse files
Files changed (4) hide show
  1. config.json +112 -51
  2. model.safetensors +2 -2
  3. preprocessor_config.json +6 -7
  4. training_args.bin +1 -1
config.json CHANGED
@@ -1,80 +1,141 @@
1
  {
2
  "activation_dropout": 0.0,
3
- "activation_function": "relu",
 
4
  "architectures": [
5
- "ConditionalDetrForObjectDetection"
6
  ],
7
  "attention_dropout": 0.0,
8
- "auxiliary_loss": false,
 
9
  "backbone_config": {
10
- "backbone": "resnet50",
 
 
 
 
 
 
 
11
  "dtype": "float32",
12
- "features_only": true,
13
- "freeze_batch_norm_2d": false,
14
- "model_type": "timm_backbone",
 
 
 
 
 
 
 
15
  "num_channels": 3,
16
  "out_features": [
17
- "layer1",
18
- "layer2",
19
- "layer3",
20
- "layer4"
21
  ],
22
  "out_indices": [
23
- 1,
24
  2,
25
  3,
26
  4
27
  ],
28
- "output_stride": null,
29
  "stage_names": [
30
- "act1",
31
- "layer1",
32
- "layer2",
33
- "layer3",
34
- "layer4"
35
- ],
36
- "use_pretrained_backbone": false
37
  },
38
- "bbox_cost": 5,
39
- "bbox_loss_coefficient": 5,
40
- "class_cost": 2,
41
- "cls_loss_coefficient": 2,
42
  "d_model": 256,
 
43
  "decoder_attention_heads": 8,
44
- "decoder_ffn_dim": 2048,
45
- "decoder_layerdrop": 0.0,
 
 
 
 
46
  "decoder_layers": 6,
47
- "dice_loss_coefficient": 1,
48
- "dropout": 0.1,
 
 
 
 
49
  "dtype": "float32",
 
 
 
 
50
  "encoder_attention_heads": 8,
51
- "encoder_ffn_dim": 2048,
52
- "encoder_layerdrop": 0.0,
53
- "encoder_layers": 6,
54
- "focal_alpha": 0.25,
55
- "giou_cost": 2,
56
- "giou_loss_coefficient": 2,
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  "id2label": {
58
- "0": "head",
59
- "1": "helmet",
60
- "2": "person"
 
 
 
 
 
61
  },
62
- "init_std": 0.02,
63
- "init_xavier_std": 1.0,
64
  "is_encoder_decoder": true,
65
  "label2id": {
66
- "head": 0,
67
- "helmet": 1,
68
- "person": 2
 
 
 
 
 
69
  },
70
- "mask_loss_coefficient": 1,
71
- "max_position_embeddings": 1024,
72
- "model_type": "conditional_detr",
73
- "num_channels": 3,
74
- "num_hidden_layers": 6,
 
 
 
 
 
 
 
75
  "num_queries": 300,
76
- "position_embedding_type": "sine",
77
- "scale_embedding": false,
78
- "transformers_version": "5.3.0.dev0",
79
- "use_cache": false
 
 
 
 
 
 
 
80
  }
 
1
  {
2
  "activation_dropout": 0.0,
3
+ "activation_function": "silu",
4
+ "anchor_image_size": null,
5
  "architectures": [
6
+ "RTDetrV2ForObjectDetection"
7
  ],
8
  "attention_dropout": 0.0,
9
+ "auxiliary_loss": true,
10
+ "backbone": null,
11
  "backbone_config": {
12
+ "depths": [
13
+ 3,
14
+ 4,
15
+ 6,
16
+ 3
17
+ ],
18
+ "downsample_in_bottleneck": false,
19
+ "downsample_in_first_stage": false,
20
  "dtype": "float32",
21
+ "embedding_size": 64,
22
+ "hidden_act": "relu",
23
+ "hidden_sizes": [
24
+ 256,
25
+ 512,
26
+ 1024,
27
+ 2048
28
+ ],
29
+ "layer_type": "bottleneck",
30
+ "model_type": "rt_detr_resnet",
31
  "num_channels": 3,
32
  "out_features": [
33
+ "stage2",
34
+ "stage3",
35
+ "stage4"
 
36
  ],
37
  "out_indices": [
 
38
  2,
39
  3,
40
  4
41
  ],
 
42
  "stage_names": [
43
+ "stem",
44
+ "stage1",
45
+ "stage2",
46
+ "stage3",
47
+ "stage4"
48
+ ]
 
49
  },
50
+ "backbone_kwargs": null,
51
+ "batch_norm_eps": 1e-05,
52
+ "box_noise_scale": 1.0,
 
53
  "d_model": 256,
54
+ "decoder_activation_function": "relu",
55
  "decoder_attention_heads": 8,
56
+ "decoder_ffn_dim": 1024,
57
+ "decoder_in_channels": [
58
+ 256,
59
+ 256,
60
+ 256
61
+ ],
62
  "decoder_layers": 6,
63
+ "decoder_method": "default",
64
+ "decoder_n_levels": 3,
65
+ "decoder_n_points": 4,
66
+ "decoder_offset_scale": 0.5,
67
+ "disable_custom_kernels": true,
68
+ "dropout": 0.0,
69
  "dtype": "float32",
70
+ "encode_proj_layers": [
71
+ 2
72
+ ],
73
+ "encoder_activation_function": "gelu",
74
  "encoder_attention_heads": 8,
75
+ "encoder_ffn_dim": 1024,
76
+ "encoder_hidden_dim": 256,
77
+ "encoder_in_channels": [
78
+ 512,
79
+ 1024,
80
+ 2048
81
+ ],
82
+ "encoder_layers": 1,
83
+ "eos_coefficient": 0.0001,
84
+ "eval_size": null,
85
+ "feat_strides": [
86
+ 8,
87
+ 16,
88
+ 32
89
+ ],
90
+ "focal_loss_alpha": 0.75,
91
+ "focal_loss_gamma": 2.0,
92
+ "freeze_backbone_batch_norms": true,
93
+ "hidden_expansion": 1.0,
94
  "id2label": {
95
+ "0": "road-traffic",
96
+ "1": "bicycles",
97
+ "2": "buses",
98
+ "3": "crosswalks",
99
+ "4": "fire hydrants",
100
+ "5": "motorcycles",
101
+ "6": "traffic lights",
102
+ "7": "vehicles"
103
  },
104
+ "initializer_bias_prior_prob": null,
105
+ "initializer_range": 0.01,
106
  "is_encoder_decoder": true,
107
  "label2id": {
108
+ "bicycles": 1,
109
+ "buses": 2,
110
+ "crosswalks": 3,
111
+ "fire hydrants": 4,
112
+ "motorcycles": 5,
113
+ "road-traffic": 0,
114
+ "traffic lights": 6,
115
+ "vehicles": 7
116
  },
117
+ "label_noise_ratio": 0.5,
118
+ "layer_norm_eps": 1e-05,
119
+ "learn_initial_query": false,
120
+ "matcher_alpha": 0.25,
121
+ "matcher_bbox_cost": 5.0,
122
+ "matcher_class_cost": 2.0,
123
+ "matcher_gamma": 2.0,
124
+ "matcher_giou_cost": 2.0,
125
+ "model_type": "rt_detr_v2",
126
+ "normalize_before": false,
127
+ "num_denoising": 100,
128
+ "num_feature_levels": 3,
129
  "num_queries": 300,
130
+ "positional_encoding_temperature": 10000,
131
+ "tie_word_embeddings": true,
132
+ "transformers_version": "5.0.0",
133
+ "use_cache": false,
134
+ "use_focal_loss": true,
135
+ "use_pretrained_backbone": false,
136
+ "use_timm_backbone": false,
137
+ "weight_loss_bbox": 5.0,
138
+ "weight_loss_giou": 2.0,
139
+ "weight_loss_vfl": 1.0,
140
+ "with_box_refine": true
141
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb52d41b98ea93c6a7c145148db3c1542160f57d09ea83e34cf29361b664a844
3
- size 174077740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c0b275531c2f656740aa63a10c952053fd8c95b61c914b8f9b98b80516233e2
3
+ size 168887224
preprocessor_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
- "data_format": "channels_first",
3
  "do_convert_annotations": true,
4
- "do_normalize": true,
5
  "do_pad": true,
6
  "do_rescale": true,
7
  "do_resize": true,
@@ -11,20 +10,20 @@
11
  0.456,
12
  0.406
13
  ],
14
- "image_processor_type": "ConditionalDetrImageProcessorFast",
15
  "image_std": [
16
  0.229,
17
  0.224,
18
  0.225
19
  ],
20
  "pad_size": {
21
- "height": 480,
22
- "width": 480
23
  },
24
  "resample": 2,
25
  "rescale_factor": 0.00392156862745098,
26
  "size": {
27
- "max_height": 480,
28
- "max_width": 480
29
  }
30
  }
 
1
  {
 
2
  "do_convert_annotations": true,
3
+ "do_normalize": false,
4
  "do_pad": true,
5
  "do_rescale": true,
6
  "do_resize": true,
 
10
  0.456,
11
  0.406
12
  ],
13
+ "image_processor_type": "RTDetrImageProcessor",
14
  "image_std": [
15
  0.229,
16
  0.224,
17
  0.225
18
  ],
19
  "pad_size": {
20
+ "height": 640,
21
+ "width": 640
22
  },
23
  "resample": 2,
24
  "rescale_factor": 0.00392156862745098,
25
  "size": {
26
+ "max_height": 640,
27
+ "max_width": 640
28
  }
29
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba2bac2e45dc88fd8c1930f3e4663225d1c686e7a3d840161655bb704bbde682
3
  size 5201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc26235de7531145f9e2db59614c1be6fe7e00a1f6fad2226e9def19b2a924d5
3
  size 5201