Training in progress, epoch 1

Browse files

Files changed (4) hide show

config.json +112 -51
model.safetensors +2 -2
preprocessor_config.json +6 -7
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -1,80 +1,141 @@
 {
   "activation_dropout": 0.0,
-  "activation_function": "relu",
   "architectures": [
-    "ConditionalDetrForObjectDetection"
   ],
   "attention_dropout": 0.0,
-  "auxiliary_loss": false,
   "backbone_config": {
-    "backbone": "resnet50",
     "dtype": "float32",
-    "features_only": true,
-    "freeze_batch_norm_2d": false,
-    "model_type": "timm_backbone",
     "num_channels": 3,
     "out_features": [
-      "layer1",
-      "layer2",
-      "layer3",
-      "layer4"
     ],
     "out_indices": [
-      1,
       2,
       3,
       4
     ],
-    "output_stride": null,
     "stage_names": [
-      "act1",
-      "layer1",
-      "layer2",
-      "layer3",
-      "layer4"
-    ],
-    "use_pretrained_backbone": false
   },
-  "bbox_cost": 5,
-  "bbox_loss_coefficient": 5,
-  "class_cost": 2,
-  "cls_loss_coefficient": 2,
   "d_model": 256,
   "decoder_attention_heads": 8,
-  "decoder_ffn_dim": 2048,
-  "decoder_layerdrop": 0.0,
   "decoder_layers": 6,
-  "dice_loss_coefficient": 1,
-  "dropout": 0.1,
   "dtype": "float32",
   "encoder_attention_heads": 8,
-  "encoder_ffn_dim": 2048,
-  "encoder_layerdrop": 0.0,
-  "encoder_layers": 6,
-  "focal_alpha": 0.25,
-  "giou_cost": 2,
-  "giou_loss_coefficient": 2,
   "id2label": {
-    "0": "head",
-    "1": "helmet",
-    "2": "person"
   },
-  "init_std": 0.02,
-  "init_xavier_std": 1.0,
   "is_encoder_decoder": true,
   "label2id": {
-    "head": 0,
-    "helmet": 1,
-    "person": 2
   },
-  "mask_loss_coefficient": 1,
-  "max_position_embeddings": 1024,
-  "model_type": "conditional_detr",
-  "num_channels": 3,
-  "num_hidden_layers": 6,
   "num_queries": 300,
-  "position_embedding_type": "sine",
-  "scale_embedding": false,
-  "transformers_version": "5.3.0.dev0",
-  "use_cache": false
 }

 {
   "activation_dropout": 0.0,
+  "activation_function": "silu",
+  "anchor_image_size": null,
   "architectures": [
+    "RTDetrV2ForObjectDetection"
   ],
   "attention_dropout": 0.0,
+  "auxiliary_loss": true,
+  "backbone": null,
   "backbone_config": {
+    "depths": [
+      3,
+      4,
+      6,
+      3
+    ],
+    "downsample_in_bottleneck": false,
+    "downsample_in_first_stage": false,
     "dtype": "float32",
+    "embedding_size": 64,
+    "hidden_act": "relu",
+    "hidden_sizes": [
+      256,
+      512,
+      1024,
+      2048
+    ],
+    "layer_type": "bottleneck",
+    "model_type": "rt_detr_resnet",
     "num_channels": 3,
     "out_features": [
+      "stage2",
+      "stage3",
+      "stage4"
     ],
     "out_indices": [
       2,
       3,
       4
     ],
     "stage_names": [
+      "stem",
+      "stage1",
+      "stage2",
+      "stage3",
+      "stage4"
+    ]
   },
+  "backbone_kwargs": null,
+  "batch_norm_eps": 1e-05,
+  "box_noise_scale": 1.0,
   "d_model": 256,
+  "decoder_activation_function": "relu",
   "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 1024,
+  "decoder_in_channels": [
+    256,
+    256,
+    256
+  ],
   "decoder_layers": 6,
+  "decoder_method": "default",
+  "decoder_n_levels": 3,
+  "decoder_n_points": 4,
+  "decoder_offset_scale": 0.5,
+  "disable_custom_kernels": true,
+  "dropout": 0.0,
   "dtype": "float32",
+  "encode_proj_layers": [
+    2
+  ],
+  "encoder_activation_function": "gelu",
   "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 1024,
+  "encoder_hidden_dim": 256,
+  "encoder_in_channels": [
+    512,
+    1024,
+    2048
+  ],
+  "encoder_layers": 1,
+  "eos_coefficient": 0.0001,
+  "eval_size": null,
+  "feat_strides": [
+    8,
+    16,
+    32
+  ],
+  "focal_loss_alpha": 0.75,
+  "focal_loss_gamma": 2.0,
+  "freeze_backbone_batch_norms": true,
+  "hidden_expansion": 1.0,
   "id2label": {
+    "0": "road-traffic",
+    "1": "bicycles",
+    "2": "buses",
+    "3": "crosswalks",
+    "4": "fire hydrants",
+    "5": "motorcycles",
+    "6": "traffic lights",
+    "7": "vehicles"
   },
+  "initializer_bias_prior_prob": null,
+  "initializer_range": 0.01,
   "is_encoder_decoder": true,
   "label2id": {
+    "bicycles": 1,
+    "buses": 2,
+    "crosswalks": 3,
+    "fire hydrants": 4,
+    "motorcycles": 5,
+    "road-traffic": 0,
+    "traffic lights": 6,
+    "vehicles": 7
   },
+  "label_noise_ratio": 0.5,
+  "layer_norm_eps": 1e-05,
+  "learn_initial_query": false,
+  "matcher_alpha": 0.25,
+  "matcher_bbox_cost": 5.0,
+  "matcher_class_cost": 2.0,
+  "matcher_gamma": 2.0,
+  "matcher_giou_cost": 2.0,
+  "model_type": "rt_detr_v2",
+  "normalize_before": false,
+  "num_denoising": 100,
+  "num_feature_levels": 3,
   "num_queries": 300,
+  "positional_encoding_temperature": 10000,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
+  "use_cache": false,
+  "use_focal_loss": true,
+  "use_pretrained_backbone": false,
+  "use_timm_backbone": false,
+  "weight_loss_bbox": 5.0,
+  "weight_loss_giou": 2.0,
+  "weight_loss_vfl": 1.0,
+  "with_box_refine": true
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb52d41b98ea93c6a7c145148db3c1542160f57d09ea83e34cf29361b664a844
-size 174077740

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c0b275531c2f656740aa63a10c952053fd8c95b61c914b8f9b98b80516233e2
+size 168887224

preprocessor_config.json CHANGED Viewed

@@ -1,7 +1,6 @@
 {
-  "data_format": "channels_first",
   "do_convert_annotations": true,
-  "do_normalize": true,
   "do_pad": true,
   "do_rescale": true,
   "do_resize": true,
@@ -11,20 +10,20 @@
     0.456,
     0.406
   ],
-  "image_processor_type": "ConditionalDetrImageProcessorFast",
   "image_std": [
     0.229,
     0.224,
     0.225
   ],
   "pad_size": {
-    "height": 480,
-    "width": 480
   },
   "resample": 2,
   "rescale_factor": 0.00392156862745098,
   "size": {
-    "max_height": 480,
-    "max_width": 480
   }
 }

 {
   "do_convert_annotations": true,
+  "do_normalize": false,
   "do_pad": true,
   "do_rescale": true,
   "do_resize": true,
     0.456,
     0.406
   ],
+  "image_processor_type": "RTDetrImageProcessor",
   "image_std": [
     0.229,
     0.224,
     0.225
   ],
   "pad_size": {
+    "height": 640,
+    "width": 640
   },
   "resample": 2,
   "rescale_factor": 0.00392156862745098,
   "size": {
+    "max_height": 640,
+    "max_width": 640
   }
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba2bac2e45dc88fd8c1930f3e4663225d1c686e7a3d840161655bb704bbde682
 size 5201

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc26235de7531145f9e2db59614c1be6fe7e00a1f6fad2226e9def19b2a924d5
 size 5201