louislu9911 committed on
Commit 8b497b0
1 Parent(s): 3c02200

End of training

Files changed (6):
  1. README.md +21 -25
  2. config.json +1 -1
  3. configuration_moe.py +3 -3
  4. model.safetensors +2 -2
  5. modeling_moe.py +19 -34
  6. training_args.bin +1 -1
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [](https://huggingface.co/) on the imagefolder dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4329
-- Accuracy: 0.8668
+- Loss: 0.3758
+- Accuracy: 0.8762
 
 ## Model description
 
@@ -38,11 +38,11 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size: 200
-- eval_batch_size: 200
+- train_batch_size: 2000
+- eval_batch_size: 2000
 - seed: 42
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 800
+- total_train_batch_size: 8000
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
@@ -52,26 +52,22 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 0.4831 | 0.99 | 24 | 0.5000 | 0.8607 |
-| 0.3545 | 1.98 | 48 | 0.4135 | 0.8696 |
-| 0.3388 | 2.97 | 72 | 0.4527 | 0.8654 |
-| 0.3378 | 4.0 | 97 | 0.4225 | 0.8706 |
-| 0.3372 | 4.99 | 121 | 0.4032 | 0.8654 |
-| 0.326 | 5.98 | 145 | 0.4222 | 0.8654 |
-| 0.3437 | 6.97 | 169 | 0.4231 | 0.8640 |
-| 0.3157 | 8.0 | 194 | 0.3980 | 0.8720 |
-| 0.3193 | 8.99 | 218 | 0.4001 | 0.8682 |
-| 0.3027 | 9.98 | 242 | 0.4163 | 0.8650 |
-| 0.2933 | 10.97 | 266 | 0.4105 | 0.8715 |
-| 0.3041 | 12.0 | 291 | 0.4004 | 0.8729 |
-| 0.2845 | 12.99 | 315 | 0.4020 | 0.8720 |
-| 0.2845 | 13.98 | 339 | 0.4223 | 0.8715 |
-| 0.2797 | 14.97 | 363 | 0.4089 | 0.8678 |
-| 0.295 | 16.0 | 388 | 0.4162 | 0.8701 |
-| 0.2767 | 16.99 | 412 | 0.4369 | 0.8678 |
-| 0.2692 | 17.98 | 436 | 0.4292 | 0.8678 |
-| 0.2543 | 18.97 | 460 | 0.4328 | 0.8659 |
-| 0.2743 | 19.79 | 480 | 0.4329 | 0.8668 |
+| No log | 0.8 | 2 | 0.4078 | 0.8724 |
+| No log | 2.0 | 5 | 0.3771 | 0.8776 |
+| No log | 2.8 | 7 | 0.3776 | 0.8762 |
+| 0.3197 | 4.0 | 10 | 0.3773 | 0.8762 |
+| 0.3197 | 4.8 | 12 | 0.3768 | 0.8762 |
+| 0.3197 | 6.0 | 15 | 0.3762 | 0.8762 |
+| 0.3197 | 6.8 | 17 | 0.3760 | 0.8762 |
+| 0.2995 | 8.0 | 20 | 0.3759 | 0.8762 |
+| 0.2995 | 8.8 | 22 | 0.3759 | 0.8762 |
+| 0.2995 | 10.0 | 25 | 0.3758 | 0.8762 |
+| 0.2995 | 10.8 | 27 | 0.3758 | 0.8762 |
+| 0.2996 | 12.0 | 30 | 0.3758 | 0.8762 |
+| 0.2996 | 12.8 | 32 | 0.3758 | 0.8762 |
+| 0.2996 | 14.0 | 35 | 0.3758 | 0.8762 |
+| 0.2996 | 14.8 | 37 | 0.3758 | 0.8762 |
+| 0.3024 | 16.0 | 40 | 0.3758 | 0.8762 |
 
 
 ### Framework versions
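Note on the new hyperparameters: `total_train_batch_size` is the product of the per-device batch size and the gradient accumulation steps. A minimal sanity check, assuming a single device (the card does not report the device count):

```python
# Effective train batch size implied by the card above.
# num_devices = 1 is an assumption; with N devices the product gains a factor of N.
train_batch_size = 2000
gradient_accumulation_steps = 4
num_devices = 1

total_train_batch_size = train_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 8000
```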
config.json CHANGED
@@ -6,7 +6,7 @@
     "AutoConfig": "configuration_moe.MoEConfig",
     "AutoModelForImageClassification": "modeling_moe.MoEModelForImageClassification"
   },
-  "base_model": "louislu9911/BaseModel-leaf-disease-convnextv2-base-1k-224-0_1_2_3_4",
+  "baseline_model": "louislu9911/BaseModel-leaf-disease-convnextv2-base-1k-224-0_1_2_3_4",
   "expert_class_mapping": {
     "0": [
       0,
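The only change to `config.json` is the rename of the serialized key from `base_model` to `baseline_model`; any code that reads the raw JSON must follow suit. A minimal sketch (the local file path is illustrative):

```python
import json

# After this commit the key is "baseline_model"; configs serialized
# before it still carry "base_model" and would raise KeyError here.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["baseline_model"])
# "louislu9911/BaseModel-leaf-disease-convnextv2-base-1k-224-0_1_2_3_4"
```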
configuration_moe.py CHANGED
@@ -13,7 +13,7 @@ EXPERTS = [
 SWITCH_GATE = (
     f"{DEFAULT_HUGGINGFACE_ACCOUNT}/switch_gate-leaf-disease-{model_checkpoint}"
 )
-BASE_MODEL = (
+BASELINE_MODEL = (
     f"{DEFAULT_HUGGINGFACE_ACCOUNT}/BaseModel-leaf-disease-{model_checkpoint}-0_1_2_3_4"
 )
 
@@ -25,14 +25,14 @@ class MoEConfig(PretrainedConfig):
         self,
         experts: List[str] = EXPERTS,
         switch_gate: str = SWITCH_GATE,
-        base_model: str = BASE_MODEL,
+        baseline_model: str = BASELINE_MODEL,
         num_classes: int = 5,
         expert_class_mapping: Dict[int, List[int]] = None,
         **kwargs,
     ):
         self.experts = experts
         self.switch_gate = switch_gate
-        self.base_model = base_model
+        self.baseline_model = baseline_model
         self.num_classes = num_classes
         self.expert_class_mapping = expert_class_mapping
         super().__init__(**kwargs)
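The Python-side rename mirrors the JSON key change above and matches the single baseline model now kept in `modeling_moe.py`. A minimal sketch of how the renamed default surfaces, assuming the module is importable as `configuration_moe`:

```python
from configuration_moe import MoEConfig

# The default now comes from the BASELINE_MODEL constant; the old
# `base_model` keyword no longer exists on MoEConfig.
config = MoEConfig()
print(config.baseline_model)
# "louislu9911/BaseModel-leaf-disease-convnextv2-base-1k-224-0_1_2_3_4"
```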
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6541987d439493d740c0ee1c047cba4fead52be9508a1520391b75ea8d4ced72
-size 2991581672
+oid sha256:d274c9e2d535934cc02d89e5d0c7f84c2988a384bb531a7af68804378247063b
+size 2022035716
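The checkpoint shrinks from ~2.99 GB to ~2.02 GB, roughly what removing one duplicated base-model copy and one of the two large subgate MLPs would save. A back-of-envelope check (fp32 weights and the ConvNeXt V2-base parameter count are assumptions, not part of the diff):

```python
# Rough size accounting for the ~0.97 GB drop in model.safetensors.
def subgate_params(num_out: int) -> int:
    # Flatten(224*224*3) -> 1024 -> 512 -> num_out, including biases
    d_in = 224 * 224 * 3
    return (d_in * 1024 + 1024) + (1024 * 512 + 512) + (512 * num_out + num_out)

convnextv2_base_params = 88_700_000  # assumption: approximate ConvNeXt V2-base size

# Removed: one subgate with 5 * 2 = 10 outputs, plus one duplicated base model.
removed_bytes = (subgate_params(10) + convnextv2_base_params) * 4  # fp32
print(f"{removed_bytes / 1e9:.2f} GB")  # ~0.97 GB, matching 2.99 GB -> 2.02 GB
```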
modeling_moe.py CHANGED
@@ -5,14 +5,14 @@ from transformers import PreTrainedModel, AutoModelForImageClassification
 from .configuration_moe import MoEConfig
 
 
-def subgate(num_classes):
+def subgate(num_out):
     layers = nn.Sequential(
         nn.Flatten(),
         nn.Linear(224 * 224 * 3, 1024),
         nn.ReLU(),
         nn.Linear(1024, 512),
         nn.ReLU(),
-        nn.Linear(512, num_classes * 2),
+        nn.Linear(512, num_out),
     )
     return layers
 
@@ -26,11 +26,8 @@ class MoEModelForImageClassification(PreTrainedModel):
         self.switch_gate_model = AutoModelForImageClassification.from_pretrained(
             config.switch_gate
         )
-        self.base_model1 = AutoModelForImageClassification.from_pretrained(
-            config.base_model
-        )
-        self.base_model2 = AutoModelForImageClassification.from_pretrained(
-            config.base_model
+        self.baseline_model = AutoModelForImageClassification.from_pretrained(
+            config.baseline_model
         )
         self.expert_model_1 = AutoModelForImageClassification.from_pretrained(
             config.experts[0]
@@ -39,13 +36,12 @@
             config.experts[1]
         )
 
-        self.subgate1 = subgate(config.num_classes)
-        self.subgate2 = subgate(config.num_classes)
+        self.subgate = subgate(2)
 
         # Freeze all params
         for module in [
-            self.base_model1,
-            self.base_model2,
+            self.switch_gate_model,
+            self.baseline_model,
             self.expert_model_1,
             self.expert_model_2,
         ]:
@@ -54,36 +50,25 @@
 
     def forward(self, pixel_values, labels=None):
         switch_gate_result = self.switch_gate_model(pixel_values).logits
-        base_model1_result = self.base_model1(pixel_values).logits
-        base_model2_result = self.base_model2(pixel_values).logits
-
         expert1_result = self.expert_model_1(pixel_values).logits
         expert2_result = self.expert_model_2(pixel_values).logits
 
-        subgate1_result = self.subgate1(pixel_values)
-        subgate1_result = torch.reshape(subgate1_result, (2, -1, self.num_classes))
-
-        subgate2_result = self.subgate2(pixel_values)
-        subgate2_result = torch.reshape(subgate2_result, (2, -1, self.num_classes))
-
-        expert1_and_base_res = (
-            expert1_result * subgate1_result[0, :, :]
-            + base_model1_result * subgate1_result[1, :, :]
-        )
-        expert2_and_base_res = (
-            expert2_result * subgate2_result[0, :, :]
-            + base_model2_result * subgate2_result[1, :, :]
-        )
-
-        # Gating Network
-        expert1_and_base_res = expert1_and_base_res * switch_gate_result[
-            :, 0
-        ].unsqueeze(1)
-        expert2_and_base_res = expert2_and_base_res * switch_gate_result[
-            :, 1
-        ].unsqueeze(1)
-
-        logits = expert1_and_base_res + expert2_and_base_res
+        # Gating Network
+        experts_result = torch.stack(
+            [expert1_result, expert2_result], dim=1
+        ) * switch_gate_result.unsqueeze(-1)
+
+        experts_result = experts_result.sum(dim=1)
+        baseline_model_result = self.baseline_model(pixel_values).logits
+
+        subgate_result = self.subgate(pixel_values)
+        subgate_prob = F.softmax(subgate_result, dim=-1)
+
+        experts_and_base_result = torch.stack(
+            [experts_result, baseline_model_result], dim=1
+        ) * subgate_prob.unsqueeze(-1)
+
+        logits = experts_and_base_result.sum(dim=1)
         if labels is not None:
             loss = F.cross_entropy(logits, labels)
             return {"loss": loss, "logits": logits}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b64507f98f39450bae4ef79c4f93c178ab811178fca4cc4522e322b941641ec3
+oid sha256:f0b7c5c3611717751700245126f1467e9b2d7ee3ebb544c05ba4806732954b7e
 size 4984