TroyDoesAI committed on
Commit
074523c
1 Parent(s): 2644870

A LoRA Of BlackSheep Vision For Phi-3-Mini

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. BlackSheepVision-Phi-3/README.md +202 -0
  2. BlackSheepVision-Phi-3/adapter_config.json +34 -0
  3. BlackSheepVision-Phi-3/adapter_model.safetensors +3 -0
  4. BlackSheepVision-Phi-3/checkpoint-1000/README.md +202 -0
  5. BlackSheepVision-Phi-3/checkpoint-1000/adapter_config.json +34 -0
  6. BlackSheepVision-Phi-3/checkpoint-1000/adapter_model.safetensors +3 -0
  7. BlackSheepVision-Phi-3/checkpoint-1000/training_log.json +12 -0
  8. BlackSheepVision-Phi-3/checkpoint-1000/training_prompt.json +16 -0
  9. BlackSheepVision-Phi-3/checkpoint-2000/README.md +202 -0
  10. BlackSheepVision-Phi-3/checkpoint-2000/adapter_config.json +34 -0
  11. BlackSheepVision-Phi-3/checkpoint-2000/adapter_model.safetensors +3 -0
  12. BlackSheepVision-Phi-3/checkpoint-2000/training_log.json +12 -0
  13. BlackSheepVision-Phi-3/checkpoint-2000/training_prompt.json +16 -0
  14. BlackSheepVision-Phi-3/checkpoint-3000/README.md +202 -0
  15. BlackSheepVision-Phi-3/checkpoint-3000/adapter_config.json +34 -0
  16. BlackSheepVision-Phi-3/checkpoint-3000/adapter_model.safetensors +3 -0
  17. BlackSheepVision-Phi-3/checkpoint-3000/training_log.json +12 -0
  18. BlackSheepVision-Phi-3/checkpoint-3000/training_prompt.json +16 -0
  19. BlackSheepVision-Phi-3/checkpoint-4000/README.md +202 -0
  20. BlackSheepVision-Phi-3/checkpoint-4000/adapter_config.json +34 -0
  21. BlackSheepVision-Phi-3/checkpoint-4000/adapter_model.safetensors +3 -0
  22. BlackSheepVision-Phi-3/checkpoint-4000/training_log.json +12 -0
  23. BlackSheepVision-Phi-3/checkpoint-4000/training_prompt.json +16 -0
  24. BlackSheepVision-Phi-3/runs/Aug10_10-40-54_Troy-New-PC/events.out.tfevents.1723311654.Troy-New-PC.22016.0 +3 -0
  25. BlackSheepVision-Phi-3/runs/Aug10_15-02-36_Troy-New-PC/events.out.tfevents.1723327356.Troy-New-PC.19660.0 +3 -0
  26. BlackSheepVision-Phi-3/runs/Aug10_15-12-06_Troy-New-PC/events.out.tfevents.1723327926.Troy-New-PC.19660.1 +3 -0
  27. BlackSheepVision-Phi-3/runs/Aug10_15-40-50_Troy-New-PC/events.out.tfevents.1723329650.Troy-New-PC.19660.2 +3 -0
  28. BlackSheepVision-Phi-3/runs/Aug10_15-48-28_Troy-New-PC/events.out.tfevents.1723330108.Troy-New-PC.19660.3 +3 -0
  29. BlackSheepVision-Phi-3/runs/Aug10_15-50-50_Troy-New-PC/events.out.tfevents.1723330250.Troy-New-PC.26212.0 +3 -0
  30. BlackSheepVision-Phi-3/runs/Aug10_15-54-59_Troy-New-PC/events.out.tfevents.1723330499.Troy-New-PC.24024.0 +3 -0
  31. BlackSheepVision-Phi-3/runs/Aug10_16-25-11_Troy-New-PC/events.out.tfevents.1723332311.Troy-New-PC.16236.0 +3 -0
  32. BlackSheepVision-Phi-3/runs/Aug10_16-32-57_Troy-New-PC/events.out.tfevents.1723332777.Troy-New-PC.16236.1 +3 -0
  33. BlackSheepVision-Phi-3/runs/Aug10_16-35-16_Troy-New-PC/events.out.tfevents.1723332916.Troy-New-PC.15040.0 +3 -0
  34. BlackSheepVision-Phi-3/runs/Aug10_17-21-18_Troy-New-PC/events.out.tfevents.1723335678.Troy-New-PC.9632.0 +3 -0
  35. BlackSheepVision-Phi-3/runs/Aug10_17-56-02_Troy-New-PC/events.out.tfevents.1723337762.Troy-New-PC.22188.0 +3 -0
  36. BlackSheepVision-Phi-3/runs/Aug10_17-57-40_Troy-New-PC/events.out.tfevents.1723337860.Troy-New-PC.15172.0 +3 -0
  37. BlackSheepVision-Phi-3/runs/Aug10_17-58-53_Troy-New-PC/events.out.tfevents.1723337933.Troy-New-PC.22412.0 +3 -0
  38. BlackSheepVision-Phi-3/runs/Aug10_18-00-16_Troy-New-PC/events.out.tfevents.1723338016.Troy-New-PC.9200.0 +3 -0
  39. BlackSheepVision-Phi-3/runs/Aug10_21-03-19_Troy-New-PC/events.out.tfevents.1723349000.Troy-New-PC.9200.1 +3 -0
  40. BlackSheepVision-Phi-3/runs/Aug10_21-10-34_Troy-New-PC/events.out.tfevents.1723349435.Troy-New-PC.8440.0 +3 -0
  41. BlackSheepVision-Phi-3/runs/Aug10_21-14-10_Troy-New-PC/events.out.tfevents.1723349650.Troy-New-PC.8440.1 +3 -0
  42. BlackSheepVision-Phi-3/runs/Aug11_07-57-56_Troy-New-PC/events.out.tfevents.1723388276.Troy-New-PC.8440.2 +3 -0
  43. BlackSheepVision-Phi-3/runs/Aug11_09-22-10_Troy-New-PC/events.out.tfevents.1723393331.Troy-New-PC.8440.3 +3 -0
  44. BlackSheepVision-Phi-3/runs/Aug11_10-11-33_Troy-New-PC/events.out.tfevents.1723396293.Troy-New-PC.3004.0 +3 -0
  45. BlackSheepVision-Phi-3/runs/Aug11_10-13-19_Troy-New-PC/events.out.tfevents.1723396399.Troy-New-PC.5380.0 +3 -0
  46. BlackSheepVision-Phi-3/runs/Aug11_10-14-32_Troy-New-PC/events.out.tfevents.1723396473.Troy-New-PC.9536.0 +3 -0
  47. BlackSheepVision-Phi-3/runs/Aug11_10-15-46_Troy-New-PC/events.out.tfevents.1723396546.Troy-New-PC.21572.0 +3 -0
  48. BlackSheepVision-Phi-3/runs/Aug11_10-18-58_Troy-New-PC/events.out.tfevents.1723396738.Troy-New-PC.12708.0 +3 -0
  49. BlackSheepVision-Phi-3/runs/Aug11_10-22-18_Troy-New-PC/events.out.tfevents.1723396938.Troy-New-PC.13048.0 +3 -0
  50. BlackSheepVision-Phi-3/training_log.json +17 -0
BlackSheepVision-Phi-3/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-vision-128k-instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for BlackSheepVision-Phi-3
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
BlackSheepVision-Phi-3/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "microsoft/Phi-3-vision-128k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 4096,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2048,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "k_proj",
25
+ "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
BlackSheepVision-Phi-3/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c7563b9393a75fd8525e5eba4c801f65a93ead90ddbc23f21ae202aada3242e
3
+ size 5771408256
BlackSheepVision-Phi-3/checkpoint-1000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-vision-128k-instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
BlackSheepVision-Phi-3/checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "microsoft/Phi-3-vision-128k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 4096,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2048,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "k_proj",
25
+ "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
BlackSheepVision-Phi-3/checkpoint-1000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29aabdd5ebb2c0f9f9ca3e53f8f185afce27f1217865a83db49c9f645533bff7
3
+ size 5771408256
BlackSheepVision-Phi-3/checkpoint-1000/training_log.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "microsoft_Phi-3-vision-128k-instruct",
3
+ "base_model_class": "Phi3VForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "gate, down, up, q, v, k, o",
7
+ "loss": 1.0155,
8
+ "grad_norm": 0.855490505695343,
9
+ "learning_rate": 9.886842462960446e-05,
10
+ "epoch": 0.47573739295908657,
11
+ "current_steps": 999
12
+ }
BlackSheepVision-Phi-3/checkpoint-1000/training_prompt.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_type": "dataset",
3
+ "template_1": "Who is your creator?\n\n%creator%",
4
+ "template_2": "%ChatML_CONVO%",
5
+ "template_3": "<|im_start|> I want you to say this next statement like a Gen-Z. \n %English% \n <|im_start|> %Gen-Z% <|im_end|>",
6
+ "template_4": "<|im_start|> %prompt% \n <|im_start|> %output% <|im_end|>",
7
+ "template_5": "<|im_start|> %instruction% \n <|im_start|> %output% <|im_end|>",
8
+ "template_6": "<|im_start|> %instruction%\n %input%\n <|im_start|> %output% <|im_end|>",
9
+ "template_7": "<|im_start|> %question% \n <|im_start|> %output% <|im_end|>",
10
+ "template_8": "<|im_start|> %question% \n <|im_start|> %response% <|im_end|>",
11
+ "template_9": "<|im_start|> %perspective% \n <|im_start|> %output% <|im_end|>",
12
+ "template_10": "<|im_start|> %perspective%\n %input%\n <|im_start|> %output% <|im_end|>",
13
+ "template_11": "<|im_start|> %instruction% \n <|im_start|> %toxic% <|im_end|>",
14
+ "template_12": "<|im_start|> %prompt% \n <|im_start|> %chosen% <|im_end|>",
15
+ "template_13": "<|im_start|> system\n : %system% \n <|im_end|> \n <|im_start|> %user% \n <|im_end|> \n <|im_start|> %assistant%"
16
+ }
BlackSheepVision-Phi-3/checkpoint-2000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-vision-128k-instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
BlackSheepVision-Phi-3/checkpoint-2000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "microsoft/Phi-3-vision-128k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 4096,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2048,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "k_proj",
25
+ "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
BlackSheepVision-Phi-3/checkpoint-2000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155b56140389f259fd74c3724efdca122ba6c3563feed3f652874f63e453fec7
3
+ size 5771408256
BlackSheepVision-Phi-3/checkpoint-2000/training_log.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "microsoft_Phi-3-vision-128k-instruct",
3
+ "base_model_class": "Phi3VForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "gate, down, up, q, v, k, o",
7
+ "loss": 0.9775,
8
+ "grad_norm": 1.721966028213501,
9
+ "learning_rate": 9.773684925920892e-05,
10
+ "epoch": 0.9514747859181731,
11
+ "current_steps": 1999
12
+ }
BlackSheepVision-Phi-3/checkpoint-2000/training_prompt.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_type": "dataset",
3
+ "template_1": "Who is your creator?\n\n%creator%",
4
+ "template_2": "%ChatML_CONVO%",
5
+ "template_3": "<|im_start|> I want you to say this next statement like a Gen-Z. \n %English% \n <|im_start|> %Gen-Z% <|im_end|>",
6
+ "template_4": "<|im_start|> %prompt% \n <|im_start|> %output% <|im_end|>",
7
+ "template_5": "<|im_start|> %instruction% \n <|im_start|> %output% <|im_end|>",
8
+ "template_6": "<|im_start|> %instruction%\n %input%\n <|im_start|> %output% <|im_end|>",
9
+ "template_7": "<|im_start|> %question% \n <|im_start|> %output% <|im_end|>",
10
+ "template_8": "<|im_start|> %question% \n <|im_start|> %response% <|im_end|>",
11
+ "template_9": "<|im_start|> %perspective% \n <|im_start|> %output% <|im_end|>",
12
+ "template_10": "<|im_start|> %perspective%\n %input%\n <|im_start|> %output% <|im_end|>",
13
+ "template_11": "<|im_start|> %instruction% \n <|im_start|> %toxic% <|im_end|>",
14
+ "template_12": "<|im_start|> %prompt% \n <|im_start|> %chosen% <|im_end|>",
15
+ "template_13": "<|im_start|> system\n : %system% \n <|im_end|> \n <|im_start|> %user% \n <|im_end|> \n <|im_start|> %assistant%"
16
+ }
BlackSheepVision-Phi-3/checkpoint-3000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-vision-128k-instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
BlackSheepVision-Phi-3/checkpoint-3000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "models\\microsoft_Phi-3-vision-128k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 4096,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2048,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "k_proj",
25
+ "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
BlackSheepVision-Phi-3/checkpoint-3000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f506d681fc893c8d5306386ad1e56f5feaaf3eecbf94a3bf92c0a6d35a33a4
3
+ size 5771408256
BlackSheepVision-Phi-3/checkpoint-3000/training_log.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "microsoft_Phi-3-vision-128k-instruct",
3
+ "base_model_class": "Phi3VForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "gate, down, up, q, v, k, o",
7
+ "loss": 0.5155,
8
+ "grad_norm": 1.1744245290756226,
9
+ "learning_rate": 9.660527388881338e-05,
10
+ "epoch": 1.4272121788772598,
11
+ "current_steps": 2999
12
+ }
BlackSheepVision-Phi-3/checkpoint-3000/training_prompt.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_type": "dataset",
3
+ "template_1": "Who is your creator?\n\n%creator%",
4
+ "template_2": "%ChatML_CONVO%",
5
+ "template_3": "<|im_start|> I want you to say this next statement like a Gen-Z. \n %English% \n <|im_start|> %Gen-Z% <|im_end|>",
6
+ "template_4": "<|im_start|> %prompt% \n <|im_start|> %output% <|im_end|>",
7
+ "template_5": "<|im_start|> %instruction% \n <|im_start|> %output% <|im_end|>",
8
+ "template_6": "<|im_start|> %instruction%\n %input%\n <|im_start|> %output% <|im_end|>",
9
+ "template_7": "<|im_start|> %question% \n <|im_start|> %output% <|im_end|>",
10
+ "template_8": "<|im_start|> %question% \n <|im_start|> %response% <|im_end|>",
11
+ "template_9": "<|im_start|> %perspective% \n <|im_start|> %output% <|im_end|>",
12
+ "template_10": "<|im_start|> %perspective%\n %input%\n <|im_start|> %output% <|im_end|>",
13
+ "template_11": "<|im_start|> %instruction% \n <|im_start|> %toxic% <|im_end|>",
14
+ "template_12": "<|im_start|> %prompt% \n <|im_start|> %chosen% <|im_end|>",
15
+ "template_13": "<|im_start|> system\n : %system% \n <|im_end|> \n <|im_start|> %user% \n <|im_end|> \n <|im_start|> %assistant%"
16
+ }
BlackSheepVision-Phi-3/checkpoint-4000/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: microsoft/Phi-3-vision-128k-instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
BlackSheepVision-Phi-3/checkpoint-4000/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "models\\microsoft_Phi-3-vision-128k-instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 4096,
14
+ "lora_dropout": 0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 2048,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "k_proj",
25
+ "o_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
BlackSheepVision-Phi-3/checkpoint-4000/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15d106bbc8ea868b631b6263e27c80d0d16bdf27ef70c23e92db1c5b07f35ae
3
+ size 5771408256
BlackSheepVision-Phi-3/checkpoint-4000/training_log.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "microsoft_Phi-3-vision-128k-instruct",
3
+ "base_model_class": "Phi3VForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "gate, down, up, q, v, k, o",
7
+ "loss": 0.649,
8
+ "grad_norm": 1.225587248802185,
9
+ "learning_rate": 9.547369851841784e-05,
10
+ "epoch": 1.9029495718363463,
11
+ "current_steps": 3999
12
+ }
BlackSheepVision-Phi-3/checkpoint-4000/training_prompt.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "template_type": "dataset",
3
+ "template_1": "Who is your creator?\n\n%creator%",
4
+ "template_2": "%ChatML_CONVO%",
5
+ "template_3": "<|im_start|> I want you to say this next statement like a Gen-Z. \n %English% \n <|im_start|> %Gen-Z% <|im_end|>",
6
+ "template_4": "<|im_start|> %prompt% \n <|im_start|> %output% <|im_end|>",
7
+ "template_5": "<|im_start|> %instruction% \n <|im_start|> %output% <|im_end|>",
8
+ "template_6": "<|im_start|> %instruction%\n %input%\n <|im_start|> %output% <|im_end|>",
9
+ "template_7": "<|im_start|> %question% \n <|im_start|> %output% <|im_end|>",
10
+ "template_8": "<|im_start|> %question% \n <|im_start|> %response% <|im_end|>",
11
+ "template_9": "<|im_start|> %perspective% \n <|im_start|> %output% <|im_end|>",
12
+ "template_10": "<|im_start|> %perspective%\n %input%\n <|im_start|> %output% <|im_end|>",
13
+ "template_11": "<|im_start|> %instruction% \n <|im_start|> %toxic% <|im_end|>",
14
+ "template_12": "<|im_start|> %prompt% \n <|im_start|> %chosen% <|im_end|>",
15
+ "template_13": "<|im_start|> system\n : %system% \n <|im_end|> \n <|im_start|> %user% \n <|im_end|> \n <|im_start|> %assistant%"
16
+ }
BlackSheepVision-Phi-3/runs/Aug10_10-40-54_Troy-New-PC/events.out.tfevents.1723311654.Troy-New-PC.22016.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099c012ba940bb7d2e790d86a7c955fc0f4ed8cb5245bdde209a7a29d5eb43c2
3
+ size 63880
BlackSheepVision-Phi-3/runs/Aug10_15-02-36_Troy-New-PC/events.out.tfevents.1723327356.Troy-New-PC.19660.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c5a67d71062f0d9d8248bed764eed4575f8a7717b22b4899c55734fab5e4ff
3
+ size 6358
BlackSheepVision-Phi-3/runs/Aug10_15-12-06_Troy-New-PC/events.out.tfevents.1723327926.Troy-New-PC.19660.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f698f4c18c619fe51ffea7ba82fd68d3f9048c018bb98b633a84005a8c7e65
3
+ size 6346
BlackSheepVision-Phi-3/runs/Aug10_15-40-50_Troy-New-PC/events.out.tfevents.1723329650.Troy-New-PC.19660.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b7fc195da52fd1b181e23b725973caa6408e88358ac01d297dbed180655770
3
+ size 6553
BlackSheepVision-Phi-3/runs/Aug10_15-48-28_Troy-New-PC/events.out.tfevents.1723330108.Troy-New-PC.19660.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24b83801e1c45219db42779330b54432ee8d0681a650926ebfa588ca25c82282
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_15-50-50_Troy-New-PC/events.out.tfevents.1723330250.Troy-New-PC.26212.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c3a9fcb9a6c720b44f6f816bb4a04909fb8bc504a13030c034939e397cf5cc
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_15-54-59_Troy-New-PC/events.out.tfevents.1723330499.Troy-New-PC.24024.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93bbca065ef1cc0131af9debb6b4bfaf94ef650e866fdbdc91c238b33c5f509
3
+ size 8690
BlackSheepVision-Phi-3/runs/Aug10_16-25-11_Troy-New-PC/events.out.tfevents.1723332311.Troy-New-PC.16236.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bb3163f967d46380f3cf70d0ffbf0cbcdcc3b15caefce9de3581082d72f4308
3
+ size 5933
BlackSheepVision-Phi-3/runs/Aug10_16-32-57_Troy-New-PC/events.out.tfevents.1723332777.Troy-New-PC.16236.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a3254dc1542ea44995a2daf31ee1880d12b0c5c2d1f891af3c020f71ca8a8c
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_16-35-16_Troy-New-PC/events.out.tfevents.1723332916.Troy-New-PC.15040.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41985da8e645d226d39d9c81a1f57231a6adde20bf2a3bb8a51c45bfb15d3d2e
3
+ size 12595
BlackSheepVision-Phi-3/runs/Aug10_17-21-18_Troy-New-PC/events.out.tfevents.1723335678.Troy-New-PC.9632.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31a5d9db85b4c9a8dd5544d62f229a0ea141b5147ad3c3f95163d56ca894750
3
+ size 47832
BlackSheepVision-Phi-3/runs/Aug10_17-56-02_Troy-New-PC/events.out.tfevents.1723337762.Troy-New-PC.22188.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d758a24bd567853a3c50507f4b9b7468e946049103f268547b3f2dcfeb9fc815
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_17-57-40_Troy-New-PC/events.out.tfevents.1723337860.Troy-New-PC.15172.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e326817cb78581d8b21921f8c1b3a9d56bb69bcde8aa5de9c5af59abe3148637
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_17-58-53_Troy-New-PC/events.out.tfevents.1723337933.Troy-New-PC.22412.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6de0e5d9d0b68ab9d41aa1e969c132bf1cb161ce63f3171384593af197b62daf
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_18-00-16_Troy-New-PC/events.out.tfevents.1723338016.Troy-New-PC.9200.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0bf93777958ccae26cfd15308e5c6b763f860b3bdfe2597759947396a358a32
3
+ size 36438
BlackSheepVision-Phi-3/runs/Aug10_21-03-19_Troy-New-PC/events.out.tfevents.1723349000.Troy-New-PC.9200.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34dcd9366d7dd19c3805392402914b8f52fd7cc6281a2dd832b5e5c21b705ca9
3
+ size 5378
BlackSheepVision-Phi-3/runs/Aug10_21-10-34_Troy-New-PC/events.out.tfevents.1723349435.Troy-New-PC.8440.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6abf58b49a5c23030c57f3910b52f41f82e05415f7bbd0e6798628a6a5aa5fa
3
+ size 5933
BlackSheepVision-Phi-3/runs/Aug10_21-14-10_Troy-New-PC/events.out.tfevents.1723349650.Troy-New-PC.8440.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb457e4e01809c81f204571868046ae827686fc28e0d25ecb21482f47c89dddb
3
+ size 171478
BlackSheepVision-Phi-3/runs/Aug11_07-57-56_Troy-New-PC/events.out.tfevents.1723388276.Troy-New-PC.8440.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d8b2b45072fe1a703ecaf7165db2c4e786352c86227eae55da8d560945a779
3
+ size 25677
BlackSheepVision-Phi-3/runs/Aug11_09-22-10_Troy-New-PC/events.out.tfevents.1723393331.Troy-New-PC.8440.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a24b3234092e7cdb7890f921e535ef333b3a0998b6233a53f43c0fddab1238
3
+ size 64025
BlackSheepVision-Phi-3/runs/Aug11_10-11-33_Troy-New-PC/events.out.tfevents.1723396293.Troy-New-PC.3004.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb53c22488cf45e8761d87b1ab893cd32045ef70188eefe73902ab189e8cccf8
3
+ size 8422
BlackSheepVision-Phi-3/runs/Aug11_10-13-19_Troy-New-PC/events.out.tfevents.1723396399.Troy-New-PC.5380.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eec9410902a0ba3e90218a30d0f3cf9bbe66d37df319ca2f5d175320ce1c88c
3
+ size 8423
BlackSheepVision-Phi-3/runs/Aug11_10-14-32_Troy-New-PC/events.out.tfevents.1723396473.Troy-New-PC.9536.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff3401af62daff04da24364673f03a58c8f834d480f126e76e06ae570b6d75e4
3
+ size 8423
BlackSheepVision-Phi-3/runs/Aug11_10-15-46_Troy-New-PC/events.out.tfevents.1723396546.Troy-New-PC.21572.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c645d3e65856f03feb6baf4b1856c99c0c1109720f6408017f8a77a83cbd95e
3
+ size 8423
BlackSheepVision-Phi-3/runs/Aug11_10-18-58_Troy-New-PC/events.out.tfevents.1723396738.Troy-New-PC.12708.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db83d9102cb1cca251756088221070b64cef098dcd44f3fb0ba15cdeba546b5
3
+ size 8423
BlackSheepVision-Phi-3/runs/Aug11_10-22-18_Troy-New-PC/events.out.tfevents.1723396938.Troy-New-PC.13048.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cd51958843cce9335c9b7e6a5612d887b8b830cb37500d52a5234d5b5c0cef
3
+ size 113333
BlackSheepVision-Phi-3/training_log.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "microsoft_Phi-3-vision-128k-instruct",
3
+ "base_model_class": "Phi3VForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "gate, down, up, q, v, k, o",
7
+ "loss": 0.3423,
8
+ "grad_norm": 1.8447479009628296,
9
+ "learning_rate": 9.438738616283812e-05,
10
+ "epoch": 2.3596574690770695,
11
+ "current_steps": 4961,
12
+ "train_runtime": 14910.6647,
13
+ "train_samples_per_second": 5.921,
14
+ "train_steps_per_second": 2.96,
15
+ "total_flos": 6.693944118843802e+17,
16
+ "train_loss": 0.7060681312735523
17
+ }