zelk12 committed on
Commit
ee3f5fd
·
verified ·
1 Parent(s): e4f8b7d

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  base_model:
3
- - zelk12/MT4-Gen4-MAMM-gemma-2-9B
4
- - zelk12/MT4-Gen4-GMUBI-gemma-2-9B
5
  library_name: transformers
6
  tags:
7
  - mergekit
@@ -20,8 +20,8 @@ This model was merged using the SLERP merge method.
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
23
- * [zelk12/MT4-Gen4-MAMM-gemma-2-9B](https://huggingface.co/zelk12/MT4-Gen4-MAMM-gemma-2-9B)
24
- * [zelk12/MT4-Gen4-GMUBI-gemma-2-9B](https://huggingface.co/zelk12/MT4-Gen4-GMUBI-gemma-2-9B)
25
 
26
  ### Configuration
27
 
@@ -29,10 +29,10 @@ The following YAML configuration was used to produce this model:
29
 
30
  ```yaml
31
  models:
32
- - model: zelk12/MT4-Gen4-MAMM-gemma-2-9B
33
- - model: zelk12/MT4-Gen4-GMUBI-gemma-2-9B
34
  merge_method: slerp
35
- base_model: zelk12/MT4-Gen4-MAMM-gemma-2-9B
36
  dtype: bfloat16
37
  parameters:
38
  t: 0.25
 
1
  ---
2
  base_model:
3
+ - zelk12/MT4-Gen4-GMU-gemma-2-9B
4
+ - zelk12/MT4-Gen4-BI-gemma-2-9B
5
  library_name: transformers
6
  tags:
7
  - mergekit
 
20
  ### Models Merged
21
 
22
  The following models were included in the merge:
23
+ * [zelk12/MT4-Gen4-GMU-gemma-2-9B](https://huggingface.co/zelk12/MT4-Gen4-GMU-gemma-2-9B)
24
+ * [zelk12/MT4-Gen4-BI-gemma-2-9B](https://huggingface.co/zelk12/MT4-Gen4-BI-gemma-2-9B)
25
 
26
  ### Configuration
27
 
 
29
 
30
  ```yaml
31
  models:
32
+ - model: zelk12/MT4-Gen4-GMU-gemma-2-9B
33
+ - model: zelk12/MT4-Gen4-BI-gemma-2-9B
34
  merge_method: slerp
35
+ base_model: zelk12/MT4-Gen4-GMU-gemma-2-9B
36
  dtype: bfloat16
37
  parameters:
38
  t: 0.25
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "zelk12/MT4-Gen4-MAMM-gemma-2-9B",
3
  "architectures": [
4
  "Gemma2ForCausalLM"
5
  ],
@@ -22,14 +22,13 @@
22
  "num_hidden_layers": 42,
23
  "num_key_value_heads": 8,
24
  "pad_token_id": 0,
25
- "query_pre_attn_scalar": 256,
26
  "rms_norm_eps": 1e-06,
27
  "rope_theta": 10000.0,
28
  "sliding_window": 4096,
29
  "sliding_window_size": 4096,
30
  "torch_dtype": "bfloat16",
31
  "transformers_version": "4.46.2",
32
- "unsloth_version": "2024.9",
33
  "use_cache": true,
34
  "vocab_size": 256000
35
  }
 
1
  {
2
+ "_name_or_path": "zelk12/MT4-Gen4-GMU-gemma-2-9B",
3
  "architectures": [
4
  "Gemma2ForCausalLM"
5
  ],
 
22
  "num_hidden_layers": 42,
23
  "num_key_value_heads": 8,
24
  "pad_token_id": 0,
25
+ "query_pre_attn_scalar": 224,
26
  "rms_norm_eps": 1e-06,
27
  "rope_theta": 10000.0,
28
  "sliding_window": 4096,
29
  "sliding_window_size": 4096,
30
  "torch_dtype": "bfloat16",
31
  "transformers_version": "4.46.2",
 
32
  "use_cache": true,
33
  "vocab_size": 256000
34
  }
mergekit_config.yml CHANGED
@@ -1,8 +1,8 @@
1
  models:
2
- - model: zelk12/MT4-Gen4-MAMM-gemma-2-9B
3
- - model: zelk12/MT4-Gen4-GMUBI-gemma-2-9B
4
  merge_method: slerp
5
- base_model: zelk12/MT4-Gen4-MAMM-gemma-2-9B
6
  dtype: bfloat16
7
  parameters:
8
  t: 0.25
 
1
  models:
2
+ - model: zelk12/MT4-Gen4-GMU-gemma-2-9B
3
+ - model: zelk12/MT4-Gen4-BI-gemma-2-9B
4
  merge_method: slerp
5
+ base_model: zelk12/MT4-Gen4-GMU-gemma-2-9B
6
  dtype: bfloat16
7
  parameters:
8
  t: 0.25
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:724c9b65d5b78d1d0c1ef17b2458eb1420f237d9602c57550149a7595d72372d
3
  size 4961959144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f946bea8cab7dcc123f890965093bdb817ba82988df0780c6fd1f84c8ec247
3
  size 4961959144
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:268c4bc2d69ba3f7a29fad1cdeb4a5206c7fab201dc93eacd659ea4db0fc7eff
3
  size 4976923496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59829ba81434e1effac2a585cc9df2d6fc9be12a0dd95c8454648d7d046ed7fe
3
  size 4976923496
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c48d8d41b3675b6604b06b11bbc5c1e344f0672092c7e035484330bccfa347c
3
  size 4932868832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc9e4747e688e342565b8a4019537062e87dc105059095e89abf2b1a19398b5
3
  size 4932868832
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0d4b66cb380977d96648a8838ed65f6e504e82248430482dadbb25b6274834a
3
  size 4976923456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6079ae7484d6732ddcb7d878967de2092e9235750ffdf2eb5bd49e00be540a
3
  size 4976923456
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11b5aef392ba912c71d18a79f7f9ef566886e86cb39e2747a0dc198674da0229
3
  size 469799616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc191b66feb6d9278e53cf893fd62066af0f717c992ba19ebc6cd00a3b70db9
3
  size 469799616
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060
3
- size 34362873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0
3
+ size 34356041
tokenizer_config.json CHANGED
@@ -1737,262 +1737,6 @@
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
1740
- },
1741
- "255968": {
1742
- "content": "[toxicity=0]",
1743
- "lstrip": false,
1744
- "normalized": false,
1745
- "rstrip": false,
1746
- "single_word": false,
1747
- "special": false
1748
- },
1749
- "255969": {
1750
- "content": "\t\t",
1751
- "lstrip": false,
1752
- "normalized": false,
1753
- "rstrip": false,
1754
- "single_word": false,
1755
- "special": false
1756
- },
1757
- "255970": {
1758
- "content": "\t\t\t",
1759
- "lstrip": false,
1760
- "normalized": false,
1761
- "rstrip": false,
1762
- "single_word": false,
1763
- "special": false
1764
- },
1765
- "255971": {
1766
- "content": "\t\t\t\t",
1767
- "lstrip": false,
1768
- "normalized": false,
1769
- "rstrip": false,
1770
- "single_word": false,
1771
- "special": false
1772
- },
1773
- "255972": {
1774
- "content": "\t\t\t\t\t",
1775
- "lstrip": false,
1776
- "normalized": false,
1777
- "rstrip": false,
1778
- "single_word": false,
1779
- "special": false
1780
- },
1781
- "255973": {
1782
- "content": "\t\t\t\t\t\t",
1783
- "lstrip": false,
1784
- "normalized": false,
1785
- "rstrip": false,
1786
- "single_word": false,
1787
- "special": false
1788
- },
1789
- "255974": {
1790
- "content": "\t\t\t\t\t\t\t",
1791
- "lstrip": false,
1792
- "normalized": false,
1793
- "rstrip": false,
1794
- "single_word": false,
1795
- "special": false
1796
- },
1797
- "255975": {
1798
- "content": "\t\t\t\t\t\t\t\t",
1799
- "lstrip": false,
1800
- "normalized": false,
1801
- "rstrip": false,
1802
- "single_word": false,
1803
- "special": false
1804
- },
1805
- "255976": {
1806
- "content": "\t\t\t\t\t\t\t\t\t",
1807
- "lstrip": false,
1808
- "normalized": false,
1809
- "rstrip": false,
1810
- "single_word": false,
1811
- "special": false
1812
- },
1813
- "255977": {
1814
- "content": "\t\t\t\t\t\t\t\t\t\t",
1815
- "lstrip": false,
1816
- "normalized": false,
1817
- "rstrip": false,
1818
- "single_word": false,
1819
- "special": false
1820
- },
1821
- "255978": {
1822
- "content": "\t\t\t\t\t\t\t\t\t\t\t",
1823
- "lstrip": false,
1824
- "normalized": false,
1825
- "rstrip": false,
1826
- "single_word": false,
1827
- "special": false
1828
- },
1829
- "255979": {
1830
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
1831
- "lstrip": false,
1832
- "normalized": false,
1833
- "rstrip": false,
1834
- "single_word": false,
1835
- "special": false
1836
- },
1837
- "255980": {
1838
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
1839
- "lstrip": false,
1840
- "normalized": false,
1841
- "rstrip": false,
1842
- "single_word": false,
1843
- "special": false
1844
- },
1845
- "255981": {
1846
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1847
- "lstrip": false,
1848
- "normalized": false,
1849
- "rstrip": false,
1850
- "single_word": false,
1851
- "special": false
1852
- },
1853
- "255982": {
1854
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1855
- "lstrip": false,
1856
- "normalized": false,
1857
- "rstrip": false,
1858
- "single_word": false,
1859
- "special": false
1860
- },
1861
- "255983": {
1862
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1863
- "lstrip": false,
1864
- "normalized": false,
1865
- "rstrip": false,
1866
- "single_word": false,
1867
- "special": false
1868
- },
1869
- "255984": {
1870
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1871
- "lstrip": false,
1872
- "normalized": false,
1873
- "rstrip": false,
1874
- "single_word": false,
1875
- "special": false
1876
- },
1877
- "255985": {
1878
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1879
- "lstrip": false,
1880
- "normalized": false,
1881
- "rstrip": false,
1882
- "single_word": false,
1883
- "special": false
1884
- },
1885
- "255986": {
1886
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1887
- "lstrip": false,
1888
- "normalized": false,
1889
- "rstrip": false,
1890
- "single_word": false,
1891
- "special": false
1892
- },
1893
- "255987": {
1894
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1895
- "lstrip": false,
1896
- "normalized": false,
1897
- "rstrip": false,
1898
- "single_word": false,
1899
- "special": false
1900
- },
1901
- "255988": {
1902
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1903
- "lstrip": false,
1904
- "normalized": false,
1905
- "rstrip": false,
1906
- "single_word": false,
1907
- "special": false
1908
- },
1909
- "255989": {
1910
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1911
- "lstrip": false,
1912
- "normalized": false,
1913
- "rstrip": false,
1914
- "single_word": false,
1915
- "special": false
1916
- },
1917
- "255990": {
1918
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1919
- "lstrip": false,
1920
- "normalized": false,
1921
- "rstrip": false,
1922
- "single_word": false,
1923
- "special": false
1924
- },
1925
- "255991": {
1926
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1927
- "lstrip": false,
1928
- "normalized": false,
1929
- "rstrip": false,
1930
- "single_word": false,
1931
- "special": false
1932
- },
1933
- "255992": {
1934
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1935
- "lstrip": false,
1936
- "normalized": false,
1937
- "rstrip": false,
1938
- "single_word": false,
1939
- "special": false
1940
- },
1941
- "255993": {
1942
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1943
- "lstrip": false,
1944
- "normalized": false,
1945
- "rstrip": false,
1946
- "single_word": false,
1947
- "special": false
1948
- },
1949
- "255994": {
1950
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1951
- "lstrip": false,
1952
- "normalized": false,
1953
- "rstrip": false,
1954
- "single_word": false,
1955
- "special": false
1956
- },
1957
- "255995": {
1958
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1959
- "lstrip": false,
1960
- "normalized": false,
1961
- "rstrip": false,
1962
- "single_word": false,
1963
- "special": false
1964
- },
1965
- "255996": {
1966
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1967
- "lstrip": false,
1968
- "normalized": false,
1969
- "rstrip": false,
1970
- "single_word": false,
1971
- "special": false
1972
- },
1973
- "255997": {
1974
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1975
- "lstrip": false,
1976
- "normalized": false,
1977
- "rstrip": false,
1978
- "single_word": false,
1979
- "special": false
1980
- },
1981
- "255998": {
1982
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1983
- "lstrip": false,
1984
- "normalized": false,
1985
- "rstrip": false,
1986
- "single_word": false,
1987
- "special": false
1988
- },
1989
- "255999": {
1990
- "content": "<unused99>",
1991
- "lstrip": false,
1992
- "normalized": false,
1993
- "rstrip": false,
1994
- "single_word": false,
1995
- "special": false
1996
  }
1997
  },
1998
  "additional_special_tokens": [
@@ -2003,9 +1747,8 @@
2003
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
2004
  "clean_up_tokenization_spaces": false,
2005
  "eos_token": "<eos>",
2006
- "model_max_length": 1000000000000000019884624838656,
2007
  "pad_token": "<pad>",
2008
- "padding_side": "left",
2009
  "sp_model_kwargs": {},
2010
  "spaces_between_special_tokens": false,
2011
  "tokenizer_class": "GemmaTokenizer",
 
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1740
  }
1741
  },
1742
  "additional_special_tokens": [
 
1747
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
1750
+ "model_max_length": 2048,
1751
  "pad_token": "<pad>",
 
1752
  "sp_model_kwargs": {},
1753
  "spaces_between_special_tokens": false,
1754
  "tokenizer_class": "GemmaTokenizer",