LoneStriker committed on
Commit
4de2afe
·
verified ·
1 Parent(s): 6484926

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,9 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ internlm2-math-20b-llama-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
2
+ internlm2-math-20b-llama-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
3
+ internlm2-math-20b-llama-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
4
+ internlm2-math-20b-llama-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
5
+ internlm2-math-20b-llama-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
6
+ internlm2-math-20b-llama-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
7
+ internlm2-math-20b-llama-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
8
+ internlm2-math-20b-llama-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
9
+ internlm2-math-20b-llama-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: text-generation
3
+ license: other
4
+ language:
5
+ - en
6
+ - zh
7
+ tags:
8
+ - math
9
+ ---
10
+
11
+ # InternLM-Math
12
+
13
+ <div align="center">
14
+
15
+ <img src="https://raw.githubusercontent.com/InternLM/InternLM/main/assets/logo.svg" width="200"/>
16
+ <div> </div>
17
+ <div align="center">
18
+ <b><font size="5">InternLM-Math</font></b>
19
+ <sup>
20
+ <a href="https://internlm.intern-ai.org.cn/">
21
+ <i><font size="4">HOT</font></i>
22
+ </a>
23
+ </sup>
24
+ <div> </div>
25
+ </div>
26
+
27
+ State-of-the-art bilingual open-sourced Math reasoning LLMs.
28
+ </div>
29
+
30
+ # Introduction
31
+ - **7B and 20B Chinese and English Math LMs with better-than-ChatGPT performance.** InternLM2-Math models are continue-pretrained from InternLM2-Base on ~100B high-quality math-related tokens and then fine-tuned (SFT) on ~2M bilingual supervised math samples. We apply minhash and exact number matching to decontaminate possible test set leakage.
32
+ - **Add Lean as a supported language for math problem solving and math theorem proving.** We are exploring combining Lean 3 with InternLM-Math for verifiable math reasoning. InternLM-Math can generate Lean code for simple math reasoning tasks like GSM8K or suggest possible proof tactics based on Lean states.
33
+ - **Can also be viewed as a reward model, which supports the Outcome/Process/Lean Reward Model.** We supervise InternLM2-Math with various types of reward modeling data, so that InternLM2-Math can also verify chain-of-thought processes. We also add the ability to convert a chain-of-thought process into Lean 3 code.
34
+ - **A Math LM Augment Helper** and **Code Interpreter**. InternLM2-Math can help augment math reasoning problems and solve them using the code interpreter, which lets you generate synthetic data more quickly!
35
+
36
+ # Models
37
+ | Model | Transformers(HF) |Release Date |
38
+ |---|---|---|
39
+ | **InternLM2-Math-Base-7B** | [🤗internlm/internlm2-math-base-7b](https://huggingface.co/internlm/internlm2-math-base-7b) | 2024-01-23|
40
+ | **InternLM2-Math-Base-20B** | [🤗internlm/internlm2-math-base-20b](https://huggingface.co/internlm/internlm2-math-base-20b) | 2024-01-23|
41
+ | **InternLM2-Math-7B** | [🤗internlm/internlm2-math-7b](https://huggingface.co/internlm/internlm2-math-7b) | 2024-01-23|
42
+ | **InternLM2-Math-20B** | [🤗internlm/internlm2-math-20b](https://huggingface.co/internlm/internlm2-math-20b) | 2024-01-23|
43
+
44
+
45
+ # Performance
46
+
47
+ ## Pretrain Performance
48
+ We evaluate pretrain checkpoints based on greedy decoding with few-shot COT. Details of pretraining will be introduced in the tech report.
49
+ | Model | GSM8K | MATH |
50
+ |------------------------|---------|--------|
51
+ | Llama2-7B | 11.8 | 3.2 |
52
+ | Llemma-7B | 36.4 | 18.0 |
53
+ | InternLM2-Base-7B | 36.5 | 8.6 |
54
+ | **InternLM2-Math-Base-7B** | **49.2** | **21.5** |
55
+ | Minerva-8B | 16.2 | 14.1 |
56
+ | InternLM2-Base-20B | 54.6 | 13.7 |
57
+ | **InternLM2-Math-Base-20B** | **63.7** | **27.3** |
58
+ | Llemma-34B | 51.5 | 25.0 |
59
+ | Minerva-62B | 52.4 | 27.6 |
60
+ | Minerva-540B | 58.8 | 33.6 |
61
+
62
+
63
+ ## SFT Performance
64
+ All performance is based on greedy decoding with COT. We notice that performance on Hungary varies widely between our different checkpoints, while the other results are very stable. This may be due to the limited number of problems in the Hungary test set.
65
+ | Model | Model Type | GSM8K | MATH | Hungary |
66
+ |------------------------|----------------------|--------|--------|---------|
67
+ | Qwen-7B-Chat | General | 51.7 | 11.6 | - |
68
+ | DeepSeek-7B-Chat | General | 63.0 | 15.8 | 28.5 |
69
+ | InternLM2-Chat-7B | General | 70.7 | 23.0 | - |
70
+ | ChatGLM3-6B | General | 53.8 | 20.4 | 32 |
71
+ | MetaMath-Mistral-7B | Mathematics | 77.7 | 28.2 | 29 |
72
+ | MetaMath-Llemma-7B | Mathematics | 69.2 | 30.0 | - |
73
+ | **InternLM2-Math-7B** | Mathematics | **78.1** | **34.6** | **55** |
74
+ | InternLM2-Chat-20B | General | 79.6 | 31.9 | - |
75
+ | MetaMath-Llemma-34B | Mathematics | 75.8 | 34.8 | - |
76
+ | **InternLM2-Math-20B** | Mathematics | **82.6** | **37.7** | **66** |
77
+ | Qwen-72B | General | 78.9 | 35.2 | 52 |
78
+ | DeepSeek-67B | General | 84.1 | 32.6 | 58 |
79
+ | ChatGPT (GPT-3.5) | General | 80.8 | 34.1 | 41 |
80
+ | GPT4 (First version) | General | 92.0 | 42.5 | 68 |
81
+
82
+ # Inference
83
+
84
+ ## LMDeploy
85
+ We suggest using [LMDeploy](https://github.com/InternLM/LMDeploy)(>=0.2.1) for inference.
86
+ ```python
87
+ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
88
+
89
+ backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', tp=1, cache_max_entry_count=0.3)
90
+ chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
91
+ pipe = pipeline(model_path='internlm/internlm2-math-7b', chat_template_config=chat_template, backend_config=backend_config)
92
+
93
+ problem = '1+1='
94
+ result = pipe([problem], request_output_len=1024, top_k=1)
95
+ ```
96
+
97
+ ## Huggingface
98
+ ```python
99
+ import torch
100
+ from transformers import AutoTokenizer, AutoModelForCausalLM
101
+ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-math-7b", trust_remote_code=True)
102
+ # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
103
+ model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-math-7b", trust_remote_code=True, torch_dtype=torch.float16).cuda()
104
+ model = model.eval()
105
+ response, history = model.chat(tokenizer, "1+1=", history=[], meta_instruction="")
106
+ print(response)
107
+ ```
108
+
109
+ # Special usages
110
+ We list some of the instructions used in our SFT; you can use them as prompts. You can also prompt the model in other ways, but the following are recommended. InternLM2-Math may combine the following abilities, but this is not guaranteed.
111
+
112
+ | Description | Query |
113
+ | --- | --- |
114
+ | Solving question via chain-of-thought | {Question} |
115
+ | Solving question via Lean 3 | {Question}\nSolve this via Lean 3 |
116
+ | Outcome reward model | Given a question and an answer, check is it correct?\nQuestion:{Question}\nAnswer:{COT} |
117
+ | Process reward model | Given a question and an answer, check correctness of each step.\nQuestion:{Question}\nAnswer:{COT} |
118
+ | Reward model | Given a question and two answers, which one is better? \nQuestion:{Question}\nAnswer 1:{COT}\nAnswer 2:{COT} |
119
+ | Convert chain-of-thought to Lean 3 | Convert this answer into Lean3. Question:{Question}\nAnswer:{COT} |
120
+ | Convert Lean 3 to chain-of-thought | Convert this lean 3 code into a natural language problem with answers:\n{LEAN} |
121
+ | Translate question and chain-of-thought answer to a proof statement | Convert this question and answer into a proof format.\nQuestion:{Question}\nAnswer:{COT} |
122
+ | Translate proof problem to Lean 3 | Convert this natural langauge statement into a Lean 3 theorem statement:{Theorem} |
123
+ | Translate Lean 3 to proof problem | Convert this Lean 3 theorem statement into natural language:{STATEMENT} |
124
+ | Suggest a tactic based on Lean state | Given the Lean 3 tactic state, suggest a next tactic:\n{State} |
125
+ | Rephrase Problem | Describe this problem in another way. {STATEMENT} |
126
+ | Augment Problem | Please augment a new problem based on: {Question} |
127
+ | Augment a harder Problem | Increase the complexity of the problem: {Question} |
128
+ | Change specific numbers | Change specific numbers: {Question}|
129
+ | Introduce fractions or percentages | Introduce fractions or percentages: {Question}|
130
+ | Code Interpreter | [lagent](https://github.com/InternLM/InternLM/blob/main/agent/lagent.md) |
131
+ | In-context Learning | Question:{Question}\nAnswer:{COT}\n...Question:{Question}\nAnswer:{COT}|
132
+
133
+ # Fine-tune and others
134
+ Please refer to [InternLM](https://github.com/InternLM/InternLM/tree/main).
135
+
136
+ # Known issues
137
+ Our model is still under development and will be upgraded. InternLM-Math currently has the following possible issues:
138
+ - It may skip calculation steps.
139
+ - Performs badly on Chinese fill-in-the-blank problems and English choice problems due to SFT data composition.
140
+ - The reward model mode can be better leveraged with assigned token probabilities.
141
+ - Code switch due to SFT data composition.
142
+ - Some abilities of Lean can only be adapted to GSM8K-like problems (e.g. Convert chain-of-thought to Lean 3), and performance related to Lean is not guaranteed.
143
+
144
+ # Citation and Tech Report
145
+ To be appended.
internlm2-math-20b-llama-Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d434c711281fd9ee8233aa56a082d8636c37bb4990aeacbd6997a5702435103a
3
+ size 10551178400
internlm2-math-20b-llama-Q3_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df869dfdfd88c75a5fa50f18e6ab07eaa69efacc8f82239e1b717ed8a3a4971
3
+ size 9722279072
internlm2-math-20b-llama-Q3_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b103ee195ea9e59648fff945269ff66793f5b5c241f4477adb921e4e433853e
3
+ size 8760472736
internlm2-math-20b-llama-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30758a2a0f5df087c847f4bff2675c832a5c1290c7f91b5c242b7ed2a9dd11d6
3
+ size 11984469152
internlm2-math-20b-llama-Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f0a976096c2ac3934745ce483c42078bfb99ce276cf289b5d12aa6f1b573b4e
3
+ size 11401329824
internlm2-math-20b-llama-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7128b0065a4aa59a787ea9c007ae6d47b293ba07e2eeab17c867e1f18caa8eab
3
+ size 14075100320
internlm2-math-20b-llama-Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4a733a2eca4ebd8c08c609c878a8fa707e566dee82936862a839c56b001181
3
+ size 13734182048
internlm2-math-20b-llama-Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9280baf11b2699a7b4798a841bd0461329a65b5d7ab1ff20dab03b8b640bee4
3
+ size 16296395936
internlm2-math-20b-llama-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a9d2f210bad445d716afef15d14958019f461ebd60517bef27cdcab670507e1
3
+ size 21106373792
original_repo_url.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/internlm/internlm2-math-20b