mjbuehler committed
Commit 14ff717 · verified · 1 Parent(s): c4dc831

Update README.md

Files changed (1): README.md (+118, -0)

---
license: apache-2.0
---
### BioinspiredMixtral: Large Language Model for the Mechanics of Biological and Bio-Inspired Materials using Mixture-of-Experts

To accelerate discovery and guide insights, we report an open-source autoregressive transformer large language model (LLM), trained on expert knowledge in the field of biological materials, with a particular focus on mechanics and structural properties.

The model is fine-tuned on a corpus of more than a thousand peer-reviewed articles on structural biological and bio-inspired materials, and can be prompted to recall information, assist with research tasks, and serve as an engine for creativity.

![image/png](https://cdn-uploads.huggingface.co/production/uploads/623ce1c6b66fedf374859fe7/K0GifLVENb8G0nERQAzeQ.png)

This model is based on the work reported in https://doi.org/10.1002/advs.202306724, but focuses on the development of a mixture-of-experts strategy.

The model is a fine-tuned version of mistralai/Mixtral-8x7B-Instruct-v0.1.

To load a locally downloaded GGUF file with llama-cpp-python:

```
from llama_cpp import Llama

model_path = 'lamm-mit/BioinspiredMixtral/ggml-model-q5_K_M.gguf'
chat_format = "mistral-instruct"

llm = Llama(model_path=model_path,
            n_gpu_layers=-1, verbose=True,
            n_ctx=10000,
            #main_gpu=0,
            chat_format=chat_format,
            #split_mode=llama_cpp.LLAMA_SPLIT_LAYER
            )
```

Or, download directly from the Hugging Face Hub (note that `repo_id` is the repository name; the GGUF file is selected via `filename`):

```
from llama_cpp import Llama

repo_id = 'lamm-mit/BioinspiredMixtral'
chat_format = "mistral-instruct"

llm = Llama.from_pretrained(
    repo_id=repo_id,
    filename="*q5_K_M.gguf",
    verbose=True,
    n_gpu_layers=-1,
    n_ctx=10000,
    #main_gpu=0,
    chat_format=chat_format,
)
```
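
With the model loaded through either path, chat-style generation can go directly through llama-cpp-python. A minimal sketch (the question and sampling settings are illustrative, not from the original card):

```
# Sketch: chat completion with the llama.cpp model object loaded above
messages = [
    {"role": "user", "content": "What is spider silk in the context of bioinspired materials?"},
]

response = llm.create_chat_completion(
    messages=messages,
    temperature=0.7,   # illustrative sampling settings
    max_tokens=512,
)
print(response["choices"][0]["message"]["content"])
```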

For inference (the helper functions below use a Hugging Face `transformers` model and tokenizer rather than the llama.cpp object above):
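
These helpers expect `model`, `tokenizer`, and `device` to already exist in memory. A minimal loading sketch, assuming `transformers`-format weights are hosted under the same repository id (the id, precision, and device placement are assumptions, not part of the original card):

```
# Sketch only: load a transformers model/tokenizer for the helpers below.
# Repository id, dtype, and device_map are assumptions, not part of the original card.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = 'lamm-mit/BioinspiredMixtral'   # assumed repository id for transformers weights

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # reduce memory footprint; adjust to your hardware
    device_map="auto",           # spread layers across available devices (requires accelerate)
)
device = model.device            # used by generate_response() below
```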

```
import time
import torch

def generate_response(model, tokenizer, text_input="Biology offers amazing possibilities, especially for",
                      num_return_sequences=1,
                      temperature=1.,  # the higher the temperature, the more creative the model becomes
                      max_new_tokens=127,
                      num_beams=1,
                      top_k=50,
                      top_p=0.9, repetition_penalty=1., eos_token_id=2, verbatim=False,
                      exponential_decay_length_penalty_fac=None, add_special_tokens=True,
                      ):
    inputs = tokenizer(text_input, add_special_tokens=add_special_tokens, return_tensors='pt').to(device)

    with torch.no_grad():
        outputs = model.generate(input_ids=inputs["input_ids"],
                                 attention_mask=inputs["attention_mask"],  # produced by the tokenizer
                                 max_new_tokens=max_new_tokens,
                                 temperature=temperature,  # value used to modulate the next-token probabilities
                                 num_beams=num_beams,
                                 top_k=top_k,
                                 top_p=top_p,
                                 num_return_sequences=num_return_sequences,
                                 eos_token_id=eos_token_id,
                                 pad_token_id=eos_token_id,
                                 do_sample=True,
                                 repetition_penalty=repetition_penalty,
                                 )

    # Decode only the newly generated tokens (everything after the prompt)
    return tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:].detach().cpu().numpy(), skip_special_tokens=True)

def generate_BioMixtral(system_prompt='You are a helpful assistant. You are familiar with materials science, especially biological and bioinspired materials.',
                        prompt='What is spider silk in the context of bioinspired materials?',
                        repetition_penalty=1.,
                        top_p=0.9, top_k=256,
                        temperature=0.5, max_tokens=512, verbatim=False, eos_token=None,
                        prepend_response='',
                        ):
    if eos_token is None:
        eos_token = tokenizer.eos_token_id

    if system_prompt is None:
        messages = [
            {"role": "user", "content": prompt},
        ]
    else:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    # Build the prompt with the model's chat template, optionally seeding the start of the response
    txt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    txt = txt + prepend_response

    output_text = generate_response(model, tokenizer, text_input=txt, eos_token_id=eos_token,
                                    num_return_sequences=1, repetition_penalty=repetition_penalty,
                                    top_p=top_p, top_k=top_k,
                                    temperature=temperature, max_new_tokens=max_tokens, verbatim=verbatim,
                                    )
    return output_text[0]

start_time = time.time()
result = generate_BioMixtral(system_prompt='You respond accurately.',
                             prompt="What is graphene? Answer with detail.",
                             max_tokens=512, temperature=0.7)

print(result)
deltat = time.time() - start_time
print("--- %s seconds ---" % deltat)
toked = tokenizer(result)
print("Tokens per second (generation): ", len(toked['input_ids']) / deltat)
```
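
The `prepend_response` argument seeds the beginning of the assistant's reply before generation; a small usage sketch (the seed text is illustrative):

```
# Sketch: seed the start of the reply via prepend_response
res = generate_BioMixtral(system_prompt='You respond accurately.',
                          prompt='What is spider silk in the context of bioinspired materials?',
                          prepend_response='Spider silk is',   # generation continues from this seed
                          max_tokens=256, temperature=0.7)
print(res)
```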