SagarKeshave committed · verified
Commit c5d8529 · 1 Parent(s): a71a189
Files changed (1)
  1. app.py +49 -5
app.py CHANGED
@@ -1,9 +1,53 @@
  import streamlit as st
- from transformers import pipeline
-
- pipe = pipeline("sentiment-analysis")
- text = st.text_area("Enter text")
-
- if text:
-     out = pipe(text)
-     st.json(out)
+
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+ model_id = "WizardLM/WizardMath-7B-V1.1"
+
+ # Configuration
+ runtimeFlag = "cuda:0"  # run on GPU (GPTQ models can't run on CPU)
+ cache_dir = None  # no cache directory by default; updated automatically if you connect Google Drive
+ scaling_factor = 1.0  # factor for the commented-out rope_scaling option below; e.g. 6.0 would allow a max sequence length of 16384*6 = 98304, but that requires Colab Pro and a V100 or A100 for sufficient RAM
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     offload_folder="offload",
+     pad_token_id=tokenizer.eos_token_id,
+     offload_state_dict=True,
+     torch_dtype=torch.float16,
+     # rope_scaling={"type": "dynamic", "factor": scaling_factor},
+ )
+
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=512,
+     temperature=0.7,
+     top_p=0.95,
+     repetition_penalty=1.15,
+ )
+
+ question = st.text_area("Enter question")
+
+ if question:
+     out = pipe(question)[0]["generated_text"]
+     st.write(out)
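
One caveat for running the new app: device_map="auto" requires the accelerate package, and Streamlit re-executes the whole script on every interaction, so as written the 7B model is reloaded on each rerun. A minimal sketch of the same app with the pipeline cached via st.cache_resource (load_pipeline is an illustrative name, not from the commit):

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_ID = "WizardLM/WizardMath-7B-V1.1"

@st.cache_resource  # load once per process instead of on every Streamlit rerun
def load_pipeline():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",           # requires accelerate
        torch_dtype=torch.float16,
        pad_token_id=tokenizer.eos_token_id,
    )
    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15,
    )

pipe = load_pipeline()
question = st.text_area("Enter question")
if question:
    st.write(pipe(question)[0]["generated_text"])

Launch with "streamlit run app.py". The fp16 weights of a 7B model need roughly 14 GB of GPU memory; on smaller GPUs, keep the offload_folder/offload_state_dict options from the commit so layers can spill to CPU or disk.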