Gopal2002 committed
Commit 580a57c · verified · 1 parent: e9fb3a0

Update app.py

Files changed (1):
  app.py  +64 -2
app.py CHANGED
@@ -1,4 +1,66 @@
  import streamlit as st
 
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
  import streamlit as st
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ import torch, os
+ from langchain.llms import HuggingFacePipeline
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 
+ model_name_or_path = "meta-llama/Llama-2-13b-chat-hf"
+
+ # Count the number of GPUs available
+ gpu_count = torch.cuda.device_count()
+
+ # Determine the device to use based on GPU availability and count:
+ # if more than one GPU is available, use 'auto' to let the library choose;
+ # if only one GPU is available, use 'cuda:0' to target the first GPU;
+ # if no GPU is available, fall back to the CPU.
+ if torch.cuda.is_available() and gpu_count > 1:
+     device = 'auto'
+ elif torch.cuda.is_available():
+     device = 'cuda:0'
+ else:
+     device = 'cpu'
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+ model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
+                                              # quantization_config=bnb_config,
+                                              torch_dtype=torch.float16,
+                                              device_map='auto',)
+ print(model.hf_device_map)
+
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_length=2500,
+     return_full_text=True,
+     do_sample=True,
+     repetition_penalty=1.15,
+     num_return_sequences=1,
+     pad_token_id=2,
+     model_kwargs={"temperature": 0.3,
+                   "top_p": 0.95,
+                   "top_k": 40,
+                   "max_new_tokens": 2500},
+ )
+ llm = HuggingFacePipeline(pipeline=pipe)
+ template = """Prompt: {query}
+ Answer: """
+
+ prompt_template = PromptTemplate(
+     input_variables=["query"],
+     template=template
+ )
+ # Instantiate the chain
+ llm_chain = LLMChain(prompt=prompt_template, llm=llm)
+
+ st.title('Test Multi GPU')
+
+ md = st.text_area('Type in your markdown string (without outer quotes)')
+
+ st.button("Enter", type="primary")
+ if st.button("Say hello"):
+     resp = llm_chain.invoke(md)['text']
+     st.write(resp)
+ else:
+     st.write("Goodbye")
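
Note: the commit imports BitsAndBytesConfig and leaves a quantization_config=bnb_config argument commented out, but bnb_config itself is never defined. A minimal sketch of what such a config could look like if 4-bit quantization were enabled; the settings below are illustrative assumptions, not part of this commit:

  # Hypothetical config for the commented-out quantization_config kwarg above;
  # values are illustrative, not taken from the commit.
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,                     # store weights in 4-bit to cut GPU memory use
      bnb_4bit_quant_type="nf4",             # NF4 quantization scheme
      bnb_4bit_compute_dtype=torch.float16,  # matches the torch_dtype used for the model above
      bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
  )
  # It would then be passed to from_pretrained in place of the commented line:
  # model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
  #                                              quantization_config=bnb_config,
  #                                              device_map='auto')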