michaelmc1618 committed
Commit c37e0f8 · verified · 1 Parent(s): 269af6a

Create app.py

Files changed (1)
app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Title of the app
st.title("IntelliCor Chat with Falcon Model")

# Sidebar for user instructions
st.sidebar.title("About")
st.sidebar.info("This app uses the Falcon model to generate text completions based on your input.")

# Load the Falcon model and tokenizer (cached so they are only loaded once per session)
@st.cache_resource
def load_model():
    model_name = "tiiuae/falcon-7b-instruct"  # Change to Falcon-3B if needed
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # Automatically map to GPU/CPU
        torch_dtype="auto"  # Use FP16 if available
    )
    return tokenizer, model

tokenizer, model = load_model()

# User input
prompt = st.text_area("Enter your prompt:", placeholder="Type something here...")

# Inference settings
max_tokens = st.slider("Max tokens for completion:", 50, 500, 100)
temperature = st.slider("Temperature (creativity):", 0.0, 1.5, 0.7)

# Generate response when the user clicks the button
if st.button("Generate"):
    if prompt.strip() == "":
        st.warning("Please enter a valid prompt.")
    else:
        # Tokenize the input and move it to the available device
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

        # Generate text
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True
        )

        # Decode and display the response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.subheader("Generated Text:")
        st.write(response)
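For reference, a minimal sketch of how this app could be launched locally, assuming the standard Streamlit CLI is available; the commit itself does not list dependencies, so the install line below is an assumption (accelerate is included because device_map="auto" in transformers relies on it):

pip install streamlit torch transformers accelerate
streamlit run app.py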