import streamlit as st
import torch  # PyTorch is required by transformers for model inference
from transformers import AutoTokenizer, AutoModelForCausalLM

# Title of the app
st.title("IntelliCor Chat with Falcon Model")

# Sidebar for user instructions
st.sidebar.title("About")
st.sidebar.info("This app uses the Falcon model to generate text completions based on your input.")

# Load the Falcon model and tokenizer (cached so they are only loaded once per session)
@st.cache_resource
def load_model():
    model_name = "tiiuae/falcon-7b-instruct"  # Swap in a smaller Falcon variant if needed
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",   # Automatically place layers on GPU/CPU
        torch_dtype="auto",  # Use half precision when the hardware supports it
    )
    return tokenizer, model

tokenizer, model = load_model()

# User input
prompt = st.text_area("Enter your prompt:", placeholder="Type something here...")

# Inference settings
max_tokens = st.slider("Max tokens for completion:", 50, 500, 100)
# Minimum of 0.1: transformers rejects temperature <= 0 when do_sample=True
temperature = st.slider("Temperature (creativity):", 0.1, 1.5, 0.7)

# Generate a response when the user clicks the button
if st.button("Generate"):
    if prompt.strip() == "":
        st.warning("Please enter a valid prompt.")
    else:
        # Tokenize the input and move it to the same device as the model
        device = "cuda" if torch.cuda.is_available() else "cpu"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Generate text
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=True,
        )

        # Decode only the newly generated tokens (the raw output echoes the prompt)
        generated = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(generated, skip_special_tokens=True)
        st.subheader("Generated Text:")
        st.write(response)
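
# To launch the app locally (assuming this file is saved as app.py -- adjust the filename as needed):
#   streamlit run app.py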