import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
import re


@st.cache_resource
def load_falcon_model():
    """Load the Falcon instruct model + tokenizer as a text-generation pipeline.

    Cached with ``st.cache_resource`` so the model is loaded once per process,
    not on every Streamlit rerun.

    Returns:
        A ``transformers`` text-generation pipeline.
    """
    # NOTE(review): "tiiuae/falcon-3b-instruct" does not match the original
    # Falcon release line (falcon-7b/40b-instruct). If loading fails, the
    # intended checkpoint is likely "tiiuae/Falcon3-3B-Instruct" — confirm
    # against the Hugging Face Hub before changing.
    model_name = "tiiuae/falcon-3b-instruct"  # Smaller model for faster loading
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",  # FP16/BF16 when the hardware supports it
        device_map="auto",   # automatically distributes across available devices
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


qa_pipeline = load_falcon_model()


@st.cache_data
def load_sop_dataset():
    """Load the SOP dataset from ``dataset.csv`` (must ship with the app).

    Returns:
        A pandas DataFrame; expected to contain a "text" column (see
        ``find_best_context``).
    """
    # Ensure this file is uploaded to your Hugging Face Space
    return pd.read_csv("dataset.csv")


dataset = load_sop_dataset()


def find_best_context(question, dataset):
    """Return the dataset "text" entry with the largest word overlap with *question*.

    Simple bag-of-words heuristic: score each row by the number of distinct
    lowercase words it shares with the question.

    Args:
        question: Free-text user question.
        dataset: DataFrame with a "text" column of SOP passages.

    Returns:
        The best-matching passage, or None when no row shares a single word
        with the question.
    """
    # Hoisted out of the loop: the question's word set is loop-invariant.
    question_words = set(question.lower().split())
    best_score = 0
    best_context = None
    for text in dataset["text"]:
        # str() guards against non-string cells (e.g. NaN read as float).
        overlap = len(question_words & set(str(text).lower().split()))
        if overlap > best_score:
            best_score = overlap
            best_context = text
    return best_context


# --- Streamlit UI ---
st.title("SOP Question Answering AI with Falcon")
st.markdown("Ask any question about Standard Operating Procedures:")

# User input
question = st.text_area("Enter your question:", "")

# Generate answer
if st.button("Get Answer"):
    if question:
        with st.spinner("Finding the best context..."):
            # Automatically find the most relevant context
            context = find_best_context(question, dataset)
        if context:
            with st.spinner("Answering your question..."):
                prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
                # max_new_tokens (not max_length) so a long context prompt
                # cannot eat the generation budget; return_full_text=False
                # strips the echoed prompt from the output so only the
                # answer is shown.
                result = qa_pipeline(
                    prompt,
                    max_new_tokens=150,
                    num_return_sequences=1,
                    return_full_text=False,
                )
            st.success("Answer:")
            st.write(result[0]["generated_text"])
        else:
            st.warning("No relevant context found. Please try rephrasing your question.")
    else:
        st.warning("Please enter a question.")