user committed on
Commit 4e8606b · 1 Parent(s): b3b4e83
Files changed (1)
  1. app.py +90 -182
app.py CHANGED
@@ -42,194 +42,102 @@ MODEL_COMBINATIONS = {
      }
  }

- @st.cache_resource
- def load_models(model_combination):
      try:
-         embedding_tokenizer = AutoTokenizer.from_pretrained(MODEL_COMBINATIONS[model_combination]['embedding'])
-         embedding_model = AutoModel.from_pretrained(MODEL_COMBINATIONS[model_combination]['embedding'])
-         generation_tokenizer = AutoTokenizer.from_pretrained(MODEL_COMBINATIONS[model_combination]['generation'])
-         generation_model = AutoModelForCausalLM.from_pretrained(MODEL_COMBINATIONS[model_combination]['generation'])
-         return embedding_tokenizer, embedding_model, generation_tokenizer, generation_model
      except Exception as e:
-         st.error(f"Error loading models: {str(e)}")
-         return None, None, None, None

- @st.cache_data
- def load_and_process_text(file_path):
      try:
-         with open(file_path, 'r', encoding='utf-8') as file:
-             text = file.read()
-         chunks = [text[i:i+512] for i in range(0, len(text), 512)]
-         return chunks
      except Exception as e:
-         st.error(f"Error loading text file: {str(e)}")
-         return []
-
- @st.cache_data
- def create_embeddings(chunks, _embedding_model):
-     if isinstance(_embedding_model, str):
-         tokenizer = AutoTokenizer.from_pretrained(_embedding_model)
-         model = AutoModel.from_pretrained(_embedding_model)
-     else:
-         # Assume _embedding_model is already a model instance
-         model = _embedding_model
-         tokenizer = AutoTokenizer.from_pretrained(model.config._name_or_path)
-
-     embeddings = []
-     for chunk in chunks:
-         inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
-         with torch.no_grad():
-             outputs = model(**inputs)
-         embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
-
-     return np.vstack(embeddings)

  @st.cache_resource
- def create_faiss_index(embeddings):
-     index = faiss.IndexFlatL2(embeddings.shape[1])
-     index.add(embeddings)
-     return index
-
- def generate_response(query, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, index, chunks):
-     inputs = embedding_tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
-     with torch.no_grad():
-         outputs = embedding_model(**inputs)
-     query_embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
-
-     k = 3
-     _, I = index.search(query_embedding.reshape(1, -1), k)
-
-     context = " ".join([chunks[i] for i in I[0]])
-
-     prompt = f"As the Muse of A.R. Ammons, respond to this query: {query}\nContext: {context}\nMuse:"
-
-     input_ids = generation_tokenizer.encode(prompt, return_tensors="pt")
-     output = generation_model.generate(
-         input_ids,
-         max_new_tokens=100,
-         num_return_sequences=1,
-         temperature=0.7,
-         do_sample=True,
-         top_k=50,
-         top_p=0.95,
-         no_repeat_ngram_size=2
-     )
-     response = generation_tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
-
-     muse_response = response.split("Muse:")[-1].strip()
-
-     # Check if the response contains unused tokens
-     if "[unused" in muse_response:
-         muse_response = "I apologize, but I'm having trouble formulating a response. Let me try again with a simpler message: Hello! As the Muse of A.R. Ammons, I'm here to inspire and discuss poetry. How may I assist you today?"
-
-     return muse_response
-
- def save_data(chunks, embeddings, index):
-     with open('chunks.pkl', 'wb') as f:
-         pickle.dump(chunks, f)
-     np.save('embeddings.npy', embeddings)
-     faiss.write_index(index, 'faiss_index.bin')
-
- def load_data():
-     if os.path.exists('chunks.pkl') and os.path.exists('embeddings.npy') and os.path.exists('faiss_index.bin'):
          with open('chunks.pkl', 'rb') as f:
              chunks = pickle.load(f)
-         embeddings = np.load('embeddings.npy')
-         index = faiss.read_index('faiss_index.bin')
-         return chunks, embeddings, index
-     return None, None, None
-
- # Streamlit UI
- st.set_page_config(page_title="A.R. Ammons' Muse Chatbot", page_icon="🎭")
-
- st.title("A.R. Ammons' Muse Chatbot 🎭")
- st.markdown("""
- <style>
- .big-font {
-     font-size:20px !important;
-     font-weight: bold;
- }
- </style>
- """, unsafe_allow_html=True)
- st.markdown('<p class="big-font">Chat with the Muse of A.R. Ammons. Ask questions or discuss poetry!</p>', unsafe_allow_html=True)
-
- # Model selection
- if 'model_combination' not in st.session_state:
-     st.session_state.model_combination = "Fastest (30 seconds)"
-
- # Create a list of model options, with non-free models at the end
- free_models = [k for k, v in MODEL_COMBINATIONS.items() if v['free']]
- non_free_models = [k for k, v in MODEL_COMBINATIONS.items() if not v['free']]
- all_models = free_models + non_free_models
-
- # Custom CSS to grey out non-free options
- st.markdown("""
- <style>
- .stSelectbox div[role="option"][aria-selected="false"]:nth-last-child(-n+2) {
-     color: grey !important;
- }
- </style>
- """, unsafe_allow_html=True)
-
- selected_model = st.selectbox(
-     "Choose a model combination:",
-     all_models,
-     index=all_models.index(st.session_state.model_combination),
-     format_func=lambda x: f"{x} {'(Not Free)' if not MODEL_COMBINATIONS[x]['free'] else ''}"
- )
-
- # Prevent selection of non-free models
- if not MODEL_COMBINATIONS[selected_model]['free']:
-     st.warning("Premium models are not available in the free version.")
-     st.stop()
-
- st.session_state.model_combination = selected_model
-
- st.info(f"Potential time saved compared to slowest option: {MODEL_COMBINATIONS[selected_model]['time_saved']}")
-
- if st.button("Load Selected Models"):
-     with st.spinner("Loading models and data..."):
-         embedding_tokenizer, embedding_model, generation_tokenizer, generation_model = load_models(st.session_state.model_combination)
-         chunks = load_and_process_text('ammons_muse.txt')
-         embeddings = create_embeddings(chunks, embedding_model)
-         index = create_faiss_index(embeddings)
-
-     st.session_state.models_loaded = True
-     st.success("Models loaded successfully!")
-
- if 'models_loaded' not in st.session_state or not st.session_state.models_loaded:
-     st.warning("Please load the models before chatting.")
-     st.stop()
-
- # Initialize chat history
- if 'messages' not in st.session_state:
-     st.session_state.messages = []
-
- # Display chat messages from history on app rerun
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- # React to user input
- if prompt := st.chat_input("What would you like to ask the Muse?"):
-     st.chat_message("user").markdown(prompt)
-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     with st.spinner("The Muse is contemplating..."):
-         try:
-             response = generate_response(prompt, tokenizer, generation_model, embedding_model, index, chunks)
-         except Exception as e:
-             response = f"I apologize, but I encountered an error: {str(e)}"
-
-     with st.chat_message("assistant"):
-         st.markdown(response)
-     st.session_state.messages.append({"role": "assistant", "content": response})
-
- # Add a button to clear chat history
- if st.button("Clear Chat History"):
-     st.session_state.messages = []
-     st.experimental_rerun()
-
- # Add a footer
- st.markdown("---")
- st.markdown("*Powered by the spirit of A.R. Ammons and the magic of AI*")
 
      }
  }

+ def load_model(model_name):
      try:
+         return AutoModel.from_pretrained(model_name)
      except Exception as e:
+         st.error(f"Error loading model {model_name}: {str(e)}")
+         return None

+ def load_tokenizer(model_name):
      try:
+         return AutoTokenizer.from_pretrained(model_name)
      except Exception as e:
+         st.error(f"Error loading tokenizer for {model_name}: {str(e)}")
+         return None
+
+ @st.cache_resource
+ def load_embedding_model(model_name):
+     return load_model(model_name)

  @st.cache_resource
+ def load_generation_model(model_name):
+     try:
+         return AutoModelForCausalLM.from_pretrained(model_name)
+     except Exception as e:
+         st.error(f"Error loading generation model {model_name}: {str(e)}")
+         return None
+
+ def load_index_and_chunks():
+     try:
+         with open('faiss_index.pkl', 'rb') as f:
+             index = pickle.load(f)
          with open('chunks.pkl', 'rb') as f:
              chunks = pickle.load(f)
+         return index, chunks
+     except Exception as e:
+         st.error(f"Error loading index and chunks: {str(e)}")
+         return None, None
+
+ def generate_response(prompt, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, index, chunks):
+     try:
+         # Embed the prompt
+         prompt_embedding = embedding_model(embedding_tokenizer(prompt, return_tensors='pt')['input_ids']).last_hidden_state.mean(dim=1).detach().numpy()
+
+         # Search for similar chunks
+         D, I = index.search(prompt_embedding, k=5)
+         context = " ".join([chunks[i] for i in I[0]])
+
+         # Generate response
+         input_text = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
+         input_ids = generation_tokenizer(input_text, return_tensors="pt").input_ids
+
+         output = generation_model.generate(input_ids, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2)
+         response = generation_tokenizer.decode(output[0], skip_special_tokens=True)
+
+         return response
+     except Exception as e:
+         st.error(f"Error generating response: {str(e)}")
+         return "I apologize, but I encountered an error while generating a response."
+
+ def main():
+     st.title("Your Muse Chat App")
+
+     # Load models and data
+     selected_combo = st.selectbox("Choose a model combination:", list(MODEL_COMBINATIONS.keys()))
+     combo = MODEL_COMBINATIONS[selected_combo]
+
+     embedding_model = load_embedding_model(combo['embedding'])
+     generation_model = load_generation_model(combo['generation'])
+     embedding_tokenizer = load_tokenizer(combo['embedding'])
+     generation_tokenizer = load_tokenizer(combo['generation'])
+
+     index, chunks = load_index_and_chunks()
+
+     if not all([embedding_model, generation_model, embedding_tokenizer, generation_tokenizer, index, chunks]):
+         st.error("Some components failed to load. Please check the errors above.")
+         return
+
+     # Initialize chat history
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     # Display chat messages
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Chat input
+     if prompt := st.chat_input("What would you like to ask the Muse?"):
+         st.chat_message("user").markdown(prompt)
+         st.session_state.messages.append({"role": "user", "content": prompt})
+
+         with st.spinner("The Muse is contemplating..."):
+             response = generate_response(prompt, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, index, chunks)
+
+         with st.chat_message("assistant"):
+             st.markdown(response)
+         st.session_state.messages.append({"role": "assistant", "content": response})
+
+ if __name__ == "__main__":
+     main()
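
Note: the refactored load_index_and_chunks() expects faiss_index.pkl and chunks.pkl to already exist on disk, while this commit removes the code that produced such artifacts (load_and_process_text, create_embeddings, create_faiss_index, save_data). Below is a minimal offline sketch of how those two files could be regenerated. It assumes the same 512-character chunking and mean-pooled embeddings as the removed code, and that the installed faiss build can pickle Index objects; the helper name build_artifacts and the embedding model name in the example call are illustrative, not part of the commit.

# build_index.py -- illustrative helper, not part of this commit.
# Rebuilds the artifacts that load_index_and_chunks() expects
# ('chunks.pkl' and 'faiss_index.pkl'). Pickling the index assumes a faiss
# build whose Index objects support pickle; otherwise
# faiss.write_index/read_index would be needed instead.
import pickle

import faiss
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer


def build_artifacts(text_path, embedding_model_name):
    tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
    model = AutoModel.from_pretrained(embedding_model_name)

    # Same 512-character chunking as the removed load_and_process_text
    with open(text_path, 'r', encoding='utf-8') as f:
        text = f.read()
    chunks = [text[i:i + 512] for i in range(0, len(text), 512)]

    # Mean-pooled hidden states, as in the removed create_embeddings
    embeddings = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
    embeddings = np.vstack(embeddings).astype("float32")

    # Exact L2 index, as in the removed create_faiss_index
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Write the files under the names the new app loads
    with open('chunks.pkl', 'wb') as f:
        pickle.dump(chunks, f)
    with open('faiss_index.pkl', 'wb') as f:
        pickle.dump(index, f)


if __name__ == "__main__":
    # The embedding model name here is a placeholder; the app takes it from MODEL_COMBINATIONS.
    build_artifacts('ammons_muse.txt', 'sentence-transformers/all-MiniLM-L6-v2')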