user committed on
Commit 97426bb · 1 Parent(s): becd78e

Fix response generation and handle unused token errors

Files changed (2):
  1. app.py +35 -38
  2. requirements.txt +0 -1
app.py CHANGED
@@ -43,16 +43,12 @@ MODEL_COMBINATIONS = {
 }
 
 @st.cache_resource
-def load_models(combination):
+def load_models(model_combination):
     try:
-        embedding_model_name = MODEL_COMBINATIONS[combination]["embedding"]
-        generation_model_name = MODEL_COMBINATIONS[combination]["generation"]
-
-        embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
-        embedding_model = AutoModel.from_pretrained(embedding_model_name)
-        generation_tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
-        generation_model = AutoModelForCausalLM.from_pretrained(generation_model_name)
-
+        embedding_tokenizer = AutoTokenizer.from_pretrained(MODEL_COMBINATIONS[model_combination]['embedding'])
+        embedding_model = AutoModel.from_pretrained(MODEL_COMBINATIONS[model_combination]['embedding'])
+        generation_tokenizer = AutoTokenizer.from_pretrained(MODEL_COMBINATIONS[model_combination]['generation'])
+        generation_model = AutoModelForCausalLM.from_pretrained(MODEL_COMBINATIONS[model_combination]['generation'])
         return embedding_tokenizer, embedding_model, generation_tokenizer, generation_model
     except Exception as e:
         st.error(f"Error loading models: {str(e)}")
@@ -99,27 +95,38 @@ def generate_response(query, embedding_tokenizer, generation_tokenizer, generati
     prompt = f"As the Muse of A.R. Ammons, respond to this query: {query}\nContext: {context}\nMuse:"
 
     input_ids = generation_tokenizer.encode(prompt, return_tensors="pt")
-    output = generation_model.generate(input_ids, max_new_tokens=100, num_return_sequences=1, temperature=0.7)
+    output = generation_model.generate(
+        input_ids,
+        max_new_tokens=100,
+        num_return_sequences=1,
+        temperature=0.7,
+        do_sample=True,
+        top_k=50,
+        top_p=0.95,
+        no_repeat_ngram_size=2
+    )
     response = generation_tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
 
     muse_response = response.split("Muse:")[-1].strip()
+
+    # Check if the response contains unused tokens
+    if "[unused" in muse_response:
+        muse_response = "I apologize, but I'm having trouble formulating a response. Let me try again with a simpler message: Hello! As the Muse of A.R. Ammons, I'm here to inspire and discuss poetry. How may I assist you today?"
+
     return muse_response
 
-def save_data(chunks, embeddings, index, model_combination):
-    os.makedirs('data', exist_ok=True)
-    with open(f'data/chunks_{model_combination}.pkl', 'wb') as f:
+def save_data(chunks, embeddings, index):
+    with open('chunks.pkl', 'wb') as f:
         pickle.dump(chunks, f)
-    np.save(f'data/embeddings_{model_combination}.npy', embeddings)
-    faiss.write_index(index, f'data/faiss_index_{model_combination}.bin')
-
-def load_data(model_combination):
-    if os.path.exists(f'data/chunks_{model_combination}.pkl') and \
-       os.path.exists(f'data/embeddings_{model_combination}.npy') and \
-       os.path.exists(f'data/faiss_index_{model_combination}.bin'):
-        with open(f'data/chunks_{model_combination}.pkl', 'rb') as f:
+    np.save('embeddings.npy', embeddings)
+    faiss.write_index(index, 'faiss_index.bin')
+
+def load_data():
+    if os.path.exists('chunks.pkl') and os.path.exists('embeddings.npy') and os.path.exists('faiss_index.bin'):
+        with open('chunks.pkl', 'rb') as f:
             chunks = pickle.load(f)
-        embeddings = np.load(f'data/embeddings_{model_combination}.npy')
-        index = faiss.read_index(f'data/faiss_index_{model_combination}.bin')
+        embeddings = np.load('embeddings.npy')
+        index = faiss.read_index('faiss_index.bin')
         return chunks, embeddings, index
     return None, None, None
 
@@ -174,22 +181,12 @@ st.info(f"Potential time saved compared to slowest option: {MODEL_COMBINATIONS[s
 if st.button("Load Selected Models"):
     with st.spinner("Loading models and data..."):
         embedding_tokenizer, embedding_model, generation_tokenizer, generation_model = load_models(st.session_state.model_combination)
-
-        # Try to load existing data
-        chunks, embeddings, index = load_data(st.session_state.model_combination)
-
-        # If data doesn't exist, process it and save
-        if chunks is None or embeddings is None or index is None:
-            chunks = load_and_process_text('ammons_muse.txt')
-            embeddings = create_embeddings(chunks, embedding_model)
-            index = create_faiss_index(embeddings)
-            save_data(chunks, embeddings, index, st.session_state.model_combination)
+        chunks = load_and_process_text('ammons_muse.txt')
+        embeddings = create_embeddings(chunks, embedding_model)
+        index = create_faiss_index(embeddings)
 
         st.session_state.models_loaded = True
-        st.session_state.chunks = chunks
-        st.session_state.embeddings = embeddings
-        st.session_state.index = index
-        st.success("Models and data loaded successfully!")
+        st.success("Models loaded successfully!")
 
 if 'models_loaded' not in st.session_state or not st.session_state.models_loaded:
     st.warning("Please load the models before chatting.")
@@ -211,7 +208,7 @@ if prompt := st.chat_input("What would you like to ask the Muse?"):
 
     with st.spinner("The Muse is contemplating..."):
         try:
-            response = generate_response(prompt, embedding_tokenizer, generation_tokenizer, generation_model, embedding_model, st.session_state.index, st.session_state.chunks)
+            response = generate_response(prompt, tokenizer, generation_model, embedding_model, index, chunks)
         except Exception as e:
             response = f"I apologize, but I encountered an error: {str(e)}"
 
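A note on the new generate() call above: in Hugging Face transformers, sampling parameters such as temperature, top_k, and top_p only take effect when do_sample=True; the old greedy call silently ignored temperature=0.7 (recent releases emit a warning about unused flags), which is the likely root of the broken responses this commit fixes. A minimal, self-contained sketch of the same decoding pattern; the model name "gpt2" and the prompt text are illustrative stand-ins, not taken from this repo:

# Sketch of the sampled-decoding pattern used in the new app.py.
# Assumptions: "gpt2" stands in for the app's generation model,
# and the prompt is illustrative only.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer.encode("Muse, speak of rivers:", return_tensors="pt")
output = model.generate(
    input_ids,
    max_new_tokens=100,
    do_sample=True,          # without this, temperature/top_k/top_p are ignored
    temperature=0.7,         # soften the next-token distribution
    top_k=50,                # sample only among the 50 most likely tokens
    top_p=0.95,              # further restrict to 95% of probability mass
    no_repeat_ngram_size=2,  # forbid verbatim repetition of any bigram
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

num_return_sequences is omitted here because 1 is the default; the commit keeps it explicit.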
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-
 torch
 transformers
 sentence-transformers
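On the commit's "unused token" guard: BERT-style vocabularies reserve placeholder entries literally named [unused0], [unused1], and so on, and seeing them in decoded output typically means text was decoded with a tokenizer that does not match the generating model. A hedged sketch of that guard factored into a reusable helper; the function name and the abbreviated fallback message are hypothetical, not from the commit:

# Hypothetical helper mirroring the guard added in app.py.
# Assumption: the fallback text is abbreviated, not the commit's exact wording.
DEFAULT_FALLBACK = "I apologize, but I'm having trouble formulating a response."

def scrub_unused_tokens(text: str, fallback: str = DEFAULT_FALLBACK) -> str:
    # "[unused" prefixes mark reserved BERT-vocabulary placeholders;
    # their presence signals a model/tokenizer mismatch upstream.
    if "[unused" in text:
        return fallback
    return text

# Usage:
# scrub_unused_tokens("[unused42] hello")  -> returns the fallback message
# scrub_unused_tokens("hello")             -> returns "hello" unchanged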