Spaces:

sakuexe
/

thesizer

Sleeping

sakuexe committed on Oct 23, 2024

Commit

6427fd5

1 Parent(s): d01acd5

added debug prints

Files changed (1) hide show

app.py CHANGED Viewed

@@ -40,7 +40,7 @@ MODEL_NAME = "google/gemma-2-2b-it"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     # quantization_config=bnb_config,
-    device_map="auto",
     torch_dtype=torch.bfloat16
 )
@@ -100,14 +100,18 @@ def generate_prompt(message_history: list[ChatMessage], max_history=5):
 async def generate_answer(message_history: list[ChatMessage]):
     # generate a vector store
     db = await get_document_database("learning_material/*/*/*")
     # initialize the similarity search
     n_of_best_results = 4
     retriever = db.as_retriever(
         search_type="similarity", search_kwargs={"k": n_of_best_results})
     prompt = generate_prompt(message_history, max_history=5)
     # create the pipeline for generating a response
     # RunnablePassthrough handles the invoke parameters
@@ -120,11 +124,13 @@ async def generate_answer(message_history: list[ChatMessage]):
     # fetch the context using the latest message as the fetch string
     user_input = message_history[-1]["content"]
     response = retrieval_chain.invoke(user_input)
-    # # debugging
-    # print("=====raw response=====")
-    # print(response)
     # get the next response from the AI
     # first parse until the last user input and then get the first response

 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     # quantization_config=bnb_config,
+    # device_map="cpu",
     torch_dtype=torch.bfloat16
 )
 async def generate_answer(message_history: list[ChatMessage]):
     # generate a vector store
+    print("creating the document database")
     db = await get_document_database("learning_material/*/*/*")
+    print("Document database is ready")
     # initialize the similarity search
     n_of_best_results = 4
     retriever = db.as_retriever(
         search_type="similarity", search_kwargs={"k": n_of_best_results})
+    print("generating prompt")
     prompt = generate_prompt(message_history, max_history=5)
+    print("prompt is ready")
     # create the pipeline for generating a response
     # RunnablePassthrough handles the invoke parameters
     # fetch the context using the latest message as the fetch string
     user_input = message_history[-1]["content"]
+    print("invoking")
     response = retrieval_chain.invoke(user_input)
+    print("response recieved from invoke")
+    # debugging
+    print("=====raw response=====")
+    print(response)
     # get the next response from the AI
     # first parse until the last user input and then get the first response