Update app.py
app.py CHANGED
@@ -67,8 +67,11 @@ publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
 )
 
 # Create the callable LLM
-llm = transformers.pipeline(
-    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4", device="cuda"
+# llm = transformers.pipeline(
+#     task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4", device="cuda"
+# )
+llm = llama_cpp.Llama.from_pretrained(
+    repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF", filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
 )
 
 
@@ -115,14 +118,15 @@ def reply(message: str, history: list[str]) -> str:
         str: The generated response from the language model.
     """
 
-    return llm(
-        preprocess(message),
-        max_new_tokens=512,
-        return_full_text=False,
-    )[
-        0
-    ]["generated_text"]
+    # return llm(
+    #     preprocess(message),
+    #     max_new_tokens=512,
+    #     return_full_text=False,
+    # )[
+    #     0
+    # ]["generated_text"]
 
+    return llm(preprocess(message))["choices"][0]["text"]
 
 # Example Queries for Interface
 EXAMPLE_QUERIES = [
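
The commit swaps the GPU-only GPTQ `transformers.pipeline` for the same model in GGUF form served through llama-cpp-python, which runs on CPU and so works on a Space without CUDA. A minimal sketch of the new inference path, assuming `llama-cpp-python` and `huggingface_hub` are installed; the prompt string below is a hypothetical stand-in for whatever the Space's `preprocess` produces:

    # Sketch of the llama-cpp-python path introduced by this commit;
    # not the Space's full app.py.
    import llama_cpp

    # Downloads the quantized GGUF weights from the Hub and loads them on CPU.
    llm = llama_cpp.Llama.from_pretrained(
        repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
        filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    )

    # Completion-style call; the result mirrors the OpenAI response schema,
    # so the generated text sits under choices[0]["text"].
    out = llm("Question: What is retrieval-augmented generation? Answer:", max_tokens=256)
    print(out["choices"][0]["text"])

One caveat: llama-cpp-python's completion call defaults to a small `max_tokens` budget (16 in current releases), so the bare `llm(preprocess(message))` call in the diff will truncate replies unless an explicit limit is passed, unlike the old pipeline's `max_new_tokens=512`.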