ccm committed (verified)
Commit 326fe51 · Parent(s): d5f3fea

Update app.py

Files changed (1):
  1. app.py +10 -19
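As the diff below shows, this commit reverts the LLM backend from a llama-cpp-python GGUF model back to a Hugging Face transformers text-generation pipeline (GPTQ-quantized Qwen2.5-7B-Instruct), and updates reply() to match the pipeline's output format.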
app.py CHANGED
@@ -36,7 +36,7 @@ import gradio # Interface handling
 import spaces # For GPU
 import langchain_community.vectorstores # Vectorstore for publications
 import langchain_huggingface # Embeddings
-import llama_cpp
+import transformers
 
 # The number of publications to retrieve for the prompt
 PUBLICATIONS_TO_RETRIEVE = 5
@@ -67,15 +67,8 @@ publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
 )
 
 # Create the callable LLM
-# llm = transformers.pipeline(
-#     task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4", device="cuda"
-# )
-llm = llama_cpp.Llama.from_pretrained(
-    repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
-    filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
-    n_gpu_layers=-1,
-    n_ctx=2048,
-    verbose=True
+llm = transformers.pipeline(
+    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4", device="cuda"
 )
 
 
@@ -122,15 +115,13 @@ def reply(message: str, history: list[str]) -> str:
         str: The generated response from the language model.
     """
 
-    # return llm(
-    #     preprocess(message),
-    #     max_new_tokens=512,
-    #     return_full_text=False,
-    # )[
-    #     0
-    # ]["generated_text"]
-
-    return llm(preprocess(message))["choices"][0]["text"]
+    return llm(
+        preprocess(message),
+        max_new_tokens=512,
+        return_full_text=False,
+    )[
+        0
+    ]["generated_text"]
 
 # Example Queries for Interface
 EXAMPLE_QUERIES = [
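The two backends return differently shaped results, which is why the body of reply() changes along with the model setup. A minimal sketch of the calling convention introduced by this commit, with the removed one shown for comparison (the message value is a hypothetical stand-in for the output of the preprocess() helper in app.py):

import transformers

# Pipeline as configured in this commit (GPTQ-quantized Qwen2.5 on CUDA).
llm = transformers.pipeline(
    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4", device="cuda"
)

message = "What publications discuss vectorstores?"  # hypothetical prompt

# The transformers pipeline returns a list of dicts; with return_full_text=False,
# "generated_text" holds only the newly generated continuation, not the prompt.
response = llm(message, max_new_tokens=512, return_full_text=False)
text = response[0]["generated_text"]

# The llama-cpp-python backend removed here returned an OpenAI-style completion
# dict instead, hence the old indexing:
# text = llm(message)["choices"][0]["text"]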