ccm committed
Commit f35b338 · verified · 1 Parent(s): 0d63fea

Update app.py

Files changed (1)
  1. app.py +1 -4
app.py CHANGED
@@ -38,9 +38,6 @@ import langchain_community.vectorstores # Vectorstore for publications
 import langchain_huggingface # Embeddings
 import transformers
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
 # The number of publications to retrieve for the prompt
 PUBLICATIONS_TO_RETRIEVE = 5
 
@@ -71,7 +68,7 @@ publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
 
 # Create the callable LLM
 llm = transformers.pipeline(
-    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda", attn_implementation="flash_attention_2"
+    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda"
 )
 
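After this commit, the pipeline is built without the flash-attn install step or the attn_implementation argument, falling back to the default attention backend. A minimal usage sketch of the resulting pipeline follows; the model and device come from app.py, while the prompt and generation settings are illustrative assumptions, not values from the repository:

import transformers

# Build the text-generation pipeline as it stands after this commit
# (default attention implementation, no flash-attn dependency).
llm = transformers.pipeline(
    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda"
)

# Hypothetical prompt and generation settings, shown only for illustration.
response = llm(
    "Summarize the retrieved publications.",
    max_new_tokens=128,       # assumed generation budget
    return_full_text=False,   # return only the completion, not the prompt
)
print(response[0]["generated_text"])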