ccm committed
Commit f35b338 · verified · 1 Parent(s): 0d63fea

Update app.py

Files changed (1)
  1. app.py +1 -4
app.py CHANGED
@@ -38,9 +38,6 @@ import langchain_community.vectorstores # Vectorstore for publications
 import langchain_huggingface # Embeddings
 import transformers
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
 # The number of publications to retrieve for the prompt
 PUBLICATIONS_TO_RETRIEVE = 5
 
@@ -71,7 +68,7 @@ publication_vectorstore = langchain_community.vectorstores.FAISS.load_local(
 
 # Create the callable LLM
 llm = transformers.pipeline(
-    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda", attn_implementation="flash_attention_2"
+    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda"
 )
 
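After this commit, the pipeline is built without the flash-attn install step or the attn_implementation argument, falling back to the default attention backend. A minimal usage sketch of the resulting pipeline follows; the model and device come from app.py, while the prompt and generation settings are illustrative assumptions, not values from the repository:

import transformers

# Build the text-generation pipeline as it stands after this commit
# (default attention implementation, no flash-attn dependency).
llm = transformers.pipeline(
    task="text-generation", model="Qwen/Qwen2.5-7B-Instruct-AWQ", device="cuda"
)

# Hypothetical prompt and generation settings, shown only for illustration.
response = llm(
    "Summarize the retrieved publications.",
    max_new_tokens=128,       # assumed generation budget
    return_full_text=False,   # return only the completion, not the prompt
)
print(response[0]["generated_text"])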