Nicolai Berk
committed
Commit · 9629f65
Parent(s): a92e9d3
Adjust GPU decorator
app.py CHANGED
@@ -8,16 +8,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import os
 import spaces
 
-print("CUDA available:", torch.cuda.is_available())
-
-@spaces.GPU
-def claim_gpu():
-    # Dummy function to make Spaces detect GPU usage
-    pass
-
-claim_gpu()
-
-
 # Login automatically if HF_TOKEN is present
 hf_token = os.getenv("HF_TOKEN")
 if hf_token:
@@ -52,9 +42,10 @@ index.add(corpus_embeddings_np)
 
 # Generator (choose one: local HF model or OpenAI)
 tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
-model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3",
+model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3", torch_dtype=torch.float16)
 generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)
 
+@spaces.GPU
 def rag_pipeline(query):
     # Embed query
     query_embedding = embedder.encode([query], convert_to_tensor=True, device='cpu').numpy()
@@ -69,11 +60,7 @@ def rag_pipeline(query):
         print("-", repr(doc))
 
     # # Rerank
-    # rerank_pairs = [[str(query), str(doc)] for doc in retrieved_docs
-    # if not rerank_pairs:
-    #     return "No valid documents found to rerank."
-    # scores = reranker.predict(rerank_pairs)
-
+    # rerank_pairs = [[str(query), str(doc)] for doc in retrieved_docs]
     # scores = reranker.predict(rerank_pairs)
     # reranked_docs = [doc for _, doc in sorted(zip(scores, retrieved_docs), reverse=True)]
 
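Net effect of the commit: the dummy claim_gpu() shim is removed, @spaces.GPU now decorates rag_pipeline itself, and the generator model is loaded in float16. On ZeroGPU Spaces the decorator requests a GPU for the duration of each decorated call, so it belongs on the function that actually runs inference rather than on a no-op. A minimal sketch of that pattern, assuming a ZeroGPU Space; the .to("cuda") placement and the `answer` entry point are assumptions, since device handling is not visible in this diff:

# Sketch of the ZeroGPU decorator pattern this commit moves to.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# float16 halves the weight memory relative to the float32 default
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150)

@spaces.GPU  # a GPU is attached only while a decorated call is running
def answer(prompt: str) -> str:
    return generator(prompt)[0]["generated_text"]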
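For context, the hunk headers expose the retrieval setup this function sits on: index.add(corpus_embeddings_np) and embedder.encode(...).numpy() are the standard FAISS dense-retrieval pattern. A self-contained sketch under that reading; the embedder model name and the toy corpus are illustrative, not taken from the repo:

# Dense retrieval as implied by the hunk headers; model and corpus are assumptions.
import faiss
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")
corpus = ["first document", "second document", "third document"]
corpus_embeddings_np = embedder.encode(corpus)  # float32 array, shape (n_docs, dim)
index = faiss.IndexFlatL2(corpus_embeddings_np.shape[1])
index.add(corpus_embeddings_np)

query_embedding = embedder.encode(["example query"])
distances, ids = index.search(query_embedding, 2)  # two nearest documents
retrieved_docs = [corpus[i] for i in ids[0]]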
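The rerank block stays commented out, but the rewritten first line closes the list comprehension that the old version left unterminated, and the duplicated predict line is dropped. If the step were enabled, it would plausibly look like the following; the CrossEncoder model name is an assumption, since the reranker is defined outside this diff, and the explicit sort key is added here so tied scores never fall back to comparing documents:

# Hedged sketch of the commented-out rerank step.
from sentence_transformers import CrossEncoder

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # assumed model

def rerank(query, retrieved_docs):
    rerank_pairs = [[str(query), str(doc)] for doc in retrieved_docs]
    if not rerank_pairs:
        return []
    scores = reranker.predict(rerank_pairs)  # higher score = more relevant
    return [doc for _, doc in sorted(zip(scores, retrieved_docs),
                                     key=lambda pair: pair[0], reverse=True)]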