Spaces:
Running
Running
Update to qwen:1.8b
Browse files
- app/rag.py +5 -7
- start_service.sh +1 -1
app/rag.py
CHANGED
@@ -15,14 +15,12 @@ class ChatPDF:
|
|
15 |
chain = None
|
16 |
|
17 |
def __init__(self):
|
18 |
-
|
19 |
-
self.
|
20 |
-
|
21 |
self.prompt = PromptTemplate.from_template(
|
22 |
"""
|
23 |
-
<s> [INST]
|
24 |
-
to answer the question. If you don't know the answer, just say that you don't know. Use three sentences
|
25 |
-
maximum and keep the answer concise. [/INST] </s>
|
26 |
[INST] Question: {question}
|
27 |
Context: {context}
|
28 |
Answer: [/INST]
|
@@ -38,7 +36,7 @@ class ChatPDF:
|
|
38 |
self.retriever = vector_store.as_retriever(
|
39 |
search_type="similarity_score_threshold",
|
40 |
search_kwargs={
|
41 |
-
"k":
|
42 |
"score_threshold": 0.5,
|
43 |
},
|
44 |
)
|
|
|
15 |
chain = None
|
16 |
|
17 |
def __init__(self):
|
18 |
+
#8k context window
|
19 |
+
self.model = ChatOllama(model="qwen:1.8b")
|
20 |
+
self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=128, chunk_overlap=8)
|
21 |
self.prompt = PromptTemplate.from_template(
|
22 |
"""
|
23 |
+
<s> [INST] Use the pieces of context to answer the question concisely.[/INST] </s>
|
|
|
|
|
24 |
[INST] Question: {question}
|
25 |
Context: {context}
|
26 |
Answer: [/INST]
|
|
|
36 |
self.retriever = vector_store.as_retriever(
|
37 |
search_type="similarity_score_threshold",
|
38 |
search_kwargs={
|
39 |
+
"k": 56,
|
40 |
"score_threshold": 0.5,
|
41 |
},
|
42 |
)
|
start_service.sh
CHANGED
@@ -7,7 +7,7 @@ ollama serve &
|
|
7 |
sleep 5
|
8 |
|
9 |
# Pull and run <YOUR_MODEL_NAME>
|
10 |
-
ollama pull
|
11 |
|
12 |
#
|
13 |
fastapi run /code/app/main.py --port 7860
|
|
|
7 |
sleep 5
|
8 |
|
9 |
# Pull and run <YOUR_MODEL_NAME>
|
10 |
+
ollama pull qwen:1.8b
|
11 |
|
12 |
#
|
13 |
fastapi run /code/app/main.py --port 7860
|