Spaces:
Sleeping
Sleeping
daniel
#1
by
danielpleus
- opened
- .gitattributes +36 -5
- .gitignore +0 -3
- .pre-commit-config.yaml +0 -3
- Dockerfile +0 -3
- README.md +12 -12
- app.py +0 -121
- chatbot/build_store.py +0 -3
- chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/data_level0.bin +0 -3
- chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/header.bin +0 -3
- chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/length.bin +0 -3
- chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/link_lists.bin +0 -3
- chatbot/chromadb/chroma.sqlite3 +0 -3
- processing/wiki/links.txt +0 -3
- processing/wiki/scrape_wiki.py +0 -3
- processing/wiki/wiki.py +0 -3
- requirements.txt +0 -9
.gitattributes
CHANGED
@@ -1,5 +1,36 @@
|
|
1 |
-
*.
|
2 |
-
*.
|
3 |
-
*.
|
4 |
-
|
5 |
-
*.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d91b20accd452971bc5df3a5644bfdc2f19a7c6f9e0916c19ac4fc129004ddd0
|
3 |
-
size 54
|
|
|
|
|
|
|
|
.pre-commit-config.yaml
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:94d98d9ee1e7e5fd27a1d608ddd0ffe1d2ec349df83178d5b12287376637010a
|
3 |
-
size 603
|
|
|
|
|
|
|
|
Dockerfile
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5f937f23f227ebf14b66fe97c7a715467211833c03a8da8fb2c1f708f0e4421d
|
3 |
-
size 358
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
-
---
|
2 |
-
title: Emsland Bot
|
3 |
-
emoji: 📈
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: blue
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.34.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: Emsland Bot
|
3 |
+
emoji: 📈
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: blue
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.34.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
DELETED
@@ -1,121 +0,0 @@
|
|
1 |
-
from haystack.components.embedders import SentenceTransformersTextEmbedder
|
2 |
-
from haystack import Pipeline
|
3 |
-
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
|
4 |
-
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
|
5 |
-
from haystack.components.generators import OpenAIGenerator
|
6 |
-
from haystack.components.builders import PromptBuilder
|
7 |
-
import haystack.logging
|
8 |
-
|
9 |
-
|
10 |
-
import streamlit as st
|
11 |
-
|
12 |
-
from dotenv import load_dotenv
|
13 |
-
from haystack import component
|
14 |
-
import logging
|
15 |
-
|
16 |
-
haystack.logging.configure_logging(use_json=True)
|
17 |
-
|
18 |
-
logging.basicConfig(
|
19 |
-
format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING
|
20 |
-
)
|
21 |
-
logging.getLogger("haystack").setLevel(logging.INFO)
|
22 |
-
|
23 |
-
load_dotenv()
|
24 |
-
|
25 |
-
|
26 |
-
@component
|
27 |
-
class ListToString:
|
28 |
-
@component.output_types(text=str)
|
29 |
-
def run(self, input_list: list[str]):
|
30 |
-
print(input_list[0])
|
31 |
-
return {"text": input_list[0]}
|
32 |
-
|
33 |
-
|
34 |
-
@st.cache_resource
|
35 |
-
def retrieval_pipeline(path):
|
36 |
-
document_store = ChromaDocumentStore(persist_path=path)
|
37 |
-
retriever = ChromaEmbeddingRetriever(document_store, top_k=5)
|
38 |
-
|
39 |
-
template = """Transform this query into a imaginary response that the
|
40 |
-
user could expect based on your knowledge. Use 1-3 sentences. Replace
|
41 |
-
entities or names that you invent with <axz>. The result should be in
|
42 |
-
German.
|
43 |
-
Query: {{
|
44 |
-
query}}"""
|
45 |
-
|
46 |
-
prompt_builder = PromptBuilder(template=template)
|
47 |
-
|
48 |
-
generator = OpenAIGenerator()
|
49 |
-
|
50 |
-
# Create a pipeline
|
51 |
-
basic_rag_pipeline = Pipeline()
|
52 |
-
|
53 |
-
# Add components to your pipeline
|
54 |
-
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
|
55 |
-
basic_rag_pipeline.add_component("generator", generator)
|
56 |
-
basic_rag_pipeline.add_component("list_to_string", ListToString())
|
57 |
-
basic_rag_pipeline.add_component("retriever", retriever)
|
58 |
-
basic_rag_pipeline.add_component(
|
59 |
-
"text_embedder",
|
60 |
-
SentenceTransformersTextEmbedder(model="intfloat/multilingual-e5-small"),
|
61 |
-
)
|
62 |
-
|
63 |
-
basic_rag_pipeline.connect("prompt_builder", "generator")
|
64 |
-
basic_rag_pipeline.connect("generator.replies", "list_to_string.input_list")
|
65 |
-
basic_rag_pipeline.connect("list_to_string.text", "text_embedder.text")
|
66 |
-
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
|
67 |
-
|
68 |
-
return basic_rag_pipeline
|
69 |
-
|
70 |
-
|
71 |
-
def generation_pipeline():
|
72 |
-
template = """
|
73 |
-
Given the following information, answer the question.
|
74 |
-
|
75 |
-
Context:
|
76 |
-
{% for document in documents %}
|
77 |
-
{{ document.content }}
|
78 |
-
{% endfor %}
|
79 |
-
|
80 |
-
Bleibe chronologisch. Erkläre Konzepte und Begriffe wenn nötig.
|
81 |
-
|
82 |
-
Question: {{question}}
|
83 |
-
Answer:
|
84 |
-
"""
|
85 |
-
|
86 |
-
prompt_builder = PromptBuilder(template=template)
|
87 |
-
|
88 |
-
generator = OpenAIGenerator(model="gpt-4")
|
89 |
-
|
90 |
-
# Create a pipeline
|
91 |
-
basic_rag_pipeline = Pipeline()
|
92 |
-
|
93 |
-
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
|
94 |
-
basic_rag_pipeline.add_component("llm", generator)
|
95 |
-
|
96 |
-
basic_rag_pipeline.connect("prompt_builder", "llm")
|
97 |
-
|
98 |
-
return basic_rag_pipeline
|
99 |
-
|
100 |
-
|
101 |
-
retrieval_pipe = retrieval_pipeline("chatbot/chromadb")
|
102 |
-
generation_pipe = generation_pipeline()
|
103 |
-
|
104 |
-
prompt = st.chat_input("Say something")
|
105 |
-
|
106 |
-
if prompt:
|
107 |
-
response = retrieval_pipe.run({"prompt_builder": {"query": prompt}})
|
108 |
-
|
109 |
-
st.markdown("### Sources")
|
110 |
-
st.write(response["retriever"]["documents"])
|
111 |
-
|
112 |
-
answer = generation_pipe.run(
|
113 |
-
{
|
114 |
-
"prompt_builder": {
|
115 |
-
"question": prompt,
|
116 |
-
"documents": response["retriever"]["documents"],
|
117 |
-
}
|
118 |
-
}
|
119 |
-
)
|
120 |
-
st.markdown("### Answer")
|
121 |
-
st.write(answer["llm"]["replies"][0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chatbot/build_store.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9d6e26a1c8724e58f7db4ca64b08415a406690bfe0703024f86ae2a0686d7661
|
3 |
-
size 2571
|
|
|
|
|
|
|
|
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/data_level0.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6342cb9fe72281766832989823eb5f623785f82c18981e4501d27d21707e338c
|
3 |
-
size 3352000
|
|
|
|
|
|
|
|
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/header.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:165bc99a5045bf6fddf5b0248ae1f7b2a7095b4d2ea27b8a6009770df21cf01c
|
3 |
-
size 100
|
|
|
|
|
|
|
|
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/length.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:38f4d23fecc5bdec9fd8f19e0df75a7823844054ef28ce0bd949314be707bd5e
|
3 |
-
size 8000
|
|
|
|
|
|
|
|
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/link_lists.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2ad572bf16992d01cd6da523f5febd2c458964c611155a1bf5ee2e98246e6f2b
|
3 |
-
size 16976
|
|
|
|
|
|
|
|
chatbot/chromadb/chroma.sqlite3
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:426e55f129c2ed11a8f30c4a07a4948158e20764cc64c955b911bbb31bc03b39
|
3 |
-
size 43278336
|
|
|
|
|
|
|
|
processing/wiki/links.txt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1cc172879b60fc68c1b2c910a0075665e2f396cc3234964cee493ca441833655
|
3 |
-
size 16707
|
|
|
|
|
|
|
|
processing/wiki/scrape_wiki.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0044a5c0e51f394175a24cc24ad206c792882b0136221a31760e928984a6a769
|
3 |
-
size 1700
|
|
|
|
|
|
|
|
processing/wiki/wiki.py
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:22c7316cd5ba296fae6a1cf9f14e0d92737ad85808f8826b4945109d3511b26a
|
3 |
-
size 2399
|
|
|
|
|
|
|
|
requirements.txt
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
streamlit
|
2 |
-
haystack-ai
|
3 |
-
bitsandbytes
|
4 |
-
accelerate
|
5 |
-
pypdf
|
6 |
-
cryptography
|
7 |
-
pre-commit
|
8 |
-
chroma-haystack
|
9 |
-
sentence_transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|