.gitattributes CHANGED
@@ -1,5 +1,36 @@
1
- *.sqlite3 filter=lfs diff=lfs merge=lfs -text
2
- *.gitignore filter=lfs diff=lfs merge=lfs -text
3
- *.yaml filter=lfs diff=lfs merge=lfs -text
4
- /Dockerfile filter=lfs diff=lfs merge=lfs -text
5
- *.sqlite3 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d91b20accd452971bc5df3a5644bfdc2f19a7c6f9e0916c19ac4fc129004ddd0
3
- size 54
 
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:94d98d9ee1e7e5fd27a1d608ddd0ffe1d2ec349df83178d5b12287376637010a
3
- size 603
 
 
 
 
Dockerfile DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f937f23f227ebf14b66fe97c7a715467211833c03a8da8fb2c1f708f0e4421d
3
- size 358
 
 
 
 
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Emsland Bot
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.34.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Emsland Bot
3
+ emoji: 📈
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.34.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py DELETED
@@ -1,121 +0,0 @@
1
- from haystack.components.embedders import SentenceTransformersTextEmbedder
2
- from haystack import Pipeline
3
- from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
4
- from haystack_integrations.document_stores.chroma import ChromaDocumentStore
5
- from haystack.components.generators import OpenAIGenerator
6
- from haystack.components.builders import PromptBuilder
7
- import haystack.logging
8
-
9
-
10
- import streamlit as st
11
-
12
- from dotenv import load_dotenv
13
- from haystack import component
14
- import logging
15
-
16
- haystack.logging.configure_logging(use_json=True)
17
-
18
- logging.basicConfig(
19
- format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING
20
- )
21
- logging.getLogger("haystack").setLevel(logging.INFO)
22
-
23
- load_dotenv()
24
-
25
-
26
- @component
27
- class ListToString:
28
- @component.output_types(text=str)
29
- def run(self, input_list: list[str]):
30
- print(input_list[0])
31
- return {"text": input_list[0]}
32
-
33
-
34
- @st.cache_resource
35
- def retrieval_pipeline(path):
36
- document_store = ChromaDocumentStore(persist_path=path)
37
- retriever = ChromaEmbeddingRetriever(document_store, top_k=5)
38
-
39
- template = """Transform this query into a imaginary response that the
40
- user could expect based on your knowledge. Use 1-3 sentences. Replace
41
- entities or names that you invent with <axz>. The result should be in
42
- German.
43
- Query: {{
44
- query}}"""
45
-
46
- prompt_builder = PromptBuilder(template=template)
47
-
48
- generator = OpenAIGenerator()
49
-
50
- # Create a pipeline
51
- basic_rag_pipeline = Pipeline()
52
-
53
- # Add components to your pipeline
54
- basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
55
- basic_rag_pipeline.add_component("generator", generator)
56
- basic_rag_pipeline.add_component("list_to_string", ListToString())
57
- basic_rag_pipeline.add_component("retriever", retriever)
58
- basic_rag_pipeline.add_component(
59
- "text_embedder",
60
- SentenceTransformersTextEmbedder(model="intfloat/multilingual-e5-small"),
61
- )
62
-
63
- basic_rag_pipeline.connect("prompt_builder", "generator")
64
- basic_rag_pipeline.connect("generator.replies", "list_to_string.input_list")
65
- basic_rag_pipeline.connect("list_to_string.text", "text_embedder.text")
66
- basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
67
-
68
- return basic_rag_pipeline
69
-
70
-
71
- def generation_pipeline():
72
- template = """
73
- Given the following information, answer the question.
74
-
75
- Context:
76
- {% for document in documents %}
77
- {{ document.content }}
78
- {% endfor %}
79
-
80
- Bleibe chronologisch. Erkläre Konzepte und Begriffe wenn nötig.
81
-
82
- Question: {{question}}
83
- Answer:
84
- """
85
-
86
- prompt_builder = PromptBuilder(template=template)
87
-
88
- generator = OpenAIGenerator(model="gpt-4")
89
-
90
- # Create a pipeline
91
- basic_rag_pipeline = Pipeline()
92
-
93
- basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
94
- basic_rag_pipeline.add_component("llm", generator)
95
-
96
- basic_rag_pipeline.connect("prompt_builder", "llm")
97
-
98
- return basic_rag_pipeline
99
-
100
-
101
- retrieval_pipe = retrieval_pipeline("chatbot/chromadb")
102
- generation_pipe = generation_pipeline()
103
-
104
- prompt = st.chat_input("Say something")
105
-
106
- if prompt:
107
- response = retrieval_pipe.run({"prompt_builder": {"query": prompt}})
108
-
109
- st.markdown("### Sources")
110
- st.write(response["retriever"]["documents"])
111
-
112
- answer = generation_pipe.run(
113
- {
114
- "prompt_builder": {
115
- "question": prompt,
116
- "documents": response["retriever"]["documents"],
117
- }
118
- }
119
- )
120
- st.markdown("### Answer")
121
- st.write(answer["llm"]["replies"][0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chatbot/build_store.py DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d6e26a1c8724e58f7db4ca64b08415a406690bfe0703024f86ae2a0686d7661
3
- size 2571
 
 
 
 
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/data_level0.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6342cb9fe72281766832989823eb5f623785f82c18981e4501d27d21707e338c
3
- size 3352000
 
 
 
 
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/header.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:165bc99a5045bf6fddf5b0248ae1f7b2a7095b4d2ea27b8a6009770df21cf01c
3
- size 100
 
 
 
 
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/length.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:38f4d23fecc5bdec9fd8f19e0df75a7823844054ef28ce0bd949314be707bd5e
3
- size 8000
 
 
 
 
chatbot/chromadb/6085ec03-95a1-4c3b-99fc-51882acccba3/link_lists.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad572bf16992d01cd6da523f5febd2c458964c611155a1bf5ee2e98246e6f2b
3
- size 16976
 
 
 
 
chatbot/chromadb/chroma.sqlite3 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:426e55f129c2ed11a8f30c4a07a4948158e20764cc64c955b911bbb31bc03b39
3
- size 43278336
 
 
 
 
processing/wiki/links.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc172879b60fc68c1b2c910a0075665e2f396cc3234964cee493ca441833655
3
- size 16707
 
 
 
 
processing/wiki/scrape_wiki.py DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0044a5c0e51f394175a24cc24ad206c792882b0136221a31760e928984a6a769
3
- size 1700
 
 
 
 
processing/wiki/wiki.py DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c7316cd5ba296fae6a1cf9f14e0d92737ad85808f8826b4945109d3511b26a
3
- size 2399
 
 
 
 
requirements.txt DELETED
@@ -1,9 +0,0 @@
1
- streamlit
2
- haystack-ai
3
- bitsandbytes
4
- accelerate
5
- pypdf
6
- cryptography
7
- pre-commit
8
- chroma-haystack
9
- sentence_transformers