olenkap commited on
Commit
a73d4bf
·
verified ·
1 Parent(s): 8749a89

Upload 15 files

Browse files
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys the app to Hugging Face Spaces on every push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # The v2 releases of these actions target a retired Node.js runtime;
      # use the currently maintained major versions instead.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        # Hand the token over through the environment rather than pasting it
        # into the inline script, so its value never becomes part of the
        # command text and cannot break the Python string quoting.
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
.gitignore ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ # C extensions
6
+ *.so
7
+
8
+ # Distribution / packaging
9
+ .Python
10
+ build/
11
+ develop-eggs/
12
+ dist/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ lib/
17
+ lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ share/python-wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+ cover/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+ db.sqlite3-journal
62
+
63
+ # Flask stuff:
64
+ instance/
65
+ .webassets-cache
66
+
67
+ # Scrapy stuff:
68
+ .scrapy
69
+
70
+ # Sphinx documentation
71
+ docs/_build/
72
+
73
+ # PyBuilder
74
+ .pybuilder/
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ # For a library or package, you might want to ignore these files since the code is
86
+ # intended to run in multiple environments; otherwise, check them in:
87
+ # .python-version
88
+
89
+ # pipenv
90
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
92
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
93
+ # install all needed dependencies.
94
+ #Pipfile.lock
95
+
96
+ # UV
97
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
98
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
99
+ # commonly ignored for libraries.
100
+ #uv.lock
101
+
102
+ # poetry
103
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
104
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
105
+ # commonly ignored for libraries.
106
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
107
+ #poetry.lock
108
+
109
+ # pdm
110
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
111
+ #pdm.lock
112
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
113
+ # in version control.
114
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
115
+ .pdm.toml
116
+ .pdm-python
117
+ .pdm-build/
118
+
119
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
120
+ __pypackages__/
121
+
122
+ # Celery stuff
123
+ celerybeat-schedule
124
+ celerybeat.pid
125
+
126
+ # SageMath parsed files
127
+ *.sage.py
128
+
129
+ # Environments
130
+ .env
131
+ .venv
132
+ env/
133
+ venv/
134
+ ENV/
135
+ env.bak/
136
+ venv.bak/
137
+
138
+ # Spyder project settings
139
+ .spyderproject
140
+ .spyproject
141
+
142
+ # Rope project settings
143
+ .ropeproject
144
+
145
+ # mkdocs documentation
146
+ /site
147
+
148
+ # mypy
149
+ .mypy_cache/
150
+ .dmypy.json
151
+ dmypy.json
152
+
153
+ # Pyre type checker
154
+ .pyre/
155
+
156
+ # pytype static type analyzer
157
+ .pytype/
158
+
159
+ # Cython debug symbols
160
+ cython_debug/
161
+
162
+ # PyCharm
163
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
164
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
165
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
166
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
167
+ #.idea/
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,42 @@
1
- ---
2
- title: RAG
3
- emoji: 📉
4
- colorFrom: purple
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.8.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RAG. Question answering bot.
2
+ ![](https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExNXZyaTM1anczcGE0cDliYWZkNXhvY3ZrOGRzeTJ5a3EwcXl3aGVnZCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/12xDxBbj7CPAOI/giphy.gif)
3
+
4
+ ### Topics
5
+ - [Data source](#data-source) ✔️
6
+ - [Chunking](#chunking) ✔️
7
+ - [LLM](#llm) ✔️
8
+ - [Retriever](#retriever) ✔️
9
+ - [Reranker](#reranker) ✔️
10
+ - [Citation](#citation) ❌
11
+ - [Web UI and deployment](#web-ui-and-deployment) ✔️
12
+
13
+
14
+ ## Data source
15
+
16
+ I used documents found on the Internet. You can take a look at them in the **docs** directory and ask questions based on that context. It is also possible to upload your own .txt files and use them as context.
17
+
18
+ ## Chunking
19
+ Chunking was performed using the same method explained in the live-coding session. No other libraries were involved.
20
+
21
+ ## LLM
22
+ As the LLM, I used the pretrained model [llama3-70b-8192](https://huggingface.co/Groq/Llama-3-Groq-70B-Tool-Use).
23
+
24
+ ## Retriever
25
+ Retrieval can be performed in three different ways. You can use either a BM25 retriever or a dense retriever that calculates semantic scores. Using both of them in a hybrid approach is also an option.
26
+
27
+ The dense retriever used in this lab is sentence-transformers/all-distilroberta-v1.
28
+
29
+ #### Here's an example where the dense retriever works better than BM25:
30
+ ![image_2024-12-08_21-59-16](https://github.com/user-attachments/assets/50e2b14a-dc28-4e4b-8752-90ab46d2d883)
31
+ ![image_2024-12-08_22-00-02](https://github.com/user-attachments/assets/eec3f618-99f0-4b43-b6a5-573ef612ae3e)
32
+
33
+
34
+ ## Reranker
35
+
36
+ As a reranker, I used the cross-encoder cross-encoder/stsb-roberta-base. It may not be efficient in my case: because the number of documents is quite small, it takes time to process the data but does not improve the extraction of context.
37
+ ## Citation
38
+
39
+ Not implemented.
40
+ ## Web UI and deployment
41
+ I used the Gradio library for the demo and hosting.
42
+
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from rag import QuestionAnsweringBot
3
+ from rag import read_docs, dir_path
4
+ from typing import List
5
+
6
+
7
def upload_file(files) -> List[str]:
    """Return the filesystem paths of the uploaded files.

    Args:
        files: Iterable of uploaded files. Each item is either a path string
            or an object that exposes its path as ``.name``. ``None`` or an
            empty iterable means nothing was uploaded.

    Returns:
        One path per uploaded file, in the order given; an empty list when
        nothing was uploaded.
    """
    # An upload widget that has been cleared hands over None; treat that the
    # same as "no files" instead of failing on iteration.
    if not files:
        return []
    # Plain path strings have no ``.name`` attribute, so pass them through.
    return [item if isinstance(item, str) else item.name for item in files]
10
+
11
+
12
def read_uploaded_docs(uploaded_docs: List[str]) -> List[str]:
    """Load the text of every uploaded document.

    Args:
        uploaded_docs: Paths of the files to read.

    Returns:
        The full contents of each file, decoded as UTF-8, in the same order
        as ``uploaded_docs``.
    """
    def _slurp(location: str) -> str:
        # Read one file completely and close it before moving on.
        with open(location, 'r', encoding='utf-8') as handle:
            return handle.read()

    return [_slurp(location) for location in uploaded_docs]
19
+
20
+
21
def answer_question(docs, query: str, score: str, api_key):
    """Answer ``query`` from the uploaded documents or the bundled ones.

    Args:
        docs: Paths of uploaded text files. When falsy, the documents are
            loaded with ``read_docs(dir_path=dir_path)`` instead.
        query: The user's question.
        score: Scoring method label: ``'BM25'``, ``'Dense'`` or ``'Both'``.
        api_key: API key forwarded to ``QuestionAnsweringBot``.

    Returns:
        The bot's answer, or a short explanatory message when the API key is
        missing or the scoring method is not recognised.
    """
    if not api_key:
        return "API key needed to proceed."

    # Label -> numeric mode passed as the bot's second argument. A lookup
    # table replaces the ``match`` statement, which had no default case and
    # left ``bot`` unbound for any other label (e.g. a cleared selection);
    # it also keeps the module importable on interpreters older than 3.10.
    score_modes = {'BM25': 0, 'Dense': 1, 'Both': 2}
    mode = score_modes.get(score)
    if mode is None:
        return f"Unknown scoring method: {score!r}."

    docs = read_uploaded_docs(docs) if docs else read_docs(dir_path=dir_path)
    bot = QuestionAnsweringBot(docs, mode, api_key)
    return bot.answer_question(question=query)
34
+
35
+
36
# Build the web UI: an intro text, a document upload field, an API key field,
# a query box, a scoring-method selector, and a button that sends all four
# inputs to ``answer_question`` and shows the result in the answer box.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Question Answering Bot

        This bot uses default documents or those you provided to answer questions based on their content.
        You can select from the following scoring methods for retrieving the context:
        - **BM25 scores**
        - **Dense retriever**
        - **Hybrid approach** (both BM25 and dense retriever combined).

        ## Instructions
        - Enter your **Groq API Key** in the textbox below.
        - The API key can be generated using [this link](https://console.groq.com/keys).
        - Input your query and select the scoring method to receive an answer.
        - Ask questions directly based on files given in **docs** directory in my [github repository](https://github.com/olenkapyrih/RAG/tree/master)
        - Or upload your files and use them as context. Just remember that the only allowed format is **.txt**
        """
    )

    uploaded_docs = gr.File(
        label="Upload Documents",
        file_types=[".txt"],
        file_count="multiple"
    )

    api_key = gr.Textbox(
        label='Groq API Key',
        placeholder="Enter your Groq API Key securely here.",
        type="password"
    )

    # Adjacent literals are joined by the parser. The previous backslash
    # continuation sat inside the string, so the next line's indentation
    # became part of the placeholder text.
    query = gr.Textbox(
        label='Query',
        placeholder=(
            "Ask a question. "
            "Ex: Does a slavery still exist? Tell me about it."
        )
    )

    score = gr.Radio(
        choices=["BM25", "Dense", "Both"],
        label="Select Scoring Method",
        value="Both"
    )

    outp = gr.Textbox(label='Answer', lines=6)
    button = gr.Button(value='Submit', variant='primary', key='enter')
    # The order of ``inputs`` matches the parameter order of answer_question.
    button.click(answer_question, inputs=[uploaded_docs, query, score, api_key], outputs=outp, show_progress=True)


# NOTE(review): share=True asks Gradio for a public share link; confirm this
# is wanted in the environment where the app is hosted.
demo.launch(share=True)
chunker.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Generator, List
2
+
3
+
4
def chunk_document(
    doc: str,
    desired_chunk_size: int,
    max_chunk_size: int
) -> Generator[str, None, None]:
    """Yield chunks of ``doc`` assembled from whole lines.

    Lines are collected, each followed by a newline, until the collected
    text is at least ``desired_chunk_size`` characters long; that text is
    then emitted. Text longer than ``max_chunk_size`` is emitted as several
    consecutive pieces of at most ``max_chunk_size`` characters, so no part
    of the document is dropped. Whatever remains after the last line is
    emitted the same way.

    Args:
        doc: The document text.
        desired_chunk_size: Length at which the collected lines are emitted.
        max_chunk_size: Upper bound on the length of any yielded chunk.

    Yields:
        The chunks, in document order.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")

    def _pieces(text: str) -> Generator[str, None, None]:
        # Cut instead of truncating: a plain ``text[:max_chunk_size]`` would
        # silently discard everything past the limit.
        for start in range(0, len(text), max_chunk_size):
            yield text[start:start + max_chunk_size]

    # Collect parts in a list and join once per chunk rather than growing a
    # string with ``+=`` on every line.
    buffered: List[str] = []
    buffered_len = 0
    for line in doc.splitlines():
        buffered.append(line + '\n')
        buffered_len += len(line) + 1
        if buffered_len >= desired_chunk_size:
            yield from _pieces(''.join(buffered))
            buffered, buffered_len = [], 0
    if buffered:
        yield from _pieces(''.join(buffered))
17
+
18
+
19
def chunk_documents(
    docs: List[str],
    desired_chunk_size: int = 500,
    max_chunk_size: int = 3000
) -> List[str]:
    """Chunk every document and collect the results in one flat list.

    Args:
        docs: The document texts.
        desired_chunk_size: Forwarded to ``chunk_document``.
        max_chunk_size: Forwarded to ``chunk_document``.

    Returns:
        All chunks of all documents, in document order.
    """
    return [
        piece
        for text in docs
        for piece in chunk_document(
            doc=text,
            desired_chunk_size=desired_chunk_size,
            max_chunk_size=max_chunk_size
        )
    ]
docs/A Boon or Bane for Students.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ In this essay on technology, we are going to discuss what technology is, what are its uses, and also what technology can do? First of all, technology refers to the use of technical and scientific knowledge to create, monitor, and design machinery. Also, technology helps in making other goods that aid mankind.
2
+
3
+ Essay on Technology – A Boon or Bane?
4
+ Experts are debating on this topic for years. Also, the technology covered a long way to make human life easier but the negative aspect of it can’t be ignored. Over the years technological advancement has caused a severe rise in pollution. Also, pollution has become a major cause of many health issues. Besides, it has cut off people from society rather than connecting them. Above all, it has taken away many jobs from the workers class.
5
+ Familiarity between Technology and Science
6
+ As they are completely different fields but they are interdependent on each other. Also, it is due to science contribution we can create new innovation and build new technological tools. Apart from that, the research conducted in laboratories contributes a lot to the development of technologies. On the other hand, technology extends the agenda of science.
7
+
8
+ Vital Part of our Life
9
+ Regularly evolving technology has become an important part of our lives. Also, newer technologies are taking the market by storm and the people are getting used to them in no time. Above all, technological advancement has led to the growth and development of nations.
10
+
11
+ Negative Aspect of Technology
12
+ Although technology is a good thing, everything has two sides. Technology also has two sides one is good and the other is bad. Here are some negative aspects of technology that we are going to discuss.
13
+ Pollution
14
+ With new technology the industrialization increases which give birth to many pollutions like air, water, soil, and noise. Also, they cause many health-related issues in animals, birds, and human beings.
15
+
16
+ Exhaustion of Natural Resources
17
+ New technology requires new resources for which the balance is disturbed. Eventually, this will lead to over-exploitation of natural resources which ultimately disturbs the balance of nature.
18
+
19
+ Unemployment
20
+ A single machine can replace many workers. Also, machines can do work at a constant pace for several hours or days without stopping. Due to this, many workers lost their job which ultimately increases unemployment.
21
+
22
+ Types of Technology
23
+ Generally, we judge technology on the same scale but in reality, technology is divided into various types. This includes information technology, industrial technology, architectural technology, creative technology and many more. Let’s discuss these technologies in brief.
24
+
25
+ Industrial Technology
26
+ This technology organizes engineering and manufacturing technology for the manufacturing of machines. Also, this makes the production process easier and convenient.
27
+
28
+ Creative Technology
29
+ This process includes art, advertising, and product design which are made with the help of software. Also, it comprises of 3D printers, virtual reality, computer graphics, and other wearable technologies.
30
+
31
+ Information Technology
32
+ This technology involves the use of telecommunication and computer to send, receive and store information. Internet is the best example of Information technology.
33
+ Today, everything we use in our daily life is a gift of technology and without which we cannot imagine our lives. Also, we cannot refuse the facts that it has caused severe damage to our surroundings.
docs/Black Friday.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Every year in November, people look for bargains on Black Friday. But did you know that the same day is also Buy Nothing Day?
2
+ What is Black Friday?
3
+ Black Friday is the day after the American holiday of Thanksgiving, which is celebrated on the fourth Thursday of November. Because it is a holiday in the United States, it has long been a popular day for consumers to start shopping for Christmas. Over the last 20 years big retailers have started to offer discounts and bargains on this day, and it has become more and more popular. Last year, people in the USA spent an estimated $54.7 billion between Black Friday and Cyber Monday (the Monday after Thanksgiving, when people often buy more online). The idea of Black Friday has also spread around the world. For example, in 2017, people in the UK spent the equivalent of $10.3 billion, in Germany $7.6 billion and in France $6.2 billion.
4
+
5
+ Is Black Friday out of control?
6
+ Many of us love to get a bargain, but some feel that events like Black Friday encourage people to buy things that they don’t really need and can’t afford. Many people seem to completely lose control of both their spending and their tempers. It is easy to find video online of customers physically fighting each other over bargains. It is also argued that Black Friday is bad for small shopkeepers, who cannot afford to offer the kinds of price cuts that the big companies can.
7
+
8
+ What’s the alternative to Black Friday?
9
+ Instead of taking the opportunity to buy as much as possible on Black Friday, you could do the opposite and buy absolutely nothing. Since 1997, Buy Nothing Day has been held on the same day as Black Friday. The rules are simple. Just don’t buy anything at all for 24 hours. Many people are surprised how difficult this actually is. The aim is to make people think more about their spending and to make better decisions about what they buy and where they buy it from.
10
+
11
+ Ethical spending
12
+ As well as spending less and not buying unnecessary items, Buy Nothing Day aims to raise awareness of how to be a more ethical consumer. For example, you can avoid buying ‘fast fashion’, that is, very cheap clothes that are worn a few times before being thrown away. Or you could decide not to automatically upgrade your mobile at the end of a contract. These kinds of decisions can help to protect the environment as well as saving you money.
13
+
14
+ What else can you do on Buy Nothing Day?
15
+ Some people carry out protests at shopping centres. Others avoid the shops completely and go for a walk in nature instead. Another alternative, the Buy Nothing Coat Exchange, is an idea which is spreading. People donate winter coats throughout November and anyone who needs one can come and take one on Buy Nothing Day.
docs/Computer and its Uses for School Students and Children.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ In this essay on computer, we are going to discuss some useful things about computers. The modern-day computer has become an important part of our daily life. Also, their usage has increased much fold during the last decade. Nowadays, they use the computer in every office whether private or government. Mankind is using computers for over many decades now. Also, they are used in many fields like agriculture, designing, machinery making, defense and many more. Above all, they have revolutionized the whole world.
2
+
3
+ History of Computers
4
+ It is very difficult to find the exact origin of computers. But according to some experts, computers existed at the time of World War II. Also, at that time they were used for keeping data. But, it was for only government use and not for public use. Above all, in the beginning, the computer was a very large and heavy machine.
5
+
6
+ Working of a Computer
7
+ The computer runs on a three-step cycle namely input, process, and output. Also, the computer follows this cycle in every process it was asked to do. In simple words, the process can be explained in this way. The data which we feed into the computer is input, the work CPU do is process and the result which the computer give is output.
8
+
9
+ Components and Types of Computer
10
+ The simple computer basically consists of CPU, monitor, mouse, and keyboard. Also, there are hundreds of other computer parts that can be attached to it. These other parts include a printer, laser pen, scanner, etc.
11
+
12
+ The computer is categorized into many different types like supercomputers, mainframes, personal computers (desktop), PDAs, laptop, etc. The mobile phone is also a type of computer because it fulfills all the criteria of being a computer.
13
+ Uses of Computer in Various Fields
14
+ As the usage of computer increased it became a necessity for almost every field to use computers for their operations. Also, they have made working and sorting things easier. Below we are mentioning some of the important fields that use a computer in their daily operation.
15
+
16
+ Medical Field
17
+ They use computers to diagnose diseases, run tests and for finding the cure for deadly diseases. Also, they are able to find a cure for many diseases because of computers.
18
+
19
+ Research
20
+ Whether it’s scientific research, space research or any social research computers help in all of them. Also, due to them, we are able to keep a check on the environment, space, and society. Space research helped us to explore the galaxies. While scientific research has helped us to locate resources and various other useful resources from the earth.
21
+
22
+ Defense
23
+ For any country, its defence is most important for the safety and security of its people. Also, computers in this field help the country’s security agencies to detect a threat which can be harmful in the future. Above all, the defense industry uses them to keep surveillance on our enemy.
24
+
25
+ Threats from a Computer
26
+ Computers have become a necessity also, they have become a threat too. This is due to hackers who steal your private data and leak them on internet. Also, anyone can access this data. Apart from that, there are other threats like viruses, spams, bug and many other problems.
27
+
28
+ The computer is a very important machine that has become a useful part of our life. Also, the computers have twin-faces on one side it’s a boon and on the other side, it’s a bane. Its uses completely depend upon you. Apart from that, a day in the future will come when human civilization won’t be able to survive without computers as we depend on them too much. Till now it is a great discovery of mankind that has helped in saving thousands and millions of lives.
docs/How LED lights can save sea turtles’ lives.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Sea turtles are amazing animals that have lived in our oceans for millions of years. However, today, they face many dangers. One of these comes from non-natural light. When baby sea turtles, also called hatchlings, come out of their eggs, they need to find the ocean quickly. They usually do this by following the natural light of the Moon and stars reflecting off the water. But in many places, artificial lights – from streets, buildings, and homes – are much brighter than the Moon. These lights can confuse baby sea turtles and make it hard for them to find their way to the ocean. This causes them to lose their way and head toward the land instead of the sea. When this happens, the baby turtles can get lost, dehydrated, or even be eaten by other animals.
2
+
3
+ Although artificial light is usually a problem for sea turtles, we don’t have to live in the dark to protect them. Research shows that using special artificial lights, placed low to the ground and slightly covered so they can’t be seen from the beach, reduces the chances of sea turtles getting confused. These turtle-friendly lights are also better for people as they can improve visibility while driving by reducing the shine on the car windows. If you live near the coast or are visiting a beach where sea turtles live, you can help. Use lights that are not bright and low to the ground. Close curtains at night to reduce the amount of light coming from inside buildings.
4
+
5
+ Sea turtles have another enemy. Every year, hundreds of thousands of these sea animals are caught by large fishing boats in their fishing nets by accident – this means six to eight turtles daily for each boat in Mexico alone. Surprisingly, in this case, artificial lights can be helpful for sea turtles.
6
+
7
+ Studies show that turtles use their sight to find food, but when swimming underwater at night, it’s hard to see the fishing net. So, different organisations have developed fishing nets with LED lights. By adding lights to fishing nets, scientists have found a way to prevent turtles and other animals from getting caught in the nets, reducing the number of unwanted catches by 60% to 95% without lowering the amount of fish caught.
8
+
9
+ Using LED lights on fishing nets doesn’t just help sea turtles. When sea animals get stuck in the net by mistake, they might damage it. It costs time and money to remove sea turtles from their nets and to fix or replace the broken nets. In addition, LED lights are energy-efficient and last a long time. This new design of fishing nets reduces costs, making it a less expensive option.
docs/Modern-day slavery.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ December 2 is the United Nation's International Day for the Abolition of Slavery. Did you know that slavery still exists today and is probably much closer to you than you realise?
2
+ Blood and Earth
3
+ In his book Blood and Earth, Kevin Bales speaks with Ibrahim, a 23-year-old slave who has worked in a gold mine since he was nine. He is dying. His lungs are filled with liquid caused by the dust and bacteria in the mine. As their conversation ends, Ibrahim turns to Kevin Bales and says, 'I want to be remembered. When my story is written and your book is ready, will you send me a copy? I want to show it to others, to show them that I am not completely useless. I just want to show that something good can come out of my life.'
4
+
5
+ So what's the connection to you? As you read this article, you are probably using a smartphone, tablet, or laptop. Each device requires minerals – including gold. Perhaps the gold in your electronic device was mined by slaves.
6
+ Slavery today
7
+ According to the Global Slavery Index 2018, over 40 million people are victims of modern slavery, and of these, 15 million are in forced marriage. Slavery involves violence, physical or psychological, and control – often in the form of threats in order to generate profit. To quote Kevin Bales, 'Slavery is when one person controls another, uses violence to maintain that control and exploits them economically.' This violence may be physical and/or psychological, and the control may be verbal threats – but at the heart of slavery is exploitation and 'ownership' of another human being for profit. Forms of modern slavery include forced labour, human trafficking, commercial sexual exploitation, domestic servitude and forced marriage.
8
+
9
+ You might be surprised to see forced marriage included above. Sadly, forced marriage involves the same lack of choice, power imbalance, coercion and labour exploitation as other forms of slavery. This also includes forced child marriage – usually of girls, of 17 years or younger.
10
+
11
+ Slavery behind closed doors
12
+ Another form of slavery is domestic servitude. Across the globe, domestic workers, mostly women, migrate abroad to support their families back home. Employment agents in their country of origin promise a generous salary and good working conditions with a caring host family. This, however, may be far from reality. Domestic workers are sometimes forced to work long hours and their passports and mobile phones are taken away. In extreme cases, behind closed doors of private homes, they are locked up, starved, deprived of sleep and often physically and sexually abused. They are trapped, scared and unfamiliar with their new surroundings. Domestic servitude happens globally, including in the UK.
13
+
14
+ The power of consumer choice
15
+ Every item we buy has a back story. From electronics to textiles, from handmade carpets to coffee, tea and chocolate, each of these products might include child or adult slavery. Consider a product as innocent as chocolate. While the chocolate bar itself may have been produced in your country, the cocoa in the chocolate probably came from West Africa, where 60 per cent of the world's cocoa is produced. As you read this, thousands of children and adults live in slave-like conditions on cocoa farms. Unknowingly, your purchase might support slavery. However, consumer demand for ethically-sourced products and services can send a powerful message to producers. Imagine if we all refused to purchase goods that have a back story of slavery. Company sales, and therefore profits, would fall. Look around at items in your home and workplace and ask yourself the simple question, 'Where did this come from and who made it?'
16
+
17
+ Why didn't I learn about modern slavery at school?
18
+ Did you ever learn about modern slavery at school? History lessons may have included the horrific practice of slavery, however, it was probably considered something that was very much 'in the past'. But slavery still exists and it is the everyday reality for millions of people. It takes brave educators to raise awareness of the difficult, upsetting and invisible reality of modern slavery.
19
+
20
+ The good news is that thousands of individuals and anti-slavery organisations are taking action. One such organisation is The NO Project, which focuses specifically on the education of youth and young adults. 'Youth are the next generation of corporate leaders, policy makers and consumers,' says the founder of The NO Project. 'How we choose to spend our money says a lot about who we are. So, the question is – who are we? And remember, another time, in another place, that enslaved human being could be you.'
docs/Underwater search for lost love.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Yasuo Takamatsu met Yuko in 1988. Yasuo was a soldier and Yuko worked at a bank in Onagawa, Japan. They quickly fell in love. Yuko was gentle, modest, and enjoyed classical music and painting. On Friday, March 11, 2011, Yasuo drove Yuko to the bank. Later that day, a powerful earthquake hit the city, followed by a tsunami warning.
2
+
3
+ Yasuo was at a hospital with his mother when the earthquake happened. The main roads to Onagawa were blocked. He was driving back on the small back roads when Yuko texted, “Are you O.K.? I want to go home.” The tsunami reached Onagawa at 3:20 p.m., destroying buildings and killing people. The next morning, soldiers arrived to search for bodies. Yasuo searched for Yuko every day from morning till evening until June, when he started a new job. Then he searched on weekends, always hoping not to find Yuko’s body.
4
+
5
+ A month after the tsunami, Yuko’s pink flip phone was found in the bank’s parking lot. An unsent text from 3:25 p.m. read, “So much tsunami.” Yasuo knew she had been alive until then. Other bank employees’ bodies were found later. One was found six weeks after the tsunami, another in September 2011, but Yasuo still searched for Yuko.
6
+
7
+ By September 2013, after two and a half years of searching on land, Yasuo decided to search the sea. He contacted a dive shop to learn how to dive. The instructor, Masayoshi Takahashi, organised dives to clean tsunami rubbish. Yasuo believed Takahashi could help find Yuko. Yasuo told him, “I want to learn to dive to find my wife.” On his first dive, the water was freezing. Yasuo was scared. He could have hurt himself or got caught by a rope, but that didn’t worry him. The water wasn’t clear, and that was the real danger. Takahashi told him not to touch the bottom to avoid moving the sand.
8
+
9
+ One day, Yasuo visited Masaaki Narita, who had lost his daughter, Emi, in the tsunami. Emi worked with Yuko at the bank. The women had gone to the bank’s roof but were swept away by the massive wave. Yasuo felt sorry for Narita and offered to look for Emi too. But Narita decided to dive himself. In February 2014, Yasuo introduced Narita to Takahashi.
10
+
11
+ In January 2016, Narita prepared for a dive. His wife, Hiromi, watched because she worried about him. The ocean was dangerous, and she didn’t want to lose him too. Narita said, “If I die, throw my ashes in the sea.” He dove, and after 35 minutes, resurfaced safely. Hiromi walked to her car and drove off. It was time to deliver rice balls and deep-fried chicken.
12
+
13
+ Despite all these efforts, Yasuo continued his search for Yuko, holding on to hope.
rag.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Client
2
+ from retriever import Retriever
3
+ from typing import List
4
+ import glob
5
+
6
+ # Relative name of the documents directory ('docs').
+ # NOTE(review): nothing in the visible part of rag.py reads this constant (read_docs takes its own dir_path argument) — presumably imported by the app entry point; confirm.
+ dir_path = 'docs'
7
+
8
class QuestionAnsweringBot:
    """Answer questions with a Groq-hosted chat model grounded on retrieved text.

    For every question the bot asks its ``Retriever`` for relevant chunks,
    puts them into the user message as context and returns the model's reply.
    """

    # System prompt sent with every request.
    PROMPT = """
    You are a helpful assistant that answers the questions.

    Rules:
    - Reply with answer only and nothing but answer.
    - Say 'I don`t know' if you don`t know the answer.
    - Use the provided context.
    """

    # Model id used when the caller does not choose one.
    DEFAULT_MODEL = "llama3-70b-8192"

    def __init__(
        self,
        docs: List[str],
        score: int,
        api_key,
        model: str = DEFAULT_MODEL,
    ) -> None:
        """Build the retriever and the Groq client.

        Args:
            docs: Raw document texts handed to ``Retriever``.
            score: Retrieval mode forwarded to ``Retriever``.
            api_key: API key passed to the Groq ``Client``.
            model: Chat model id used for completions.
        """
        self.retriever = Retriever(docs=docs, score=score)
        self.client = Client(api_key=api_key)
        self.model = model

    def answer_question(self, question: str) -> str:
        """Return the model's answer to *question* using retrieved context.

        Args:
            question: The user's question; also used as the retrieval query.

        Returns:
            The text content of the first completion choice.
        """
        chunks = self.retriever.get_docs(query=question)
        # Join the chunks into plain text: interpolating the list itself
        # would send its Python repr (brackets, quotes, escaped newlines).
        context = "\n\n".join(chunks)
        messages = [
            {"role": "system", "content": self.PROMPT},
            {
                "role": "user",
                "content": f"Context: {context}\nQuestion: {question}",
            },
        ]
        chat_completion = self.client.chat.completions.create(
            messages=messages,
            model=self.model,
        )

        return chat_completion.choices[0].message.content
41
+
42
+
43
def read_docs(dir_path) -> List[str]:
    """Read every ``*.txt`` file directly inside *dir_path* as UTF-8 text.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        The file contents, ordered by path so repeated runs yield the same
        document order. Empty when the directory has no ``.txt`` files or
        does not exist.
    """
    # Escape the directory part so characters such as '[' in its name are
    # taken literally; only the '*.txt' suffix acts as a pattern.
    pattern = f'{glob.escape(str(dir_path))}/*.txt'
    docs = []
    # glob returns matches in arbitrary order; sort for reproducibility.
    for path in sorted(glob.glob(pattern)):
        with open(path, 'r', encoding='utf-8') as file:
            docs.append(file.read())
    return docs
requirements.txt ADDED
Binary file (2.48 kB). View file
 
retriever.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+
4
+ from typing import List
5
+ from rank_bm25 import BM25Okapi
6
+ from sentence_transformers import SentenceTransformer, CrossEncoder
7
+
8
+ from chunker import chunk_documents
9
+
10
+
11
+
12
+ class Retriever:
13
+ def __init__(self, docs: List[str], score: int) -> None:
14
+ self.docs = chunk_documents(docs=docs)
15
+ self.score = score
16
+ tokenized_docs = [doc.lower().split(" ") for doc in self.docs]
17
+ self.bm25 = BM25Okapi(tokenized_docs)
18
+ self.sbert = SentenceTransformer(
19
+ 'sentence-transformers/all-distilroberta-v1'
20
+ )
21
+ self.doc_embeddings = self.sbert.encode(
22
+ self.docs, show_progress_bar=True
23
+ )
24
+ self.cross_encoder = CrossEncoder("cross-encoder/stsb-roberta-base")
25
+
26
+
27
+ def get_docs(self, query: str, n: int = 5, score: int = 2) -> List[str]:
28
+ match score:
29
+ case 0:
30
+ bm25_scores = self._get_bm25_scores(query=query)
31
+ sorted_indices = torch.Tensor.tolist(
32
+ np.argsort(bm25_scores)
33
+ )[::-1]
34
+ case 1:
35
+ semantic_scores = self._get_semantic_scores(query=query)
36
+ sorted_indices = torch.Tensor.tolist(
37
+ np.argsort(semantic_scores)
38
+ )[::-1]
39
+ case 2:
40
+ bm25_scores = self._get_bm25_scores(query=query)
41
+ semantic_scores = self._get_semantic_scores(query=query)
42
+ scores = torch.tensor(0.3 * bm25_scores) + 0.7 * semantic_scores
43
+ sorted_indices = torch.Tensor.tolist(np.argsort(scores))[::-1]
44
+
45
+ preselected_docs = [self.docs[i] for i in sorted_indices][:n]
46
+ result = self.rerank(query=query, docs=preselected_docs)
47
+
48
+ return result
49
+
50
+ def _get_bm25_scores(self, query: str) -> np.ndarray[float]:
51
+ tokenized_query = query.lower().split(" ")
52
+ bm25_scores = self.bm25.get_scores(tokenized_query)
53
+
54
+ return bm25_scores
55
+
56
+ def _get_semantic_scores(self, query: str) -> torch.Tensor:
57
+ query_embeddings = self.sbert.encode(query)
58
+ semantic_scores = self.sbert.similarity(
59
+ query_embeddings, self.doc_embeddings
60
+ )
61
+
62
+ return semantic_scores[0]
63
+
64
+ def rerank(self, query: str, docs: List[str]) -> List[str]:
65
+ pairs = [(query, doc) for doc in docs]
66
+ rerank_scores = self.cross_encoder.predict(pairs)
67
+ reranked_docs = [doc for _, doc in sorted(zip(rerank_scores, docs), reverse=True)]
68
+
69
+ return reranked_docs