nijoow committed
Commit e76bd22 · 1 Parent(s): 8563055

Delete import streamlit as st.py

Files changed (1)
  1. import streamlit as st.py +0 -226
import streamlit as st.py DELETED
@@ -1,226 +0,0 @@
-import streamlit as st
-from dotenv import load_dotenv
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import FAISS
-from langchain.embeddings import HuggingFaceEmbeddings  # General embeddings from HuggingFace models.
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
-from langchain.docstore.document import Document  # Wraps raw text so every loader returns Documents.
-from htmlTemplates import css, bot_template, user_template
-from langchain.llms import LlamaCpp  # For loading GGUF models via llama.cpp.
-from langchain.document_loaders import PyPDFLoader, CSVLoader
-import csv
-import json
-import tempfile  # Library for creating temporary files.
-import os
-from huggingface_hub import hf_hub_download  # Function for downloading models from the Hugging Face Hub.
-
-
-# Extracts text from a PDF document.
-def get_pdf_text(pdf_docs):
-    temp_dir = tempfile.TemporaryDirectory()  # Create a temporary directory.
-    temp_filepath = os.path.join(temp_dir.name, pdf_docs.name)  # Build the temporary file path.
-    with open(temp_filepath, "wb") as f:  # Open the temporary file in binary write mode.
-        f.write(pdf_docs.getvalue())  # Write the uploaded PDF's bytes to the temporary file.
-    pdf_loader = PyPDFLoader(temp_filepath)  # Load the PDF with PyPDFLoader.
-    pdf_doc = pdf_loader.load()  # Extract the text as a list of Documents.
-    return pdf_doc  # Return the extracted Documents.
-
-
-# Assignment:
-# implement the text-extraction functions below. Each returns a list of
-# Documents so the results can be chunked together with the PDF output.
-def get_text_file(docs):
-    # Decode the uploaded .txt file and wrap it in a Document.
-    text_content = docs.getvalue().decode('utf-8')
-    return [Document(page_content=text_content, metadata={'source': docs.name})]
-
-
-def get_csv_file(docs):
-    # Write the upload to a temporary file so CSVLoader can read it;
-    # CSVLoader emits one Document per row.
-    temp_dir = tempfile.TemporaryDirectory()
-    temp_filepath = os.path.join(temp_dir.name, docs.name)
-    with open(temp_filepath, "wb") as f:
-        f.write(docs.getvalue())
-    csv_loader = CSVLoader(temp_filepath)
-    return csv_loader.load()
-
-
-def get_json_file(docs):
-    # Parse the uploaded JSON and wrap its serialized content in a Document.
-    json_data = json.loads(docs.getvalue().decode('utf-8'))
-    return [Document(page_content=json.dumps(json_data, ensure_ascii=False),
-                     metadata={'source': docs.name})]
-
-
-# Splits the loaded Documents into text chunks.
-def get_text_chunks(documents):
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,      # Maximum size of each chunk.
-        chunk_overlap=200,    # Overlap between consecutive chunks.
-        length_function=len   # Function used to measure text length.
-    )
-
-    documents = text_splitter.split_documents(documents)  # Split the Documents into chunks.
-    return documents  # Return the chunks.
-
-
-# Builds a vector store from the text chunks.
-def get_vectorstore(text_chunks):
-    # Load the desired embedding model.
-    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2',
-                                       model_kwargs={'device': 'cpu'})  # Run the embeddings on CPU.
-    vectorstore = FAISS.from_documents(text_chunks, embeddings)  # Create the FAISS vector store.
-    return vectorstore  # Return the vector store.
-
-
-def get_conversation_chain(vectorstore):
-    model_name_or_path = 'TheBloke/Llama-2-7B-chat-GGUF'
-    model_basename = 'llama-2-7b-chat.Q2_K.gguf'
-    model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-
-    llm = LlamaCpp(model_path=model_path,
-                   n_ctx=4096,        # Context window size (4,096 tokens for Llama 2).
-                   temperature=0.75,  # Sampling settings are passed as direct arguments.
-                   max_tokens=2000,
-                   top_p=1,
-                   verbose=True)
-    # Create the memory that stores the conversation history.
-    memory = ConversationBufferMemory(
-        memory_key='chat_history', return_messages=True)
-    # Create the conversational retrieval chain.
-    conversation_chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=vectorstore.as_retriever(),
-        memory=memory
-    )
-    return conversation_chain  # Return the conversation chain.
-
-
-# Handles user input.
-def handle_userinput(user_question):
-    print('user_question => ', user_question)
-    # Generate a response to the user's question with the conversation chain.
-    response = st.session_state.conversation({'question': user_question})
-    # Save the conversation history.
-    st.session_state.chat_history = response['chat_history']
-
-    # Messages alternate user/assistant, so even indices are the user's turns.
-    for i, message in enumerate(st.session_state.chat_history):
-        if i % 2 == 0:
-            st.write(user_template.replace(
-                "{{MSG}}", message.content), unsafe_allow_html=True)
-        else:
-            st.write(bot_template.replace(
-                "{{MSG}}", message.content), unsafe_allow_html=True)
-
-
-def main():
-    load_dotenv()
-    st.set_page_config(page_title="Chat with multiple Files",
-                       page_icon=":books:")
-    st.write(css, unsafe_allow_html=True)
-
-    if "conversation" not in st.session_state:
-        st.session_state.conversation = None
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = None
-
-    st.header("Chat with multiple Files:")
-    user_question = st.text_input("Ask a question about your documents:")
-    if user_question:
-        handle_userinput(user_question)
-
-    with st.sidebar:
-        st.subheader("Your documents")
-        docs = st.file_uploader(
-            "Upload your files here and click on 'Process'", accept_multiple_files=True)
-        if st.button("Process"):
-            with st.spinner("Processing"):
-                # Extract Documents from each uploaded file by MIME type.
-                doc_list = []
-
-                for file in docs:
-                    print('file - type : ', file.type)
-                    if file.type == 'text/plain':
-                        # file is .txt
-                        doc_list.extend(get_text_file(file))
-                    elif file.type in ['application/octet-stream', 'application/pdf']:
-                        # file is .pdf
-                        doc_list.extend(get_pdf_text(file))
-                    elif file.type == 'text/csv':
-                        # file is .csv
-                        doc_list.extend(get_csv_file(file))
-                    elif file.type == 'application/json':
-                        # file is .json
-                        doc_list.extend(get_json_file(file))
-
-                # Split the Documents into text chunks.
-                text_chunks = get_text_chunks(doc_list)
-
-                # Create the vector store.
-                vectorstore = get_vectorstore(text_chunks)
-
-                # Create the conversation chain.
-                st.session_state.conversation = get_conversation_chain(
-                    vectorstore)
-
-
-if __name__ == '__main__':
-    main()
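
For reference, the deleted script imports a local htmlTemplates module (css, bot_template, user_template) that is not part of this diff. The sketch below is a hypothetical minimal stand-in, not the Space's actual file: the only requirements implied by the code above are that css is markup rendered via st.write(..., unsafe_allow_html=True) and that both templates contain a {{MSG}} placeholder, which handle_userinput replaces with the message text.

# htmlTemplates.py -- hypothetical minimal stand-in (assumption, not from this repo).
css = '''
<style>
.chat-message { padding: 1rem; border-radius: 0.5rem; margin-bottom: 1rem; }
.chat-message.user { background-color: #2b313e; color: #fff; }
.chat-message.bot { background-color: #475063; color: #fff; }
</style>
'''

# {{MSG}} is substituted with message.content in handle_userinput().
user_template = '<div class="chat-message user">{{MSG}}</div>'
bot_template = '<div class="chat-message bot">{{MSG}}</div>'

With such a module alongside it, the script would be started with Streamlit's CLI: streamlit run app.py.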