Spaces:
Sleeping
Sleeping
import os | |
import openai | |
import streamlit as st | |
from dotenv import load_dotenv | |
from langchain.vectorstores import Chroma | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.llms import OpenAI | |
from langchain.chains import RetrievalQA | |
from langchain.document_loaders import TextLoader | |
from langchain.document_loaders import DirectoryLoader | |
# from streamlit_chat import message | |
# Streamlit, Python, and VS Code are in the same folder.
# Not sure whether this file also has to be placed in that same folder.
# import streamlit as st | |
# from io import StringIO | |
# Path: /Users/gyuribyun/Documents/legoapp
# Enter the path in the terminal first (cd <path>, copy-pasted),
# then run `streamlit run app.py` to get the app link.
# Page header: title, caption, and an introductory markdown line, followed by
# the password-style input for the user's OpenAI API key.
#
# NOTE(review): the Korean UI strings below look mis-encoded (UTF-8 bytes
# rendered through a Thai code page, with some bytes lost). They are kept
# verbatim here because they are runtime text; restore them from the original
# file rather than by guessing.

# Apply the title; `:robot_face:` is an example of inserting an emoji shortcode.
# Emoji shortcode list:
# https://streamlit-emoji-shortcodes-streamlit-app-gwckff.streamlit.app/
st.title(':robot_face:ํ๊ตญ์ฌ๋ด? ๋ ์ ๋ง ๋๋ํ๋?')

# Apply the caption.
st.caption('ํ๊ตญ์ฌ ๊ต๊ณผ์๋ฅผ ์ฝ๊ณ ์ดํดํ์ฌ ๋ต๋ณ์ ์ ๊ณตํ๋ ๋ก๋ด์ ๋๋ค. ๋ก๋ด์ด ๊ต๊ณผ์๋ฅผ ์ ๋๋ก ์ดํดํ๋์ง ํ์ธํด๋ณด์ธ์!')

# Supplementary explanation rendered as markdown.
st.markdown('###### ์ง๋ฌธ, ์์ฝ ๋ฑ ๋ค์ํ ๋ถํ์ ํด ๋ณด์ธ์! ๊ต๊ณผ์์ ์ด๋ค ๋ถ๋ถ์ ์ฐธ๊ณ ํ๋์ง ๋น๊ตํ๋ฉฐ ํ๊ตญ์ฌ๋ด:robot_face:์ด ๊ต๊ณผ์๋ฅผ ์ ๋๋ก ์ดํดํ๋์ง ํ์ธํด๋ณด์ธ์!:sparkles:')

# The key is masked in the UI; an empty string means "not entered yet".
api_key = st.text_input(label='OpenAI API ํค๋ฅผ ์ ๋ ฅํ์ธ์', type='password')
# Streamlit re-executes this script on every widget interaction. Without
# caching, each rerun would re-embed the whole corpus (paid API calls) and
# append the same chunks to the persisted Chroma store again.
@st.cache_resource
def initialize_openai_processing(api_key):
    """Build the retrieval-QA pipeline over the text files in ./khistory_data.

    Loads every ``*.txt`` file in the data directory, splits the documents
    into overlapping chunks, embeds the chunks into a Chroma store persisted
    under ``db``, and wraps that store in a ``RetrievalQA`` chain that also
    returns the retrieved source documents.

    The result is cached per ``api_key`` for the lifetime of the Streamlit
    server process.

    NOTE(review): a cold start (or a new key) still re-indexes into the
    existing ``db`` directory; consider loading an existing index instead of
    rebuilding it.

    Args:
        api_key: OpenAI API key, used for both the embeddings and the LLM.

    Returns:
        A tuple ``(embedding, vectordb, qa_chain)``.
    """
    loader = DirectoryLoader('./khistory_data', glob="*.txt", loader_cls=TextLoader)
    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
    chunks = splitter.split_documents(loader.load())

    persist_directory = 'db'
    # `openai_api_key` is the field name accepted by the LangChain OpenAI
    # wrappers; pass the user-entered key explicitly instead of relying on
    # the OPENAI_API_KEY environment variable.
    embedding = OpenAIEmbeddings(openai_api_key=api_key)
    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        persist_directory=persist_directory,
    )
    vectordb.persist()

    # Retrieve the three most similar chunks for each question.
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(
        # The LLM needs the same key as the embeddings.
        llm=OpenAI(openai_api_key=api_key),
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    return embedding, vectordb, qa_chain


if api_key:
    # Run (or fetch from cache) the initialization for this key.
    embedding, vectordb, qa_chain = initialize_openai_processing(api_key)

    # Text input for the question.
    query = st.text_input(
        label='ํ๊ตญ์ฌ๋ด์๊ฒ ์ง๋ฌธํด๋ณด์ธ์!',
        placeholder='์์: ๋ํ ๋๋ฏผ ์ด๋์ ์ ์ผ์ด๋ฌ๋์?',
    )
    st.write(f'์ง๋ฌธ ๋ด์ฉ: :violet[{query}]')

    # Button click triggers the actual LLM call.
    button = st.button(':robot_face:ํ๊ตญ์ฌ๋ด์๊ฒ ๋ฌผ์ด๋ณด๊ธฐ')
    if button:
        if not query.strip():
            # Do not spend an API call on an empty question.
            st.warning('Please enter a question first.')
        else:
            llm_response = qa_chain(query)
            result = llm_response.get('result')
            st.write('๊ฒฐ๊ณผ: ', f'{result}')
            # Show however many source chunks were retrieved; the retriever
            # may return fewer than three for a small corpus.
            sources = llm_response.get('source_documents') or []
            for position, document in enumerate(sources, start=1):
                st.write(f'๊ต๊ณผ์ ๋ด์ฉ {position}: {document}')