Spaces:
Sleeping
Sleeping
File size: 3,410 Bytes
7447dda f4684a2 7447dda f4684a2 7447dda f4684a2 7447dda 6e77103 7447dda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import openai
import streamlit as st
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
# NOTE(review): the Korean user-facing strings in this script appear to be
# mis-encoded in this copy (UTF-8 text shown through a Thai code page).
# They are kept as found; restore them from the original UTF-8 source.

# Page title; the ":robot_face:" shortcode is an example of inserting an emoji.
# emoji: https://streamlit-emoji-shortcodes-streamlit-app-gwckff.streamlit.app/
st.title(':robot_face:ํ๊ตญ์ฌ๋ด? ๋ ์ ๋ง ๋๋ํ๋?')

# Caption shown under the title.
st.caption('ํ๊ตญ์ฌ ๊ต๊ณผ์๋ฅผ ์ฝ๊ณ ์ดํดํ์ฌ ๋ต๋ณ์ ์ ๊ณตํ๋ ๋ก๋ด์๋๋ค. ๋ก๋ด์ด ๊ต๊ณผ์๋ฅผ ์ ๋๋ก ์ดํดํ๋์ง ํ์ธํด๋ณด์ธ์!')

# Additional explanation rendered as Markdown.
st.markdown('###### ์ง๋ฌธ, ์์ฝ ๋ฑ ๋ค์ํ ๋ถํ์ ํด ๋ณด์ธ์! ๊ต๊ณผ์์ ์ด๋ค ๋ถ๋ถ์ ์ฐธ๊ณ ํ๋์ง ๋น๊ตํ๋ฉฐ ํ๊ตญ์ฌ๋ด:robot_face:์ด ๊ต๊ณผ์๋ฅผ ์ ๋๋ก ์ดํดํ๋์ง ํ์ธํด๋ณด์ธ์!:sparkles:')

# Masked input for the user's OpenAI API key; surrounding whitespace from a
# copy/paste is dropped so a blank entry stays falsy.
api_key = st.text_input(label='OpenAI API ํค๋ฅผ ์๋ ฅํ์ธ์', type='password').strip()

if api_key:
    # Publish the key where the libraries actually look for it: the `openai`
    # module attribute and the OPENAI_API_KEY environment variable (the
    # fallback LangChain's OpenAI wrappers use when no key is passed to their
    # constructors). Setting an attribute on LangChain's `OpenAI` class has no
    # such effect. An empty entry must not clobber a key that is already set.
    openai.api_key = api_key
    os.environ['OPENAI_API_KEY'] = api_key
if api_key:
    # Streamlit re-executes this script on every widget interaction, so the
    # expensive indexing step is cached and rebuilt only when the key changes.
    @st.cache_resource
    def initialize_openai_processing(openai_api_key=None):
        """Build the retrieval-QA pipeline over the textbook files.

        Loads every ``*.txt`` file under ``./khistory_data``, splits the
        documents (chunk_size=600, chunk_overlap=100), embeds the chunks with
        OpenAI embeddings into a Chroma store persisted in ``db``, and wraps a
        top-3 retriever in a "stuff" ``RetrievalQA`` chain that also returns
        its source documents.

        Args:
            openai_api_key: OpenAI API key handed to the embedding model and
                the LLM. With ``None`` the LangChain wrappers fall back to
                the ``OPENAI_API_KEY`` environment variable.

        Returns:
            A ``(embedding, vectordb, qa_chain)`` tuple.
        """
        loader = DirectoryLoader('./khistory_data', glob="*.txt", loader_cls=TextLoader)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)

        persist_directory = 'db'
        # Pass the user's key explicitly instead of relying on ambient state.
        embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

        # NOTE(review): from_documents presumably appends to an already
        # persisted collection, so each rebuild (process restart, new key)
        # may store the same chunks again -- confirm and de-duplicate if so.
        vectordb = Chroma.from_documents(
            documents=texts,
            embedding=embedding,
            persist_directory=persist_directory,
        )
        vectordb.persist()

        # Drop the freshly built handle and reopen the store from disk, as
        # the original code did.
        vectordb = None
        vectordb = Chroma(
            persist_directory=persist_directory,
            embedding_function=embedding,
        )

        retriever = vectordb.as_retriever(search_kwargs={"k": 3})
        qa_chain = RetrievalQA.from_chain_type(
            llm=OpenAI(openai_api_key=openai_api_key),
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )
        return embedding, vectordb, qa_chain

    # Run (or fetch the cached result of) the initialisation.
    embedding, vectordb, qa_chain = initialize_openai_processing(api_key)

    # Question input.
    query = st.text_input(
        label='ํ๊ตญ์ฌ๋ด์๊ฒ ์ง๋ฌธํด๋ณด์ธ์!',
        placeholder='์์: ๋ํ ๋๋ฏผ ์ด๋์ ์ ์ผ์ด๋ฌ๋์?',
    )
    st.write(f'์ง๋ฌธ ๋ด์ฉ: :violet[{query}]')

    # Submit button.
    button = st.button(':robot_face:ํ๊ตญ์ฌ๋ด์๊ฒ ๋ฌผ์ด๋ณด๊ธฐ')

    # Do not spend an LLM call on a blank question.
    if button and query.strip():
        llm_response = qa_chain(query)
        result = llm_response.get('result')
        st.write('๊ฒฐ๊ณผ: ', f'{result}')

        # The retriever may return fewer than three chunks (small corpus), so
        # iterate over what came back instead of indexing [0], [1], [2].
        source_documents = llm_response.get('source_documents') or []
        for idx, source_document in enumerate(source_documents, start=1):
            st.write(f'๊ต๊ณผ์ ๋ด์ฉ {idx}: {source_document}')
|