File size: 2,892 Bytes
f499646
f0f1b12
 
63d6d47
f0f1b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc6c2c0
f0f1b12
 
 
 
 
 
 
 
 
 
 
bee2308
cb3d9b4
465a36b
 
f0f1b12
 
cad4f13
d742c01
f0f1b12
 
 
 
e41777f
 
 
f0f1b12
d742c01
f0f1b12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a9410d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import gradio as gr
import os
import difflib


# Path / identifier of the pre-trained embedding model.
modelPath = "MSEAJYTHTH/NFPC"

# Model configuration options: run the embedding model on the CPU.
model_kwargs = dict(device='cpu')

# Encoding options: embeddings are not normalized.
encode_kwargs = dict(normalize_embeddings=False)

# Embedding function shared by index loading and query-time search.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)


# Original author's note: once the two files have been uploaded, the code can
# be changed to load the index as below (presumably the two files are the
# saved FAISS index files inside "faiss_index" -- confirm).
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# pickled data; only use it with an index directory you trust.
db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)


def _default_index():
    """Return the module-level FAISS index (``db``) loaded at import time."""
    return db


def find_best_page_content(question, keywords, db=None):
    """Return the text of the retrieved chunk whose source best matches the keywords.

    The index is queried with ``question`` for up to 200 candidates; each
    candidate's ``metadata['source']`` is then compared with the keyword string
    using ``difflib.SequenceMatcher`` and the candidate with the highest ratio
    wins (the first one on ties).

    Args:
        question: Query text passed to the index's ``similarity_search``.
        keywords: Either a ready-made keyword string (e.g. the raw text typed
            into the UI) or an iterable of keyword strings, which is joined
            with ``", "``.
        db: Object exposing ``similarity_search(query, k=..., fetch_k=...)``.
            When omitted, the module-level index is used instead of reloading
            the index from disk on every call.

    Returns:
        The ``page_content`` of the best-matching candidate, or ``""`` when
        there are no candidates or none has a similarity above zero.
    """
    if db is None:
        db = _default_index()

    # A plain string must be used as-is: joining it would interleave ", "
    # between every single character and distort the comparison below.
    if not isinstance(keywords, str):
        keywords = ', '.join(keywords)

    results = db.similarity_search(question, k=200, fetch_k=200)

    max_similarity = 0        # best ratio seen so far
    best_page_content = ""    # content of the best candidate so far

    # Iterate over what was actually returned; the index may hold fewer than
    # 200 entries.
    for doc in results:
        # Candidates without a 'source' entry are compared as an empty string.
        source = str(doc.metadata.get('source') or "")
        similarity_score = difflib.SequenceMatcher(None, keywords, source).ratio()

        if similarity_score > max_similarity:
            max_similarity = similarity_score
            best_page_content = doc.page_content

    return best_page_content


def _search_from_ui(question, keywords):
    """Adapter for the UI: forward the two textbox values plus the shared index.

    The interface below declares two inputs, so the callback must be callable
    with exactly two positional arguments; the module-level ``db`` is supplied
    here as the third argument of ``find_best_page_content``.
    """
    return find_best_page_content(question, keywords, db)


# Build the two-textbox search UI and start serving it immediately.
gr.Interface(
    fn=_search_from_ui,
    inputs=[
        gr.Textbox(label="질문을 입력해주세요", placeholder="물분무소화설비 화재안전성능기준에서 헤드 관련 기준을 찾아줘"),
        gr.Textbox(label="Keywords를 입력해주세요", placeholder="물분무, 헤드"),
    ],
    outputs=gr.Textbox(label="검색 결과", type="text"),
    title="화재안전성능기준 Question & Answering",
    description="화재안전성능기준를 찾아주는 Chatbot",
    theme="soft",
    examples=[["물분무소화설비 헤드에 대한 것을 찾아줘", "물분무, 헤드"]],
).launch(share=True)