Spaces:
Sleeping
Sleeping
from langchain_community.document_loaders import HuggingFaceDatasetLoader | |
from langchain_community.document_loaders.csv_loader import CSVLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from transformers import AutoTokenizer, AutoModelForQuestionAnswering | |
from transformers import AutoTokenizer, pipeline | |
from langchain import HuggingFacePipeline | |
from langchain.chains import RetrievalQA | |
from langchain.schema import AIMessage, HumanMessage, SystemMessage | |
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda | |
from langchain.prompts import ChatPromptTemplate | |
from langchain_community.vectorstores import FAISS | |
from langchain_core.documents import Document | |
import gradio as gr | |
import os | |
import difflib | |
# Embedding model identifier handed to HuggingFaceEmbeddings as model_name.
modelPath = "MSEAJYTHTH/NFPC"
# Run the embedding model on the CPU.
model_kwargs = dict(device="cpu")
# Leave the produced embedding vectors un-normalized.
encode_kwargs = dict(normalize_embeddings=False)

embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Once the two files have been uploaded, the code can be adjusted as follows
# (presumably the two files are the saved FAISS index files -- TODO confirm).
db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
def _normalize_keywords(keywords):
    """Return *keywords* as one ``", "``-separated string."""
    if isinstance(keywords, str):
        # A Gradio Textbox delivers a single string; joining it directly
        # would put ", " between every character, so split on commas.
        parts = (part.strip() for part in keywords.split(","))
        return ", ".join(part for part in parts if part)
    return ", ".join(keywords)


def find_best_page_content(question, keywords, db=None):
    """Return the page content whose source best matches the keywords.

    The vector store is asked for up to 200 documents similar to
    *question*. Each hit's ``metadata['source']`` is compared with the
    keyword string using ``difflib.SequenceMatcher`` and the content of the
    highest-scoring hit is returned.

    Args:
        question: Query text passed to ``similarity_search``.
        keywords: Either one comma-separated string (what a Gradio Textbox
            supplies) or an iterable of keyword strings.
        db: Object exposing ``similarity_search``. When omitted, the index
            is loaded from ``"faiss_index"`` with the module-level
            ``embeddings``, so the function can be called with only the
            two values the UI provides.

    Returns:
        The ``page_content`` of the best-scoring hit, or ``""`` when there
        are no hits or no hit scores above zero.
    """
    if db is None:
        # NOTE(security): allow_dangerous_deserialization opts in to
        # unpickling the stored index; only load index files you trust.
        db = FAISS.load_local(
            "faiss_index", embeddings, allow_dangerous_deserialization=True
        )

    results = db.similarity_search(question, k=200, fetch_k=200)
    keyword_text = _normalize_keywords(keywords)

    max_similarity = 0
    best_page_content = ""
    # Walk the hits actually returned; the store may yield fewer than 200.
    for doc in results:
        score = difflib.SequenceMatcher(
            None, keyword_text, doc.metadata['source']
        ).ratio()
        # Strict comparison: the earliest hit wins among equal scores.
        if score > max_similarity:
            max_similarity = score
            best_page_content = doc.page_content
    return best_page_content
# Input widgets, listed in the order their values are passed to the callback.
question_box = gr.Textbox(
    label="질문을 입력해주세요",
    placeholder="물분무소화설비 화재안전성능기준에서 헤드 관련 기준을 찾아줘",
)
keywords_box = gr.Textbox(
    label="Keywords를 입력해주세요",
    placeholder="물분무, 헤드",
)
# Single text widget that displays the callback's return value.
result_box = gr.Textbox(label="검색 결과", type="text")

demo = gr.Interface(
    fn=find_best_page_content,
    inputs=[question_box, keywords_box],
    outputs=result_box,
    title="화재안전성능기준 Question & Answering",
    description="화재안전성능기준를 찾아주는 Chatbot",
    theme="soft",
    examples=[["물분무소화설비 헤드에 대한 것을 찾아줘", "물분무, 헤드"]],
)
# Start the web UI, requesting a public share link.
demo.launch(share=True)