MSEAJYTHTH committed on
Commit
f0f1b12
·
verified ·
1 Parent(s): 1622169

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import HuggingFaceDatasetLoader
2
+ from langchain_community.document_loaders.csv_loader import CSVLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
7
+ from transformers import AutoTokenizer, pipeline
8
+ from langchain import HuggingFacePipeline
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.schema import AIMessage, HumanMessage, SystemMessage
11
+ from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
12
+ from langchain.prompts import ChatPromptTemplate
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain_core.documents import Document
15
+ import gradio as gr
16
+ import os
17
+ import difflib
18
+
19
+
20
# Embedding backend configuration: run the pre-trained NFPC model on CPU
# and keep the raw (unnormalized) embedding vectors.
modelPath = "MSEAJYTHTH/NFPC"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

# Sentence-embedding model used both to build and to query the FAISS index.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
31
+
32
+
33
def find_best_page_content(question, keywords, db=None):
    """Return the page content of the search hit whose source best matches *keywords*.

    Runs a FAISS similarity search for *question* (top 50 hits), then ranks
    the hits by ``difflib.SequenceMatcher`` similarity between the keyword
    string and each hit's ``metadata['source']``, returning the content of
    the best-scoring hit.

    Args:
        question: Natural-language query for the vector search.
        keywords: Comma-separated keyword string (as delivered by the Gradio
            textbox), or an iterable of keyword strings which is joined with
            ", ".
        db: Optional pre-loaded FAISS vector store. When None, the index is
            loaded from disk (backward-compatible default so the two-input
            Gradio interface can call this function with two arguments).

    Returns:
        The ``page_content`` of the best-matching hit, or "" when the search
        returns no results.
    """
    if db is None:
        # NOTE(review): reloading the index on every call is slow; consider
        # loading once at module import time. Previously a required `db`
        # argument was unconditionally overwritten here.
        db = FAISS.load_local(
            "MSEAJYTHTH/NFPC/faiss_index",
            embeddings,
            allow_dangerous_deserialization=True,
        )

    results = db.similarity_search(question, k=50, fetch_k=50)

    # Gradio passes the keyword box as a single string; only join genuine
    # iterables. (The original joined the string itself, which inserted
    # ", " between every character.)
    if not isinstance(keywords, str):
        keywords = ', '.join(keywords)

    max_similarity = 0.0   # best similarity seen so far
    best_page_content = ""  # content of the best-scoring hit ("" if no hits)

    # Iterate over the actual hits instead of a hard-coded range(50), which
    # raised IndexError whenever fewer than 50 documents matched.
    for doc in results:
        similarity_score = difflib.SequenceMatcher(
            None, keywords, doc.metadata['source']
        ).ratio()
        if similarity_score > max_similarity:
            max_similarity = similarity_score
            best_page_content = doc.page_content

    return best_page_content
55
+
56
+
57
# Gradio UI: two text inputs (question and comma-separated keywords) feeding
# the retrieval function, one text output showing the best-matching passage.
demo = gr.Interface(
    fn=find_best_page_content,
    inputs=[
        gr.Textbox(label="질문을 입력해주세요", placeholder="물분무소화설비 화재안전성능기준에서 헤드 관련 기준을 찾아줘"),
        gr.Textbox(label="Keywords를 입력해주세요", placeholder="물분무, 헤드"),
    ],
    outputs=gr.Textbox(label="검색 결과", type="text"),
    title="화재안전성능기준 Question & Answering",
    description="화재안전성능기준를 찾아주는 Chatbot",
    theme="soft",
    examples=[["물분무소화설비 헤드에 대한 것을 찾아줘", "물분무, 헤드"]],
)
demo.launch()