Update app.py

app.py CHANGED
@@ -1,23 +1,20 @@
 # https://qiita.com/nekoniii3/items/5acf764af65212d9f04f
 
 import gradio as gr
-import random
-import time
 
 import os
 
 from langchain_community.document_loaders import PyMuPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-# from langchain_community.chat_models import ChatOpenAI
 from langchain_openai import ChatOpenAI
 from langchain_community.vectorstores import Chroma
 from langchain.chains import RetrievalQA
-from langchain_openai import OpenAIEmbeddings
-
+# from langchain_openai import OpenAIEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["OPENAI_API_KEY"] = "sk-..."
+os.environ["OPENAI_API_KEY"] = "sk-..."  # literal key redacted
 
 file_name1 = 'ALV2_ALV3DTU操作マニュアルDTU-V3SET01.pdf'
 file_name2 = 'ALV3PCサーバ_ソフトウェア操作マニュアル_画像ファイル名付.pdf'
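Housekeeping in this hunk: the unused import random / import time go away, the stale commented-out langchain_community.chat_models import is dropped, and OpenAIEmbeddings is swapped for HuggingFaceEmbeddings. Note that the OpenAI API key is still committed directly into app.py (the literal value is redacted above). A safer sketch for a Space, assuming the app keeps the same variable name, is to read the key from a repository secret, which Spaces exposes to the app as an environment variable:

import os

# Sketch: rely on an OPENAI_API_KEY secret configured in the Space
# settings instead of committing the literal key to app.py.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY secret is not set")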
@@ -42,7 +39,8 @@ texts3 = text_splitter.split_documents(documents3)
 texts4 = text_splitter.split_documents(documents4)
 texts = texts1 + texts2 + texts3 + texts4
 
-embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
+# embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
+embeddings = HuggingFaceEmbeddings(model_name="oshizo/sbert-jsnli-luke-japanese-base-lite")
 vectordb = Chroma.from_documents(texts, embeddings)
 llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.05)
 
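The embedding swap above is the heart of this commit: indexing moves from OpenAI's text-embedding-ada-002 to a local Japanese sentence-embedding model pulled from the Hugging Face Hub, so building the Chroma index no longer calls the OpenAI API. A minimal sketch of the swapped-in model in isolation, assuming the sentence-transformers package is installed (the weights download on first use; the query string is an illustrative assumption):

# Load the Japanese sentence-embedding model named in the diff
# and embed one query to confirm it works end to end.
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="oshizo/sbert-jsnli-luke-japanese-base-lite"
)
vec = embeddings.embed_query("How do I configure the DTU?")
print(len(vec))  # embedding dimensionality reported by the model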
@@ -60,14 +58,45 @@ def save_image_filepath(filepath: str):
     shutil.copy(filepath, './filepath{}'.format(file_extension))
     pass
 
+import boto3
+s3 = boto3.client('s3',
+                  aws_access_key_id="AKIA...",          # credentials redacted
+                  aws_secret_access_key="...",
+                  region_name='ap-northeast-1'
+                  )
+
+
+# Image URL output helper
+def get_public_url(bucket, target_object_path):
+    """
+    Get the URL of the target S3 file.
+
+    Parameters
+    ----------
+    bucket: string
+        Name of the S3 bucket
+    target_object_path: string
+        Path of the file in S3 to retrieve
+
+    Returns
+    ----------
+    url: string
+        URL of the object on S3
+    """
+    bucket_location = s3.get_bucket_location(Bucket=bucket)
+    return "https://s3-{0}.amazonaws.com/{1}/{2}".format(
+        bucket_location['LocationConstraint'],
+        bucket,
+        target_object_path)
+
+import fitz
+doc1 = fitz.open(file_name1)
+doc2 = fitz.open(file_name2)
+
+import math
+
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
-    # with gr.Row():
-    #     with gr.Column():
-    #         image_input_filepath = gr.Image(type='filepath')
-    #         image_button_filepath = gr.Button("filepath")
-
-    #     image_button_filepath.click(save_image_filepath, inputs=image_input_filepath)
 
     msg = gr.Textbox()
 
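Two notes on the block above. The boto3 credentials are hardcoded in the commit (redacted here), and get_public_url derives a legacy path-style URL from the bucket's region. For buckets in us-east-1, get_bucket_location returns a LocationConstraint of None, which would yield a broken hostname; the client is pinned to ap-northeast-1, so the format holds here. A usage sketch of the helper, with bucket and key names that follow the naming convention used later in the diff and are assumptions:

# Resolve the public URL of one rasterized manual page. The object
# must be publicly readable for the resulting link to load.
url = get_public_url('page.dtu.manual', 'page001_raster.png')
print(url)
# expected form:
# https://s3-ap-northeast-1.amazonaws.com/page.dtu.manual/page001_raster.png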
@@ -76,23 +105,56 @@ with gr.Blocks() as demo:
         reply=reply2['result']
 
         for sd in reply2["source_documents"]:
-            page_content = str(sd.page_content)
+            # page_content = str(sd.page_content)
             source = str(sd.metadata["source"])
-            page =
-
-            print("
+            page = sd.metadata["page"]+1
+            page_num = str(page).zfill(3)
+            # print("PDF: " + source)
+            # print("Page: " + page_num)
 
-
-
+            if source == file_name1:
+                # Get the URL of the page image
+                bucket='page.dtu.manual'
+                key='page'+page_num+'_raster.png'
+                url = get_public_url(bucket, key)
+                reply = reply + ' <a href='+url+'>'+page_num+'</a>'
+
+            elif source == file_name2:
+                # Get the URL of the page image
+                bucket='page.server.manual'
+                key='page'+page_num+'_raster.png'
+                url = get_public_url(bucket, key)
+                reply = reply + ' <a href='+url+'>'+page_num+'</a>'
+
+                # Get the URLs of images pasted into the PDF
+                bucket='image.server.manual'
+                page2 = doc2[page]
+                page_annotations = page2.annots()
+                for annotation in page_annotations:
+                    annotation_num = str(annotation).zfill(3)
+                    # Get the annotation's properties
+                    key = annotation.info.get('content', '')  # text of the note annotation
+                    url = get_public_url(bucket, key)
+                    reply = reply + ' <a href='+url+'>'+key+'</a>'
+            elif source == file_name3:
+                page2 = str(math.floor(1+float(page_num)/2))
+                url = "https://dcs.mediapress-net.com/iportal/cv.do?c=20958580000&pg="+page2+"&v=MIW10001&d=LINK_MIW"
+                reply = reply + ' <a href="'+url+'">'+page2+'</a>'
+            elif source == file_name4:
+                page2 = str(math.floor(1+(486+float(page_num))/2))
+                url = "https://dcs.mediapress-net.com/iportal/cv.do?c=20958580000&pg="+page2+"&v=MIW10001&d=LINK_MIW"
+                reply = reply + ' <a href="'+url+'">'+page2+'</a>'
+            else:
+                exit(0)
+
         return "", history + [[user_message, reply]]
 
     def bot(history):
         yield history
-        # save_image_filepath("./IMG_yosuke2.jpg")
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
         bot, chatbot, chatbot
     )
 
 demo.queue()
-demo.launch(share=True)
+demo.launch(auth=("root", "test123456"), share=True)
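The file_name2 branch leans on PyMuPDF's annotation API: Page.annots() yields the page's annotations, and Annot.info is a dict whose 'content' key holds the note text, which this app repurposes as an S3 object key. Note also that page here is the 1-based page number while PyMuPDF documents index from 0, so doc2[page] reads the page after the cited one unless that offset is intentional. A standalone sketch of the annotation lookup, with a placeholder PDF path:

import fitz  # PyMuPDF

doc = fitz.open("annotated.pdf")  # placeholder path, assumption
page = doc[0]
for annot in page.annots():
    # 'content' is the free-text note attached to the annotation;
    # the app above uses this text as an S3 object key
    print(annot.info.get("content", ""))

Finally, the new launch line gates the UI behind Gradio's built-in login form: auth accepts a (username, password) tuple (or a list of them), while share=True keeps the public share link.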