Spaces:
Runtime error
Runtime error
linjieccc
committed on
Commit
·
b1b4090
1
Parent(s):
09c6b2f
update app.py
Browse files
app.py
CHANGED
@@ -121,7 +121,7 @@ examples = [
|
|
121 |
],
|
122 |
[
|
123 |
"receipt.png",
|
124 |
-
"
|
125 |
],
|
126 |
[
|
127 |
"poster.png",
|
@@ -166,7 +166,7 @@ prompt_files = {
|
|
166 |
"Which quality component has the icon of a pen in it?": "website_design_guide.jpeg",
|
167 |
"Which gift idea needs a printer?": "poster.png",
|
168 |
"患者さんは何でお金を払いますか。": "medical_bill_2.jpg",
|
169 |
-
"
|
170 |
"试卷当前部分考察什么内容?": "exam.png",
|
171 |
"For Rage, who is the author listed as? ": "book.png",
|
172 |
}
|
@@ -237,11 +237,22 @@ def np2base64(image_np):
|
|
237 |
return base64_str
|
238 |
|
239 |
|
240 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
if path.endswith(".pdf"):
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
243 |
else:
|
244 |
-
base64_str = base64.b64encode(
|
245 |
return base64_str
|
246 |
|
247 |
|
@@ -255,7 +266,7 @@ def process_prompt(prompt, document, lang="ch"):
|
|
255 |
|
256 |
url = f"https://aip.baidubce.com/rpc/2.0/nlp-itec/poc/docprompt?access_token={access_token}"
|
257 |
|
258 |
-
base64_str =
|
259 |
|
260 |
r = requests.post(url, json={"doc": base64_str, "prompt": [prompt], "lang": lang})
|
261 |
response = r.json()
|
|
|
121 |
],
|
122 |
[
|
123 |
"receipt.png",
|
124 |
+
"เบอร์โทรร้านอะไรคะ?"
|
125 |
],
|
126 |
[
|
127 |
"poster.png",
|
|
|
166 |
"Which quality component has the icon of a pen in it?": "website_design_guide.jpeg",
|
167 |
"Which gift idea needs a printer?": "poster.png",
|
168 |
"患者さんは何でお金を払いますか。": "medical_bill_2.jpg",
|
169 |
+
"เบอร์โทรร้านอะไรคะ?": "receipt.png",
|
170 |
"试卷当前部分考察什么内容?": "exam.png",
|
171 |
"For Rage, who is the author listed as? ": "book.png",
|
172 |
}
|
|
|
237 |
return base64_str
|
238 |
|
239 |
|
240 |
+
def get_base64(path):
|
241 |
+
if path.startswith("http://") or path.startswith("https://"):
|
242 |
+
resp = requests.get(path, allow_redirects=True, stream=True)
|
243 |
+
b = resp.raw
|
244 |
+
else:
|
245 |
+
b = open(path, "rb")
|
246 |
+
|
247 |
if path.endswith(".pdf"):
|
248 |
+
images_list = []
|
249 |
+
pdfreader = PdfReader(stream=b.read())
|
250 |
+
for p_no in range(0, min(pdfreader._inpdf.page_count, 1)):
|
251 |
+
img_np = pdfreader.get_page_image(pageno=p_no)
|
252 |
+
images_list.append(img_np)
|
253 |
+
base64_str = np2base64(images_list[0])
|
254 |
else:
|
255 |
+
base64_str = base64.b64encode(b.read()).decode()
|
256 |
return base64_str
|
257 |
|
258 |
|
|
|
266 |
|
267 |
url = f"https://aip.baidubce.com/rpc/2.0/nlp-itec/poc/docprompt?access_token={access_token}"
|
268 |
|
269 |
+
base64_str = get_base64(document)
|
270 |
|
271 |
r = requests.post(url, json={"doc": base64_str, "prompt": [prompt], "lang": lang})
|
272 |
response = r.json()
|