linjieccc committed on
Commit
b1b4090
·
1 Parent(s): 09c6b2f

update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -6
app.py CHANGED
@@ -121,7 +121,7 @@ examples = [
121
  ],
122
  [
123
  "receipt.png",
124
- "เบอร์โทรร้านอะไรคะ"
125
  ],
126
  [
127
  "poster.png",
@@ -166,7 +166,7 @@ prompt_files = {
166
  "Which quality component has the icon of a pen in it?": "website_design_guide.jpeg",
167
  "Which gift idea needs a printer?": "poster.png",
168
  "患者さんは何でお金を払いますか。": "medical_bill_2.jpg",
169
- "เบอร์โทรร้านอะไรคะ": "receipt.png",
170
  "试卷当前部分考察什么内容?": "exam.png",
171
  "For Rage, who is the author listed as? ": "book.png",
172
  }
@@ -237,11 +237,22 @@ def np2base64(image_np):
237
  return base64_str
238
 
239
 
240
- def img2base64(path):
 
 
 
 
 
 
241
  if path.endswith(".pdf"):
242
- base64_str = np2base64(load_document(path)[0])
 
 
 
 
 
243
  else:
244
- base64_str = base64.b64encode(open(path, 'rb').read()).decode()
245
  return base64_str
246
 
247
 
@@ -255,7 +266,7 @@ def process_prompt(prompt, document, lang="ch"):
255
 
256
  url = f"https://aip.baidubce.com/rpc/2.0/nlp-itec/poc/docprompt?access_token={access_token}"
257
 
258
- base64_str = img2base64(document)
259
 
260
  r = requests.post(url, json={"doc": base64_str, "prompt": [prompt], "lang": lang})
261
  response = r.json()
 
121
  ],
122
  [
123
  "receipt.png",
124
+ "เบอร์โทรร้านอะไรคะ?"
125
  ],
126
  [
127
  "poster.png",
 
166
  "Which quality component has the icon of a pen in it?": "website_design_guide.jpeg",
167
  "Which gift idea needs a printer?": "poster.png",
168
  "患者さんは何でお金を払いますか。": "medical_bill_2.jpg",
169
+ "เบอร์โทรร้านอะไรคะ?": "receipt.png",
170
  "试卷当前部分考察什么内容?": "exam.png",
171
  "For Rage, who is the author listed as? ": "book.png",
172
  }
 
237
  return base64_str
238
 
239
 
240
def get_base64(path):
    """Return the document at *path* as a base64-encoded string.

    ``path`` is either a local file path or an ``http://`` / ``https://``
    URL. When it ends with ``.pdf`` the first page is obtained through
    ``PdfReader.get_page_image`` and encoded with ``np2base64``; any other
    input is base64-encoded byte-for-byte.

    Raises:
        requests.HTTPError: the URL answered with a 4xx/5xx status.
        OSError: the local file could not be opened or read.
        IndexError: the PDF reports no pages.
    """
    if path.startswith(("http://", "https://")):
        # ``resp.content`` (unlike ``resp.raw``) undoes gzip/deflate content
        # encoding; the ``with`` block releases the connection afterwards.
        with requests.get(path, allow_redirects=True, timeout=30) as resp:
            # Fail loudly instead of encoding an HTML error page as the doc.
            resp.raise_for_status()
            data = resp.content
    else:
        # Context manager guarantees the file handle is closed.
        with open(path, "rb") as f:
            data = f.read()

    if path.endswith(".pdf"):
        pdfreader = PdfReader(stream=data)
        if pdfreader._inpdf.page_count < 1:
            raise IndexError("PDF document has no pages")
        # Only the first page is sent on; later pages are ignored.
        return np2base64(pdfreader.get_page_image(pageno=0))

    return base64.b64encode(data).decode()
257
 
258
 
 
266
 
267
  url = f"https://aip.baidubce.com/rpc/2.0/nlp-itec/poc/docprompt?access_token={access_token}"
268
 
269
+ base64_str = get_base64(document)
270
 
271
  r = requests.post(url, json={"doc": base64_str, "prompt": [prompt], "lang": lang})
272
  response = r.json()