Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,39 +1,54 @@
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import io
|
3 |
-
import
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
import zipfile
|
4 |
import os
|
5 |
import io
|
6 |
+
import nltk
|
7 |
+
import openai
|
8 |
+
|
9 |
+
# The OpenAI API key is read from the 'OpenAPI' environment variable; do not hard-code it here.
|
10 |
+
openai.api_key = os.getenv('OpenAPI')
|
11 |
+
|
12 |
+
# Upper bound on NLTK word tokens sent to the model in a single request.
# NOTE: these are NLTK word tokens, not the model's own tokens, so this is
# only a rough proxy for the model's context limit.
_MAX_TOKENS_PER_CHUNK = 2000


def _ask_model(user_prompt, content):
    """Send the user's prompt plus one piece of document text; return the reply text."""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_prompt},
            {"role": "user", "content": content},
        ],
    )
    return response['choices'][0]['message']['content']


def _split_for_model(text):
    """Return the pieces of ``text`` to send: the raw text if short, else token chunks."""
    tokens = nltk.word_tokenize(text)
    if len(tokens) <= _MAX_TOKENS_PER_CHUNK:
        # Short documents are sent verbatim so original spacing is preserved.
        return [text]
    return [
        ' '.join(tokens[start:start + _MAX_TOKENS_PER_CHUNK])
        for start in range(0, len(tokens), _MAX_TOKENS_PER_CHUNK)
    ]


def pdf_to_text(file, user_prompt):
    """Run ``user_prompt`` against every PDF inside an uploaded ZIP archive.

    Each PDF member of the archive is read in memory, its page text is
    extracted and concatenated, and the text is sent to the chat model
    together with ``user_prompt``. Documents longer than
    ``_MAX_TOKENS_PER_CHUNK`` word tokens are split into chunks and each
    chunk is sent as a separate request.

    Args:
        file: Object whose ``name`` attribute is the path of a ZIP archive.
        user_prompt: Instruction sent to the model before the document text.

    Returns:
        The model replies, one per request, joined with newlines. An empty
        string if the archive contains no PDF members.

    Raises:
        zipfile.BadZipFile: If ``file.name`` is not a valid ZIP archive.
    """
    replies = []
    # The context manager closes the archive even if extraction or an API
    # call raises part-way through.
    with zipfile.ZipFile(file.name, 'r') as archive:
        for member in archive.namelist():
            # Match the extension case-insensitively so "REPORT.PDF" is not skipped.
            if not member.lower().endswith('.pdf'):
                continue
            reader = PdfReader(io.BytesIO(archive.read(member)))
            # `or ''` guards against a page that yields no text at all.
            text = ''.join(page.extract_text() or '' for page in reader.pages)
            replies.extend(
                _ask_model(user_prompt, piece) for piece in _split_for_model(text)
            )
    return '\n'.join(replies)
|
52 |
+
|
53 |
+
# Gradio UI: a file upload and a free-text prompt go in, plain text comes out.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=["file", "text"],
    outputs="text",
)
iface.launch(share=True)
|