DevBM committed on
Commit
fd80405
·
verified ·
1 Parent(s): 6f9decb

Upload 2 files

Browse files
Files changed (2) hide show
  1. b.py +49 -0
  2. requirements.txt +13 -0
b.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from PyPDF2 import PdfReader
4
+ import PyPDF2
5
+ import fitz
6
+ import os
7
+ import nltk
8
+
9
+
10
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources; each item is handed unchanged to
            ``PdfReader``. ``None`` or an empty iterable is accepted and
            produces an empty string.

    Returns:
        str: All page texts joined in document/page order with no separator.
    """
    if not pdf_docs:
        # Nothing uploaded: avoid iterating over None.
        return ""
    # ``or ""`` keeps the join safe should a page yield no text at all, and a
    # single join avoids rebuilding the string once per page.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
17
+
18
# Prefix removed from each generated string before display. The original code
# dropped the first 10 characters unconditionally, which is exactly the length
# of this prefix -- presumably what the model emits; confirm against real output.
_QUESTION_PREFIX = 'question: '

# st.session_state key under which the latest pipeline output is kept so that
# it survives the rerun Streamlit performs on every widget interaction.
_OUTPUT_STATE_KEY = 'question_pipeline_output'


def _load_question_pipeline():
    """Build the text2text-generation pipeline used to produce questions."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def _split_sentences(text):
    """Split *text* into sentences, fetching the NLTK Punkt data if missing."""
    try:
        return nltk.sent_tokenize(text=text)
    except LookupError:
        # The tokenizer data is not bundled with nltk. Which resource name is
        # needed depends on the installed nltk release, so request both.
        for resource in ('punkt', 'punkt_tab'):
            nltk.download(resource, quiet=True)
        return nltk.sent_tokenize(text=text)


def _strip_question_prefix(generated):
    """Return *generated* without a leading 'question: ' label, if present."""
    if generated.startswith(_QUESTION_PREFIX):
        return generated[len(_QUESTION_PREFIX):]
    return generated


def main():
    """Render the Streamlit page that generates questions from uploaded PDFs.

    Flow: the user uploads one or more files and presses 'Start'; the text of
    the files is split into sentences, each sentence is passed through the
    text2text-generation pipeline, and the generated questions are listed.
    Two optional switches show the raw pipeline output and the question list.
    """
    st.title('Question Generator from PDFs')

    # Streamlit re-executes the script on every interaction; caching the
    # pipeline as a resource prevents rebuilding the model each time.
    pipe = st.cache_resource(_load_question_pipeline)()

    file = st.file_uploader(label='Upload', accept_multiple_files=True)

    if st.button(label='Start'):
        st.write('Hi')
        sentences = _split_sentences(get_pdf_text(file))
        if sentences:
            st.session_state[_OUTPUT_STATE_KEY] = pipe(sentences)
        else:
            # Nothing to feed the model (no upload, or no extractable text).
            st.session_state.pop(_OUTPUT_STATE_KEY, None)
            st.warning('No text found. Upload at least one PDF containing text.')

    # The button is only True during the run triggered by the click, so the
    # results are read back from session state; otherwise flipping one of the
    # switches below would rerun the script and make them disappear.
    outputs = st.session_state.get(_OUTPUT_STATE_KEY)
    if not outputs:
        return

    questions = [_strip_question_prefix(item['generated_text']) for item in outputs]

    st.subheader("Generated Questions are: ")
    for question in questions:
        st.write(question)

    # st.toggle is missing from older Streamlit releases; st.checkbox has the
    # same call shape and boolean return, so use it as a fallback.
    switch = getattr(st, 'toggle', st.checkbox)
    if switch(label='Show Pipeline Output'):
        st.write(outputs)
    if switch(label='Show Questions list'):
        st.write(questions)
47
+
48
# Script entry point: build the page only when this file is executed directly
# (for example by `streamlit run`), not when it is imported as a module.
if "__main__" == __name__:
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.0.184
2
+ PyPDF2==3.0.1
3
+ python-dotenv==1.0.0
4
+ streamlit==1.18.1
5
+ faiss-cpu==1.7.4
6
+ altair==4
7
+ tiktoken==0.4.0
8
+ # needed for Hugging Face LLMs (comment out if unused)
9
+ huggingface-hub==0.14.1
10
+
11
+ # needed for instructor embeddings (comment out if unused)
12
+ InstructorEmbedding==1.0.1
13
+ sentence-transformers==2.2.2