GopalGoyal commited on
Commit
f3ffe02
·
1 Parent(s): 436c5b5

Add application file

Browse files
Files changed (6) hide show
  1. .gitattributes copy +35 -0
  2. .streamlit/config.toml +25 -0
  3. README copy.md +12 -0
  4. app.py +141 -0
  5. config.py +0 -0
  6. requirements.txt +11 -0
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ enableXsrfProtection = false
3
+ enableCORS = false
4
+
5
+ [browser]
6
+ gatherUsageStats = false
7
+
8
+ [theme]
9
+
10
+ # Primary accent for interactive elements
11
+ primaryColor = '#7792E3'
12
+
13
+ # Background color for the main content area
14
+ backgroundColor = 'white'
15
+
16
+ # Background color for sidebar and most interactive widgets
17
+ secondaryBackgroundColor = '#D3D3D3'
18
+
19
+ # Color used for almost all text
20
+ textColor = 'black'
21
+
22
+ # Font family for all text in the app, except code blocks
23
+ # Accepted values (serif | sans serif | monospace)
24
+ # Default: "sans serif"
25
+ font = "sans serif"
README copy.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Question Generation App
3
+ emoji: ⚡
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.33.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit app: generate summaries or exam questions from an uploaded PDF."""

# Standard library
import os
import tempfile

# Third-party
import streamlit as st
from openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
# from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from PyPDF2 import PdfReader

# Page title — first visible element of the app.
st.title('Create Questions')

# OpenAI client; the key comes from the environment, never hard-coded.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
14
+
15
def get_pdf_text(pdf_docs, start, end):
    """Extract and concatenate text from pages [start, end) of a PDF.

    Args:
        pdf_docs: a path or file-like object accepted by PyPDF2.PdfReader
            (e.g. a Streamlit UploadedFile).
        start: first page index, 0-based, inclusive.
        end: last page index, exclusive.

    Returns:
        The concatenated text of the requested pages ("" if the range is
        empty or entirely out of bounds).
    """
    pdf_reader = PdfReader(pdf_docs)
    # Clamp the requested range to the real page count so an over-long
    # range returns what exists instead of raising IndexError.
    first = max(start, 0)
    last = min(end, len(pdf_reader.pages))
    # extract_text() may return None for image-only/scanned pages; the
    # original `text += ...` raised TypeError in that case.
    return "".join(
        pdf_reader.pages[i].extract_text() or "" for i in range(first, last)
    )
23
+
24
+ # def get_text_chunks(text):
25
+ # text_splitter = CharacterTextSplitter(
26
+ # separator="\n",
27
+ # chunk_size=1024,
28
+ # chunk_overlap=200,
29
+ # length_function=len
30
+ # )
31
+ # chunks = text_splitter.split_text(text)
32
+ # return chunks
33
+
34
# Per-format pieces of the question-generation prompt.  All three formats
# share the same preamble; only the format name and the answer template
# differ, so they are kept in one table instead of three near-identical
# 15-line string literals.
_FORMAT_SPECS = {
    "MCQ": (
        "MCQ with 4 options",
        "Question: Question \n"
        " - A): Choice\n"
        " - B): Choice\n"
        " - C): Choice\n"
        " - D): Choice\n"
        " - Answer: Answer\n",
    ),
    "Short Answer": (
        "Short Answer Type",
        "Question: Question \n"
        " - Answer: Answer\n",
    ),
    "Long Answer": (
        "Long Answer Type",
        "Question: Question \n"
        " - Answer: Answer\n",
    ),
}


def _build_qa_prompt(text, number_of_quesiton, diff_level, question_format):
    """Assemble the question-generation prompt for the requested format.

    Raises:
        KeyError: if question_format is not a key of _FORMAT_SPECS.
            (The original code silently left the prompt variable unbound,
            producing a confusing NameError instead.)
    """
    format_name, answer_template = _FORMAT_SPECS[question_format]
    return (
        "Below is a text extract. Please generate questions for a "
        "certification as per the below mentioned parameters and please "
        "also generate the correct answer for each question separately. "
        "Please number the questions and please do not generate any "
        "explanation. Please provide the complete details of each question "
        "by explicitly mentioning the context. Please do not refer back to "
        "the text or part of the text extract in the questions. For example, "
        "for a question related to an act in the text, provide the name of "
        "the act and do not just mention this act or the act etc.\n"
        f"Number of Questions to be generated: {number_of_quesiton} \n"
        f"Difficulty Level of questions: {diff_level} \n"
        f"Question Format: {format_name} \n"
        "Use format from below: \n"
        f"{answer_template}"
        f"{text}\n"
    )


def _chat_completion(prompt):
    """Send one user message to GPT-4 and return the stripped reply text."""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content.strip()


def inferencing(text, task, number_of_quesiton, diff_level, question_format):
    """Run the selected task over the extracted PDF text.

    Args:
        text: raw text extracted from the PDF pages.
        task: "Generate Summary" for a summary; any other value triggers
            question generation.
        number_of_quesiton: how many questions to request from the model.
        diff_level: difficulty label ("Easy" | "Medium" | "Hard").
        question_format: "MCQ" | "Short Answer" | "Long Answer".

    Returns:
        The model's response text on success, or the caught Exception
        object on failure — callers pass the result straight to st.write,
        so the error is displayed rather than crashing the script.

    Note:
        A commented-out line containing a hard-coded OpenAI API key was
        removed from this function; that key was committed publicly and
        must be revoked.
    """
    try:
        if task == "Generate Summary":
            # NOTE(review): "genrate" typo kept byte-for-byte — this string
            # is sent to the model and GPT tolerates it; fix deliberately,
            # not as a drive-by.
            prompt = f"please genrate a section wise and point wise summary from the context :{text}"
        else:
            prompt = _build_qa_prompt(
                text, number_of_quesiton, diff_level, question_format
            )
        return _chat_completion(prompt)
    except Exception as e:
        # Preserve the original contract: surface the exception object to
        # the UI instead of raising.
        return e
99
+
100
def uploadPdf(pdfs_folder):
    """Copy uploaded file objects to temp files; return the LAST temp path.

    Args:
        pdfs_folder: iterable of file-like objects with a .read() method
            (e.g. Streamlit UploadedFile instances).

    Returns:
        Filesystem path of the temporary copy of the last file, or None
        when the iterable is empty (the original raised UnboundLocalError
        in that case).

    Note:
        delete=False means the temp files are never removed automatically —
        callers own cleanup.  Paths of all files but the last are discarded;
        that pre-existing behavior is kept for caller compatibility.
    """
    temp_path = None
    for pdf_file in pdfs_folder:
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(pdf_file.read())
    return temp_path
108
+
109
+ # Allow user to upload PDF files
110
# Allow user to upload PDF files.
# NOTE(review): single-file uploader (no accept_multiple_files), so
# `pdf_files` is one UploadedFile despite the plural name.
pdf_files = st.file_uploader("Upload PDF files", type="pdf")

if pdf_files:
    # Page-range inputs, side by side.
    # NOTE(review): no `value=` given, so these start at a default the user
    # may leave untouched — confirm int(start)/int(end) below cannot
    # receive None.
    col1, col2 = st.columns(2)
    with col1:
        start = st.number_input("Start Page", step=1,placeholder="Start Page",key="2")
    with col2:
        end = st.number_input("End Page", step=1,placeholder="End Page",key="3")
    # NOTE(review): `processing` is never read anywhere visible — dead flag.
    processing=False

    # Task / question parameters. The question-specific widgets are shown
    # even for "Generate Summary"; inferencing() simply ignores them then.
    task = st.selectbox(
        'Chose A Task',
        ("Generate Question", "Generate Summary"))
    # NOTE(review): value=None means int conversion is not done here;
    # the raw value is forwarded into the prompt text as-is.
    number_of_quesiton = st.number_input("Number of Questions",step=1, value=None, placeholder="Number of Qustions",key="1")
    diff_level = st.selectbox(
        'Difficulty Level',
        ("Easy", "Medium","Hard"))
    question_format = st.selectbox(
        'Question Format',
        ("MCQ", "Short Answer", "Long Answer"))

    if st.button('Submit'):
        with st.spinner('Wait for it...'):
            # Extract the selected page range, then run the chosen task and
            # display whatever inferencing returns (text or an exception).
            raw_text = get_pdf_text(pdf_files,int(start),int(end))
            # text_chunks = get_text_chunks(raw_text)
            # st.write(text_chunks)
            # st.write("file uploaded")
            all_questions=inferencing(raw_text,task=task,number_of_quesiton=number_of_quesiton,
                diff_level=diff_level,question_format=question_format)
            st.write(all_questions)
141
+
config.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ llama-index
3
+ sentence-transformers
4
+ cohere
5
+ anthropic
6
+ voyageai
7
+ protobuf
8
+ pypdf
9
+ langchain
10
+ PyPDF2
11
+ pycryptodome