Commit · f3ffe02
1 Parent(s): 436c5b5

Add application file
Files changed:
- .gitattributes copy +35 -0
- .streamlit/config.toml +25 -0
- README copy.md +12 -0
- app.py +141 -0
- config.py +0 -0
- requirements.txt +11 -0
.gitattributes copy
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
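These 35 rules are the stock Hugging Face template: any file matching one of the patterns is stored through Git LFS instead of in the repository itself, which keeps model weights and other large binaries out of ordinary git history.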
.streamlit/config.toml
ADDED
@@ -0,0 +1,25 @@
+[server]
+enableXsrfProtection = false
+enableCORS = false
+
+[browser]
+gatherUsageStats = false
+
+[theme]
+
+# Primary accent for interactive elements
+primaryColor = '#7792E3'
+
+# Background color for the main content area
+backgroundColor = 'white'
+
+# Background color for sidebar and most interactive widgets
+secondaryBackgroundColor = '#D3D3D3'
+
+# Color used for almost all text
+textColor = 'black'
+
+# Font family for all text in the app, except code blocks
+# Accepted values (serif | sans serif | monospace)
+# Default: "sans serif"
+font = "sans serif"
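For reference: setting enableXsrfProtection = false and enableCORS = false is a common workaround to make st.file_uploader work when the app runs inside the Hugging Face Spaces iframe; both settings relax browser-side protections, so they are best left enabled in a standalone deployment.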
README copy.md
ADDED
@@ -0,0 +1,12 @@
+---
+title: Question Generation App
+emoji: ⚡
+colorFrom: purple
+colorTo: yellow
+sdk: streamlit
+sdk_version: 1.33.0
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,141 @@
+import os
+import tempfile
+
+import streamlit as st
+from openai import OpenAI
+from PyPDF2 import PdfReader
+# from langchain.text_splitter import CharacterTextSplitter  # only needed by the stub below
+
+st.title('Create Questions')
+
+# The key is read from the OPENAI_API_KEY environment variable, which is
+# also the OpenAI SDK's default lookup.
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
+
+def get_pdf_text(pdf_docs, start, end):
+    # Extract text from pages [start, end) of the uploaded PDF.
+    text = ""
+    pdf_reader = PdfReader(pdf_docs)
+    for i in range(start, end):
+        text += pdf_reader.pages[i].extract_text()
+    return text
+
+
+# def get_text_chunks(text):
+#     text_splitter = CharacterTextSplitter(
+#         separator="\n",
+#         chunk_size=1024,
+#         chunk_overlap=200,
+#         length_function=len,
+#     )
+#     return text_splitter.split_text(text)
+
+
+def inferencing(text, task, number_of_questions, diff_level, question_format):
+    # Ask GPT-4 either for a summary or for questions in the requested format.
+    if task == "Generate Summary":
+        try:
+            summary_generate_prompt_tmpl = (
+                "Please generate a section-wise and point-wise summary "
+                f"from the context: {text}"
+            )
+            response = client.chat.completions.create(
+                model="gpt-4",
+                messages=[{"role": "user", "content": summary_generate_prompt_tmpl}],
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            return str(e)
+    else:
+        try:
+            # Only the answer-format block differs between the three question types.
+            if question_format == "MCQ":
+                format_spec = (
+                    "Question Format: MCQ with 4 options\n"
+                    "Use format from below:\n"
+                    "Question: Question\n"
+                    "- A): Choice\n"
+                    "- B): Choice\n"
+                    "- C): Choice\n"
+                    "- D): Choice\n"
+                    "- Answer: Answer\n"
+                )
+            elif question_format == "Short Answer":
+                format_spec = (
+                    "Question Format: Short Answer Type\n"
+                    "Use format from below:\n"
+                    "Question: Question\n"
+                    "- Answer: Answer\n"
+                )
+            else:  # "Long Answer"
+                format_spec = (
+                    "Question Format: Long Answer Type\n"
+                    "Use format from below:\n"
+                    "Question: Question\n"
+                    "- Answer: Answer\n"
+                )
+
+            qa_generate_prompt_tmpl = (
+                "Below is a text extract. Please generate questions for a "
+                "certification as per the below mentioned parameters, and "
+                "please also generate the correct answer for each question "
+                "separately. Please number the questions and do not generate "
+                "any explanation. Provide the complete details of each "
+                "question by explicitly mentioning the context, and do not "
+                "refer back to the text extract in the questions. For "
+                "example, for a question related to an act in the text, give "
+                "the name of the act rather than just 'this act' or 'the act'.\n"
+                f"Number of Questions to be generated: {number_of_questions}\n"
+                f"Difficulty Level of questions: {diff_level}\n"
+                f"{format_spec}"
+                f"{text}\n"
+            )
+            response = client.chat.completions.create(
+                model="gpt-4",
+                messages=[{"role": "user", "content": qa_generate_prompt_tmpl}],
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            return str(e)
+
+
+def uploadPdf(pdfs_folder):
+    # Write each uploaded PDF to a temporary file and return the last path
+    # written. (Currently unused by the app.)
+    temp_path = None
+    for pdf_file in pdfs_folder:
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_path = temp_file.name
+            temp_file.write(pdf_file.read())
+    return temp_path
+
+
+# Allow the user to upload a PDF file.
+pdf_files = st.file_uploader("Upload PDF files", type="pdf")
+
+if pdf_files:
+    col1, col2 = st.columns(2)
+    with col1:
+        start = st.number_input("Start Page", step=1, placeholder="Start Page", key="2")
+    with col2:
+        end = st.number_input("End Page", step=1, placeholder="End Page", key="3")
+
+    task = st.selectbox('Choose a Task', ("Generate Question", "Generate Summary"))
+    number_of_questions = st.number_input(
+        "Number of Questions", step=1, value=None,
+        placeholder="Number of Questions", key="1",
+    )
+    diff_level = st.selectbox('Difficulty Level', ("Easy", "Medium", "Hard"))
+    question_format = st.selectbox(
+        'Question Format', ("MCQ", "Short Answer", "Long Answer"))
+
+    if st.button('Submit'):
+        with st.spinner('Wait for it...'):
+            raw_text = get_pdf_text(pdf_files, int(start), int(end))
+            all_questions = inferencing(
+                raw_text, task=task, number_of_questions=number_of_questions,
+                diff_level=diff_level, question_format=question_format,
+            )
+            st.write(all_questions)
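The get_text_chunks stub left commented in app.py can be made runnable as-is. A minimal sketch using the stub's own parameters (newline separator, 1024-character chunks, 200-character overlap), shown for reference rather than as part of the commit:

from langchain.text_splitter import CharacterTextSplitter

def get_text_chunks(text):
    # Split long PDF extracts into overlapping chunks so that each
    # request stays within the model's context window.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1024,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(text)

As committed, the whole extract is sent to GPT-4 in a single request, so a wide page range can exceed the context window; splitting with a helper like this and iterating over the chunks would be the natural fix.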
config.py
ADDED
File without changes
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+streamlit
+llama-index
+sentence-transformers
+cohere
+anthropic
+voyageai
+protobuf
+pypdf
+langchain
+PyPDF2
+pycryptodome
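Note that app.py imports the openai package directly, yet openai is not pinned here; it presumably arrives as a transitive dependency of llama-index, so an explicit pin would be safer. Conversely, sentence-transformers, cohere, anthropic, and voyageai are not imported anywhere in this commit, and pypdf and PyPDF2 are two names for the same project, of which only PyPDF2 is used. A leaner list covering app.py's direct imports, plus pycryptodome for AES-encrypted PDFs (a sketch, assuming the unused extras really are unneeded):

streamlit
openai
langchain
PyPDF2
pycryptodome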