import os
import tempfile

import streamlit as st
from openai import OpenAI
from PyPDF2 import PdfReader

st.title('Create Questions')

# The client reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def get_pdf_text(pdf_docs, start, end):
    """Extract and concatenate text from pages [start, end) of a PDF."""
    text = ""
    pdf_reader = PdfReader(pdf_docs)
    for i in range(start, end):
        text += pdf_reader.pages[i].extract_text()
    return text
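# Note: PdfReader's pages list is zero-indexed, so get_pdf_text(f, 0, 3)
# extracts what a PDF viewer displays as pages 1-3.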

def inferencing(text, task, number_of_questions, diff_level, question_format):
    """Call the chat model to produce either a summary or a set of questions."""
    if task == "Generate Summary":
        try:
            summary_prompt = (
                "Please generate a section-wise and point-wise summary "
                f"from the context: {text}"
            )
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": summary_prompt}],
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return str(e)
    else:
        try:
            # The instructions are shared across formats; only the answer
            # layout section differs per question type.
            base_prompt = (
                "Below is a text extract. Please generate questions for a "
                "certification as per the parameters below, and also generate the "
                "correct answer for each question separately. Number the questions "
                "and do not generate any explanation. Provide the complete details "
                "of each question by explicitly mentioning the context; do not refer "
                "back to the text extract in the questions. For example, for a "
                "question related to an act in the text, provide the name of the act "
                "rather than saying 'this act' or 'the act'.\n"
                f"Number of Questions to be generated: {number_of_questions}\n"
                f"Difficulty Level of questions: {diff_level}\n"
            )
            if question_format == "MCQ":
                format_section = (
                    "Question Format: MCQ with 4 options\n"
                    "Use the format below:\n"
                    "Question: Question\n"
                    "- A): Choice\n"
                    "- B): Choice\n"
                    "- C): Choice\n"
                    "- D): Choice\n"
                    "- Answer: Answer\n"
                )
            elif question_format == "Short Answer":
                format_section = (
                    "Question Format: Short Answer Type\n"
                    "Use the format below:\n"
                    "Question: Question\n"
                    "- Answer: Answer\n"
                )
            else:  # "Long Answer"
                format_section = (
                    "Question Format: Long Answer Type\n"
                    "Use the format below:\n"
                    "Question: Question\n"
                    "- Answer: Answer\n"
                )
            qa_prompt = base_prompt + format_section + f"{text}\n"
            response = client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": qa_prompt}],
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return str(e)
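# Example call (hypothetical values):
#   inferencing(raw_text, "Generate Question", 5, "Easy", "MCQ")
# returns the model's numbered questions and answers as one string, or the
# error message if the API call fails.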
    
def uploadPdf(pdfs_folder):
    """Write each uploaded PDF to a temporary file and return all paths.

    Currently unused by the UI below, which passes the uploaded file
    object directly to PdfReader.
    """
    temp_paths = []
    for pdf_file in pdfs_folder:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(pdf_file.read())
            temp_paths.append(temp_file.name)
    return temp_paths

# Let the user upload a PDF file (file_uploader returns a single file here,
# since accept_multiple_files is not set).
pdf_file = st.file_uploader("Upload a PDF file", type="pdf")

if pdf_file:
    # Collect the page range and generation parameters.
    col1, col2 = st.columns(2)
    with col1:
        start = st.number_input("Start Page", step=1, placeholder="Start Page", key="2")
    with col2:
        end = st.number_input("End Page", step=1, placeholder="End Page", key="3")

    task = st.selectbox(
        'Choose a Task',
        ("Generate Question", "Generate Summary"))
    number_of_questions = st.number_input(
        "Number of Questions", step=1, value=None,
        placeholder="Number of Questions", key="1")
    diff_level = st.selectbox(
        'Difficulty Level',
        ("Easy", "Medium", "Hard"))
    question_format = st.selectbox(
        'Question Format',
        ("MCQ", "Short Answer", "Long Answer"))
    
    if st.button('Submit'):
        with st.spinner('Wait for it...'):
            raw_text = get_pdf_text(pdf_file, int(start), int(end))
            all_questions = inferencing(
                raw_text, task=task, number_of_questions=number_of_questions,
                diff_level=diff_level, question_format=question_format)
            st.write(all_questions)
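# Run locally with (assuming this file is saved as app.py):
#   streamlit run app.py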