Anuj02003 committed on
Commit
87f9ce7
·
verified ·
1 Parent(s): 207a16f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import fitz # PyMuPDF for PDF handling
4
+
5
+ # Function to extract text from PDF
6
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        uploaded_file: Either a binary file-like object (for example the
            object returned by ``st.file_uploader``) or a filesystem path
            accepted by ``fitz.open``.

    Returns:
        The text of all pages joined in page order. The result is an empty
        string when no page yields any text.
    """
    if hasattr(uploaded_file, "getvalue"):
        # BytesIO-style uploads: getvalue() returns the whole buffer no
        # matter where the read cursor currently is.
        pdf_bytes = uploaded_file.getvalue()
    elif hasattr(uploaded_file, "read"):
        pdf_bytes = uploaded_file.read()
    else:
        pdf_bytes = None

    if pdf_bytes is None:
        # Not file-like: treat the argument as a path, as the original did.
        doc = fitz.open(uploaded_file)
    else:
        # An in-memory upload has no path on disk, so pass the raw bytes and
        # name the format explicitly.
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Release the document even if a page fails to parse.
        doc.close()
12
+
13
+ # Function to chunk large text for context
14
def chunk_text(text, max_length=1000):
    """Cut ``text`` into consecutive slices of at most ``max_length`` characters.

    Args:
        text: The string to split.
        max_length: Upper bound on the number of characters per slice.

    Returns:
        A list of slices in their original order. Only the last slice may be
        shorter than ``max_length``; empty ``text`` gives an empty list.
    """
    pieces = []
    for start in range(0, len(text), max_length):
        stop = start + max_length
        pieces.append(text[start:stop])
    return pieces
18
+
19
+ # Initialize Hugging Face model pipeline
20
@st.cache_resource
def load_model():
    """Build and return the GPT-2 text-generation pipeline.

    The function is decorated with ``st.cache_resource``; per the Streamlit
    documentation this is expected to reuse the returned object instead of
    rebuilding it on every script rerun.
    """
    generator = pipeline("text-generation", model="gpt2")
    return generator


# Module-level pipeline handle; get_response() calls it to generate text.
model_pipeline = load_model()
26
+
27
+ # Generate a response
28
def get_response(prompt, context):
    """Generate the model's continuation for a question about ``context``.

    Args:
        prompt: The user's question.
        context: Document text placed before the question.

    Returns:
        The text generated by ``model_pipeline`` for the combined prompt,
        without the prompt itself.
    """
    combined_prompt = f"{context}\n\nQuestion: {prompt}"
    response = model_pipeline(
        combined_prompt,
        # Limit only the newly generated tokens. ``max_length`` also counts
        # the prompt, so a long context could use up the whole budget and
        # leave no room for an answer.
        max_new_tokens=200,
        num_return_sequences=1,
        # Return just the continuation rather than echoing the whole
        # context and question back to the user.
        return_full_text=False,
    )
    return response[0]["generated_text"]
32
+
33
+ # Streamlit App UI
34
# Page heading, followed by the header of the usage section.
st.title("Chat with PDF!!!")
st.subheader("How to Use This Application:")

# Slider shown above the usage text; its return value is not used.
SLIDER_LABEL = "Slide to explore the usage description"
st.slider(SLIDER_LABEL, min_value=0, max_value=100, value=50, step=1)
42
+
43
+ # Displaying detailed description and instructions
44
# Markdown body of the usage section, kept in a named constant so the
# rendering call below stays short.
APP_DESCRIPTION = """
### Welcome to the 'Chat with PDF' Application!

**Description**:
This web application allows you to interact with the contents of a PDF document by uploading a file and asking questions about it. The application processes the uploaded PDF, extracts the text, and uses a powerful Large Language Model (LLM) to respond to your questions in real time.

**Model Used**:
The application leverages the **GPT-2 Model**, a publicly available language model that can understand the text from the PDF and provide answers. GPT-2 works well with both short and long texts, making it ideal for this use case.

**How It Works**:
1. **Upload a PDF File**:
Use the file uploader to select and upload the PDF file you wish to analyze. The file should be in `.pdf` format.

2. **Text Extraction**:
The application extracts the text from the uploaded PDF using the `PyMuPDF` library (imported as `fitz`). This library enables the reading and extraction of text from each page in the PDF.

3. **Text Chunking**:
The extracted text may be very large, so it is divided into smaller chunks to facilitate better processing. By default, each chunk contains up to 1000 characters. These chunks serve as context for answering questions.

4. **Ask Questions**:
After the text is processed and chunked, you can ask questions related to the content of the PDF. Simply type your question in the text area provided on the app.

5. **Model Response**:
When you ask a question, the app sends the prompt (your question) along with the relevant chunk of text to the Hugging Face model. The model then generates a response based on the content it was provided. The response is displayed in the app.

6. **Receive Insights**:
The answers are tailored to the content of the PDF, providing detailed, context-specific insights to help you better understand the document.

**Features**:
- Upload any PDF document for analysis.
- Ask natural language questions based on the document's content.
- Get accurate and context-aware responses generated by a state-of-the-art LLM.
- Split large documents into manageable chunks for optimal performance.

**Why Use This App?**
- If you're reading a long PDF and need quick answers, this tool can assist by summarizing sections of the document or directly answering your specific questions.
- Useful for academic research, legal documents, technical papers, or any lengthy PDF content that needs to be understood quickly.

**Try it now** and start chatting with your PDF to gain insights faster and more efficiently!
"""

st.markdown(APP_DESCRIPTION)
84
+
85
def _words(text):
    """Return the set of lower-cased alphanumeric words found in ``text``."""
    normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(normalized.split())


def _pick_relevant_chunk(question, chunks):
    """Return the chunk that shares the most distinct words with ``question``.

    This is a crude word-overlap heuristic. ``max`` keeps the earliest
    chunk on ties, so a question with no overlap falls back to the first
    chunk. ``chunks`` must be non-empty.
    """
    question_words = _words(question)
    return max(chunks, key=lambda chunk: len(question_words & _words(chunk)))


uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Extract the text of the uploaded PDF and split it into chunks.
    pdf_text = extract_text_from_pdf(uploaded_file)
    text_chunks = chunk_text(pdf_text)

    if not text_chunks:
        # A PDF with no extractable text gives an empty chunk list, and
        # indexing it below would raise IndexError.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Show the first chunk as a preview of the document.
        st.subheader("PDF Content Summary:")
        st.write(text_chunks[0])

        # Question input and submit button.
        prompt = st.text_area(label="Ask a question based on the PDF content")
        button = st.button("Submit")

        if button and prompt:
            # Send the chunk that best matches the question, not always
            # the first one.
            chunk_to_send = _pick_relevant_chunk(prompt, text_chunks)
            response = get_response(prompt, chunk_to_send)
            st.markdown(f"**Response:**\n\n{response}")
        elif button:
            # Submit was pressed with an empty question.
            st.warning("Please enter a question before submitting.")