Anuj02003 committed on
Commit
b4c4ecf
·
verified ·
1 Parent(s): 46c2205

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +107 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import ollama
3
+ import fitz
4
+ import os
5
+
6
+ # Function to extract text from PDF
7
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        uploaded_file: Either a filesystem path (``str`` or ``os.PathLike``)
            or a binary file-like object exposing ``getvalue()`` or
            ``read()``, such as the object this app receives from
            ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order (an empty string when
        there is nothing to join).
    """
    if isinstance(uploaded_file, (str, os.PathLike)):
        doc = fitz.open(uploaded_file)
    else:
        # Open from the in-memory bytes instead of handing the upload object
        # itself to fitz.open(): that way extraction does not depend on a
        # same-named copy of the file existing in the working directory.
        if hasattr(uploaded_file, "getvalue"):
            pdf_bytes = uploaded_file.getvalue()
        else:
            pdf_bytes = uploaded_file.read()
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    # The context manager closes the document even if a page fails to parse.
    with doc:
        return "".join(page.get_text() for page in doc)
13
+
14
+ # Function to chunk large text for context
15
def chunk_text(text: str, max_length: int = 1000) -> list[str]:
    """Split *text* into consecutive chunks of at most *max_length* characters.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk; must be >= 1.

    Returns:
        The chunks in order. Every chunk except possibly the last has exactly
        ``max_length`` characters. An empty *text* yields an empty list.

    Raises:
        ValueError: If *max_length* is less than 1. A negative step would
            otherwise silently produce no chunks and drop all of the text.
    """
    if max_length < 1:
        raise ValueError(f"max_length must be >= 1, got {max_length}")
    return [
        text[start:start + max_length]
        for start in range(0, len(text), max_length)
    ]
19
+
20
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into the current working directory.

    Args:
        uploaded_file: The upload object; its ``name`` attribute supplies the
            file name and ``getbuffer()`` supplies the bytes to write.

    Returns:
        The result of the ``st.success`` call that reports the saved file.

    Raises:
        ValueError: If the upload's name has no usable file-name component.
    """
    save_path = os.getcwd()

    # The name is supplied by the client, so keep only its final component:
    # directory parts (including "..") must not steer the write outside
    # save_path. Backslashes are normalised so Windows-style paths are
    # stripped on every platform.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(save_path, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return st.success(f"Saved file: {safe_name} to {save_path}")
31
+
32
# Page header.
st.title("Chat with PDF!!!")

# Heading plus the slider shown above the usage description; the slider's
# value is not read anywhere in this section.
st.subheader("How to Use This Application:")
st.slider(
    "Slide to explore the usage description",
    min_value=0,
    max_value=100,
    value=50,
    step=1,
)

# Markdown text describing the application and how to use it.
_USAGE_DESCRIPTION = """
### Welcome to the 'Chat with PDF' Application!

**Description**:
This web application allows you to interact with the contents of a PDF document by uploading a file and asking questions about it. The application processes the uploaded PDF, extracts the text, and uses a powerful Large Language Model (LLM) to respond to your questions in real time.

**Model Used**:
The application leverages the **Ollama LLM** (specifically `llama3.1` model), which is capable of understanding the text from the PDF and providing answers. The model is fine-tuned to handle natural language processing tasks and is adept at working with both short and long texts.

**How It Works**:
1. **Upload a PDF File**:
Use the file uploader to select and upload the PDF file you wish to analyze. The file should be in `.pdf` format.

2. **Text Extraction**:
The application extracts the text from the uploaded PDF using the `PyMuPDF` library (imported as `fitz`). This library enables the reading and extraction of text from each page in the PDF.

3. **Text Chunking**:
The extracted text may be very large, so it is divided into smaller chunks to facilitate better processing. By default, each chunk contains up to 1000 characters. These chunks serve as context for answering questions.

4. **Ask Questions**:
After the text is processed and chunked, you can ask questions related to the content of the PDF. Simply type your question in the text area provided on the app.

5. **Model Response**:
When you ask a question, the app sends the prompt (your question) along with the relevant chunk of text to the `llama3.1` model. The model then generates a response based on the content it was provided. The response is displayed in the app.

6. **Receive Insights**:
The answers are tailored to the content of the PDF, providing detailed, context-specific insights to help you better understand the document.

**Features**:
- Upload any PDF document for analysis.
- Ask natural language questions based on the document's content.
- Get accurate and context-aware responses generated by a state-of-the-art LLM.
- Split large documents into manageable chunks for optimal performance.

**Why Use This App?**
- If you're reading a long PDF and need quick answers, this tool can assist by summarizing sections of the document or directly answering your specific questions.
- Useful for academic research, legal documents, technical papers, or any lengthy PDF content that needs to be understood quickly.

**Try it now** and start chatting with your PDF to gain insights faster and more efficiently!
"""
st.markdown(_USAGE_DESCRIPTION)
82
+
83
# Main flow: upload a PDF, show its first chunk, then answer questions
# about that chunk with the llama3.1 model via ollama.
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

if uploaded_file is not None:
    save_uploaded_file(uploaded_file)

    # Extract the text and split it into chunks.
    pdf_text = extract_text_from_pdf(uploaded_file)
    text_chunks = chunk_text(pdf_text)

    if not text_chunks:
        # A PDF without extractable text (e.g. scanned images only) yields no
        # chunks; indexing text_chunks[0] below would raise IndexError.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Display the first chunk as a summary.
        st.subheader("PDF Content Summary:")
        st.write(text_chunks[0])

        # Input for the user's question.
        prompt = st.text_area(label="Ask a question based on the PDF content")
        button = st.button("Ok")

        if button:
            if prompt.strip():
                # Only the first chunk is sent as context; a chunk could
                # instead be selected based on the user's query.
                chunk_to_send = text_chunks[0]
                combined_prompt = f"Based on the following content: {chunk_to_send}\n\nQuestions: {prompt}"
                response = ollama.generate(model="llama3.1", prompt=combined_prompt)
                st.markdown(response["response"])
            else:
                # Tell the user why nothing happened instead of silently
                # ignoring the click.
                st.warning("Please enter a question before pressing Ok.")
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ ollama
3
+ PyMuPDF
4
+ # os is part of the Python standard library and must not be listed as a pip requirement
5
+
6
+