Upload 2 files
Browse files- app.py +107 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import ollama
|
3 |
+
import fitz
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated plain text of every page in a PDF.

    Accepts either a filesystem path (str/Path) or a Streamlit
    ``UploadedFile``-like object.

    Bug fix: the original passed the UploadedFile object straight to
    ``fitz.open()``, which understands only a path or raw bytes and
    raised on the in-memory upload. In-memory uploads are now handed
    over via the ``stream=`` keyword.
    """
    if hasattr(uploaded_file, "getvalue"):
        # In-memory upload: give fitz the raw bytes directly.
        doc = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf")
    else:
        # Filesystem path.
        doc = fitz.open(uploaded_file)
    try:
        # page.get_text() returns the plain text of one page; join at C
        # speed instead of quadratic string +=.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()  # release the underlying file/stream promptly
|
13 |
+
|
14 |
+
# Function to chunk large text for context
def chunk_text(text, max_length=1000):
    """Break *text* into consecutive pieces of at most *max_length* characters.

    The final piece may be shorter; an empty input yields an empty list.
    """
    pieces = []
    cursor = 0
    total = len(text)
    while cursor < total:
        pieces.append(text[cursor:cursor + max_length])
        cursor += max_length
    return pieces
|
19 |
+
|
20 |
+
def save_uploaded_file(uploaded_file):
    """Write the uploaded file into the current working directory.

    Returns the ``st.success`` element, exactly as before, so the call
    site is unchanged.

    Security fix: the client-supplied filename is reduced to its base
    name before joining, so a name like ``../../evil.pdf`` can no longer
    escape the working directory (path traversal).
    """
    # Get the current working directory
    save_path = os.getcwd()
    # basename() strips any directory components smuggled into the name.
    file_path = os.path.join(save_path, os.path.basename(uploaded_file.name))

    # Save the file
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return st.success(f"Saved file: {uploaded_file.name} to {save_path}")
|
31 |
+
|
32 |
+
# --- Static page layout: title, decorative slider, usage instructions ---
st.title("Chat with PDF!!!")

# Slider for description
st.subheader("How to Use This Application:")
# NOTE(review): this slider's value is never read anywhere in the app —
# it appears to be purely decorative. Consider removing it or wiring it
# to the description below.
st.slider(
    "Slide to explore the usage description",
    min_value=0, max_value=100, step=1, value=50
)

# Displaying detailed description and instructions
st.markdown("""
### Welcome to the 'Chat with PDF' Application!

**Description**:
This web application allows you to interact with the contents of a PDF document by uploading a file and asking questions about it. The application processes the uploaded PDF, extracts the text, and uses a powerful Large Language Model (LLM) to respond to your questions in real time.

**Model Used**:
The application leverages the **Ollama LLM** (specifically `llama3.1` model), which is capable of understanding the text from the PDF and providing answers. The model is fine-tuned to handle natural language processing tasks and is adept at working with both short and long texts.

**How It Works**:
1. **Upload a PDF File**:
   Use the file uploader to select and upload the PDF file you wish to analyze. The file should be in `.pdf` format.

2. **Text Extraction**:
   The application extracts the text from the uploaded PDF using the `PyMuPDF` library (imported as `fitz`). This library enables the reading and extraction of text from each page in the PDF.

3. **Text Chunking**:
   The extracted text may be very large, so it is divided into smaller chunks to facilitate better processing. By default, each chunk contains up to 1000 characters. These chunks serve as context for answering questions.

4. **Ask Questions**:
   After the text is processed and chunked, you can ask questions related to the content of the PDF. Simply type your question in the text area provided on the app.

5. **Model Response**:
   When you ask a question, the app sends the prompt (your question) along with the relevant chunk of text to the `llama3.1` model. The model then generates a response based on the content it was provided. The response is displayed in the app.

6. **Receive Insights**:
   The answers are tailored to the content of the PDF, providing detailed, context-specific insights to help you better understand the document.

**Features**:
- Upload any PDF document for analysis.
- Ask natural language questions based on the document's content.
- Get accurate and context-aware responses generated by a state-of-the-art LLM.
- Split large documents into manageable chunks for optimal performance.

**Why Use This App?**
- If you're reading a long PDF and need quick answers, this tool can assist by summarizing sections of the document or directly answering your specific questions.
- Useful for academic research, legal documents, technical papers, or any lengthy PDF content that needs to be understood quickly.

**Try it now** and start chatting with your PDF to gain insights faster and more efficiently!
""")
|
82 |
+
|
83 |
+
uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

if uploaded_file is not None:
    save_uploaded_file(uploaded_file)

    # Bug fix: fitz.open() cannot consume Streamlit's in-memory
    # UploadedFile object directly, so extract from the copy that
    # save_uploaded_file() just wrote to disk (fitz accepts a path).
    saved_path = os.path.join(os.getcwd(), os.path.basename(uploaded_file.name))
    pdf_text = extract_text_from_pdf(saved_path)

    # Chunk the extracted text so only a bounded amount of context is
    # ever sent to the model.
    text_chunks = chunk_text(pdf_text)

    # Display the first chunk as a summary
    st.subheader("PDF Content Summary:")
    if text_chunks:
        st.write(text_chunks[0])  # first ~1000 characters of the document
    else:
        # Guard: scanned/image-only PDFs yield no extractable text; the
        # original indexed text_chunks[0] and raised IndexError here.
        st.warning("No extractable text was found in this PDF.")

    # Input for user prompt
    prompt = st.text_area(label="Ask a question based on the PDF content")
    button = st.button("Ok")

    if button and prompt and text_chunks:
        # Select a chunk of text to send with the prompt.
        # NOTE(review): like the original, only the FIRST chunk is used,
        # so answers reflect just the start of the document; a relevance
        # search over all chunks would be the next improvement.
        chunk_to_send = text_chunks[0]
        combined_prompt = f"Based on the following content: {chunk_to_send}\n\nQuestions: {prompt}"
        response = ollama.generate(model="llama3.1", prompt=combined_prompt)
        st.markdown(response["response"])
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
ollama
# The "fitz" module imported by app.py is provided by the PyMuPDF package;
# the PyPI project actually named "fitz" is unrelated and broken.
PyMuPDF
# "os" removed: it is part of the Python standard library and is not a
# pip-installable package (listing it makes `pip install -r` fail).
|
5 |
+
|
6 |
+
|