Spaces:
Sleeping
Sleeping
Upload finalapp2.py
Browse files- finalapp2.py +110 -0
finalapp2.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import time
|
4 |
+
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
|
5 |
+
from langchain_community.document_loaders import PyPDFLoader
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain.chains.combine_documents import create_stuff_documents_chain
|
8 |
+
from langchain_core.prompts import ChatPromptTemplate
|
9 |
+
from langchain_community.vectorstores import FAISS
|
10 |
+
from langchain.chains import create_retrieval_chain
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
|
13 |
+
# Read variables from a .env file into the environment before any client is
# built (presumably this is where the NVIDIA API key comes from — confirm).
load_dotenv()

# Chat model shared by every question asked in this app.
llm = ChatNVIDIA(model="meta/llama-3.1-405b-instruct")
|
17 |
+
|
18 |
+
# Function to create or get vector store
|
19 |
+
def vector_embedding(uploaded_files):
    """Create (or reuse) the FAISS vector store kept in the Streamlit session.

    The store is (re)built when ``uploaded_files`` is non-empty or when no
    store exists yet in ``st.session_state``; otherwise the existing store is
    returned unchanged.

    Args:
        uploaded_files: Uploaded PDF file objects; each must expose
            ``getbuffer()`` returning the raw file bytes.

    Returns:
        The vector store held in ``st.session_state.vectors``.

    Raises:
        ValueError: If a store has to be built but no files were supplied, or
            if splitting the loaded PDFs produced no text chunks.
    """
    import tempfile  # local import: only needed here

    if "vectors" not in st.session_state or uploaded_files:
        if not uploaded_files:
            raise ValueError("No PDF files were provided to build the vector store.")

        st.session_state.embeddings = NVIDIAEmbeddings()
        documents = []
        for file in uploaded_files:
            # PyPDFLoader is given a filesystem path, so the upload is spilled
            # to disk first. A unique temp file is used instead of a name
            # derived from the client-supplied file name, which could collide
            # between sessions or carry path components.
            # NOTE(review): if the loader records the path in document
            # metadata, it will now be the temp path — confirm nothing relies
            # on the old "temp_<name>" value.
            tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
            try:
                with tmp:
                    tmp.write(file.getbuffer())
                documents.extend(PyPDFLoader(tmp.name).load())
            finally:
                # Always clean up, even when writing or parsing fails.
                os.remove(tmp.name)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
        final_documents = text_splitter.split_documents(documents)
        if not final_documents:
            # Presumably FAISS cannot build an index from zero documents;
            # fail with a clear message instead of an obscure downstream error.
            raise ValueError("No text could be extracted from the uploaded PDF files.")

        st.session_state.vectors = FAISS.from_documents(
            final_documents,
            st.session_state.embeddings,
        )
    return st.session_state.vectors
|
38 |
+
|
39 |
+
# Streamlit UI
|
40 |
+
# Page heading.
st.title("RAG WITH EXTERNAL DOCS BY USING NVIDIA NIM ")

# PDF upload widget; several files may be selected at once.
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Build the vector store on demand, but only once at least one PDF is present.
create_clicked = st.button("Create Document Embeddings")
if create_clicked:
    if uploaded_files:
        with st.spinner("Creating embeddings..."):
            vector_store = vector_embedding(uploaded_files)
        st.success("FAISS vector store is ready")
    else:
        st.error("Please upload PDF files first.")
|
53 |
+
|
54 |
+
# Input for user question
|
55 |
+
# Free-text question about the uploaded documents.
prompt1 = st.text_input("Enter your question about the documents:")

# Answer the question only when the button is pressed and a question was typed.
if st.button("Get Answer") and prompt1:
    if "vectors" not in st.session_state:
        st.error("Please create document embeddings first.")
    else:
        with st.spinner("Thinking..."):
            # NOTE(review): the template talks about "the link" although the
            # context comes from uploaded PDFs — consider rewording it.
            prompt = ChatPromptTemplate.from_template("""
You are an intelligent assistant designed to analyze and explain content from a given link. Your task is to provide a comprehensive overview and detailed explanation of the content in a structured manner. Follow these steps:

1. Overview and Fundamentals:
- Provide a brief overview of the main topic or subject matter of the link.
- Explain the fundamental concepts or principles related to the topic.
- Identify and define key terms or keywords that are crucial to understanding the content.
- List the main topics or sections covered in the link.

2. Detailed Topic Explanation:
- For each main topic or section identified, provide a detailed explanation.
- Include relevant examples, if available, to illustrate key points.
- Highlight any important sub-topics or related concepts within each main topic.
- Ensure that the explanation is thorough and covers all significant aspects mentioned in the link.

3. Common Functionality or Applications:
- Discuss common uses, applications, or real-world relevance of the main topic.
- If applicable, mention any tools, technologies, or methodologies associated with the topic.
- Highlight any current trends or future prospects related to the subject matter.

4. Answering Questions:
- When answering specific questions about the content, refer back to the information you've analyzed.
- Provide clear, concise, and accurate responses based on the content of the link.
- If a question goes beyond the scope of the link's content, indicate this and provide the best possible answer based on the available information.

Remember to maintain a logical flow in your explanations, ensuring that complex ideas are broken down into understandable segments. Your goal is to make the content accessible and comprehensible to users with varying levels of familiarity with the subject matter.
<context>
{context}
</context>
Question: {input}
""")

            # Retrieval feeds the matching chunks into {context}; the typed
            # question fills {input}.
            document_chain = create_stuff_documents_chain(llm, prompt)
            retriever = st.session_state.vectors.as_retriever()
            retrieval_chain = create_retrieval_chain(retriever, document_chain)

            # perf_counter() measures elapsed wall-clock time. process_time()
            # only counts CPU time of this process and therefore leaves out
            # the time spent waiting for the chain call to return.
            start = time.perf_counter()
            response = retrieval_chain.invoke({'input': prompt1})
            end = time.perf_counter()

        st.write("Answer:", response['answer'])
        st.write(f"Response time: {end - start:.2f} seconds")

        # Show the retrieved chunks that were passed to the model as context.
        with st.expander("Documents similarity search"):
            for i, doc in enumerate(response["context"]):
                st.write(f"Document {i + 1}:")
                st.write(doc.page_content)
                st.write("------------------")
|