arsath-sm committed on
Commit
404cd45
1 Parent(s): e6c1719

Upload finalapp2.py

Browse files
Files changed (1) hide show
  1. finalapp2.py +110 -0
finalapp2.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.chains.combine_documents import create_stuff_documents_chain
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain.chains import create_retrieval_chain
11
+ from dotenv import load_dotenv
12
+
13
# Load environment variables from a local .env file
# (presumably including the NVIDIA API key -- confirm against deployment).
load_dotenv()

# Identifier of the NVIDIA-hosted chat model used to answer questions.
LLM_MODEL_NAME = "meta/llama-3.1-405b-instruct"

# Single chat-model client shared by the whole script.
llm = ChatNVIDIA(model=LLM_MODEL_NAME)
17
+
18
+ # Function to create or get vector store
19
def vector_embedding(uploaded_files):
    """Build, or reuse, the FAISS vector store kept in ``st.session_state``.

    When ``uploaded_files`` is non-empty, every file is written to a private
    temporary PDF, loaded with ``PyPDFLoader``, split into overlapping chunks
    and embedded into a fresh FAISS index that replaces any previous one.
    When it is empty and an index already exists in the session, that index
    is returned unchanged.

    Args:
        uploaded_files: Sequence of uploaded file objects exposing ``name``
            and ``getbuffer()`` (the objects returned by ``st.file_uploader``).

    Returns:
        The vector store stored in ``st.session_state.vectors``.

    Raises:
        ValueError: If there is nothing to index -- no files were given and
            no index exists yet, or the PDFs yielded no text chunks.
    """
    # Imported here so the block is self-contained; only this function needs it.
    import tempfile

    if not uploaded_files:
        if "vectors" in st.session_state:
            return st.session_state.vectors
        raise ValueError("No PDF files were provided to build the vector store.")

    documents = []
    for file in uploaded_files:
        # A uniquely named temp file avoids collisions between sessions or
        # between uploads that share a file name. delete=False lets the loader
        # reopen the path after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file.getbuffer())
            temp_file_path = tmp.name
        try:
            documents.extend(PyPDFLoader(temp_file_path).load())
        finally:
            # Always clean up, even when the PDF cannot be parsed.
            os.remove(temp_file_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
    final_documents = text_splitter.split_documents(documents)
    if not final_documents:
        # An empty chunk list cannot be indexed; fail with a clear message
        # instead of an obscure error from inside the vector store.
        raise ValueError("No text could be extracted from the uploaded PDF files.")

    st.session_state.embeddings = NVIDIAEmbeddings()
    st.session_state.vectors = FAISS.from_documents(
        final_documents,
        st.session_state.embeddings,
    )
    return st.session_state.vectors
38
+
39
# Streamlit UI
st.title("RAG WITH EXTERNAL DOCS BY USING NVIDIA NIM ")

# File uploader for PDFs (one or more files may be selected).
uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

# Button to create the vector store from the uploaded PDFs.
if st.button("Create Document Embeddings"):
    if not uploaded_files:
        st.error("Please upload PDF files first.")
    else:
        with st.spinner("Creating embeddings..."):
            # The index is stored in st.session_state by vector_embedding,
            # so the return value does not need to be kept here.
            vector_embedding(uploaded_files)
        st.success("FAISS vector store is ready")

# Input for user question
prompt1 = st.text_input("Enter your question about the documents:")
56
+
57
# Prompt sent to the chat model. {context} is filled with the retrieved
# document chunks and {input} with the user's question.
ANSWER_PROMPT_TEMPLATE = """
You are an intelligent assistant designed to analyze and explain content from a given link. Your task is to provide a comprehensive overview and detailed explanation of the content in a structured manner. Follow these steps:

1. Overview and Fundamentals:
- Provide a brief overview of the main topic or subject matter of the link.
- Explain the fundamental concepts or principles related to the topic.
- Identify and define key terms or keywords that are crucial to understanding the content.
- List the main topics or sections covered in the link.

2. Detailed Topic Explanation:
- For each main topic or section identified, provide a detailed explanation.
- Include relevant examples, if available, to illustrate key points.
- Highlight any important sub-topics or related concepts within each main topic.
- Ensure that the explanation is thorough and covers all significant aspects mentioned in the link.

3. Common Functionality or Applications:
- Discuss common uses, applications, or real-world relevance of the main topic.
- If applicable, mention any tools, technologies, or methodologies associated with the topic.
- Highlight any current trends or future prospects related to the subject matter.

4. Answering Questions:
- When answering specific questions about the content, refer back to the information you've analyzed.
- Provide clear, concise, and accurate responses based on the content of the link.
- If a question goes beyond the scope of the link's content, indicate this and provide the best possible answer based on the available information.

Remember to maintain a logical flow in your explanations, ensuring that complex ideas are broken down into understandable segments. Your goal is to make the content accessible and comprehensible to users with varying levels of familiarity with the subject matter.
<context>
{context}
</context>
Question: {input}
"""

# Button to process the question (ignored while the question box is empty).
if st.button("Get Answer") and prompt1:
    if "vectors" not in st.session_state:
        st.error("Please create document embeddings first.")
    else:
        with st.spinner("Thinking..."):
            prompt = ChatPromptTemplate.from_template(ANSWER_PROMPT_TEMPLATE)

            # Retrieve chunks from the session's vector store and pass them
            # to the LLM together with the question.
            document_chain = create_stuff_documents_chain(llm, prompt)
            retriever = st.session_state.vectors.as_retriever()
            retrieval_chain = create_retrieval_chain(retriever, document_chain)

            # perf_counter measures wall-clock time; process_time would only
            # count local CPU time and miss the time spent waiting on the
            # retrieval and model calls.
            start = time.perf_counter()
            response = retrieval_chain.invoke({'input': prompt1})
            elapsed = time.perf_counter() - start

            st.write("Answer:", response['answer'])
            st.write(f"Response time: {elapsed:.2f} seconds")

            # Show the retrieved chunks that were used as context.
            with st.expander("Documents similarity search"):
                for i, doc in enumerate(response["context"]):
                    st.write(f"Document {i + 1}:")
                    st.write(doc.page_content)
                    st.write("------------------")