Ajeet001 committed
Commit 61eba83 · verified
1 Parent(s): e6e0544

Update app.py

Files changed (1)
  1. app.py +90 -89
app.py CHANGED
@@ -1,89 +1,90 @@
-import os
-import streamlit as st
-import yaml
-from dotenv import load_dotenv
-from src.document_loader import load_document
-from src.chunking_embedding import setup_chunking_and_embedding
-from src.vector_store import create_vectorstore
-from langchain_groq import ChatGroq
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferWindowMemory
-from src.utils import log_interaction
-
-# Load environment variables from .env file
-load_dotenv()
-
-
-# Load configuration
-config_path = os.path.join(os.getcwd(), "config", "config.yaml")  # Ensure correct path
-with open(config_path, "r") as f:
-    config = yaml.safe_load(f)
-
-# Get the GROQ API key from environment variables
-groq_api_key = os.getenv('groq_api_key')
-if not groq_api_key:
-    raise ValueError("GROQ_API_KEY not found. Please set it in the .env file.")
-
-# Streamlit UI
-st.set_page_config(page_title="Your Document AI Assistant", page_icon="📄", layout="centered")
-st.title("🪅 Document AI Assistant!")
-
-# Sidebar for document upload
-uploaded_file = st.sidebar.file_uploader(label="Upload your document (PDF/DOC/DOCX)", type=['pdf', 'doc', 'docx'])
-loader_type = st.sidebar.selectbox('Choose a loader type', config['loaders'])
-embedding_model = st.sidebar.selectbox('Choose an embedding model', config['embedding_models'])
-chunking_strategy = st.sidebar.selectbox('Choose a chunking strategy', config['chunking_strategies'])
-chunk_size = st.sidebar.number_input('Chunk Size', min_value=100, value=500, step=100)
-chunk_overlap = st.sidebar.number_input('Chunk Overlap', min_value=0, value=100, step=100)
-temperature = st.sidebar.slider('Temperature', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
-top_p = st.sidebar.slider('Top-p', min_value=0.0, max_value=1.0, value=0.9, step=0.1)
-
-# Initialize session state for conversation
-if 'chat_history' not in st.session_state:
-    st.session_state.chat_history = []
-if 'conversation_chain' not in st.session_state:
-    st.session_state.conversation_chain = None
-
-# Process the uploaded file
-if uploaded_file is not None:
-    file_path = os.path.join("temp", uploaded_file.name)
-    os.makedirs("temp", exist_ok=True)
-    with open(file_path, "wb") as f:
-        f.write(uploaded_file.getbuffer())
-
-    documents = load_document(file_path, loader_type)
-    doc_chunks, embeddings = setup_chunking_and_embedding(documents, chunking_strategy, chunk_size, chunk_overlap, embedding_model)
-    vectorstore = create_vectorstore(doc_chunks, embeddings)
-
-    # Create the conversational retrieval chain
-    llm = ChatGroq(groq_api_key=groq_api_key, model_name='llama-3.3-70b-versatile', temperature=temperature)
-    retriever = vectorstore.as_retriever()
-    memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history", return_messages=True)
-    st.session_state.conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
-
-# User input for asking a question
-user_input = st.text_input("Ask a question:")
-if st.button("Ask Question"):
-    if user_input:
-        with st.chat_message("user"):
-            st.markdown(user_input)
-
-        with st.chat_message("assistant"):
-            response = st.session_state.conversation_chain({"question": user_input})
-            assistant_response = response['answer']
-            st.markdown(assistant_response)
-            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
-
-        # Log interaction
-        params = {
-            "loader": loader_type,
-            "chunking_strategy": chunking_strategy,
-            "chunk_size": chunk_size,
-            "chunk_overlap": chunk_overlap,
-            "embedding_model": embedding_model,
-            "temperature": temperature,
-            "top_p": top_p
-        }
-        log_interaction(user_input, assistant_response, params)
+# import library
+import os
+import streamlit as st
+import yaml
+from dotenv import load_dotenv
+from src.document_loader import load_document
+from src.chunking_embedding import setup_chunking_and_embedding
+from src.vector_store import create_vectorstore
+from langchain_groq import ChatGroq
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferWindowMemory
+from src.utils import log_interaction
+
+# Load environment variables from .env file
+load_dotenv()
+
+
+# Load configuration
+config_path = os.path.join(os.getcwd(), "config", "config.yaml")  # Ensure correct path
+with open(config_path, "r") as f:
+    config = yaml.safe_load(f)
+
+# Get the GROQ API key from environment variables
+groq_api_key = os.getenv('GROQ_API_KEY')
+if not groq_api_key:
+    raise ValueError("GROQ_API_KEY not found. Please set it in the .env file.")
+
+# Streamlit UI
+st.set_page_config(page_title="Your Document AI Assistant", page_icon="📄", layout="centered")
+st.title("🪅 Document AI Assistant!")
+
+# Sidebar for document upload
+uploaded_file = st.sidebar.file_uploader(label="Upload your document (PDF/DOC/DOCX)", type=['pdf', 'doc', 'docx'])
+loader_type = st.sidebar.selectbox('Choose a loader type', config['loaders'])
+embedding_model = st.sidebar.selectbox('Choose an embedding model', config['embedding_models'])
+chunking_strategy = st.sidebar.selectbox('Choose a chunking strategy', config['chunking_strategies'])
+chunk_size = st.sidebar.number_input('Chunk Size', min_value=100, value=500, step=100)
+chunk_overlap = st.sidebar.number_input('Chunk Overlap', min_value=0, value=100, step=100)
+temperature = st.sidebar.slider('Temperature', min_value=0.0, max_value=1.0, value=0.0, step=0.1)
+top_p = st.sidebar.slider('Top-p', min_value=0.0, max_value=1.0, value=0.9, step=0.1)
+
+# Initialize session state for conversation
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+if 'conversation_chain' not in st.session_state:
+    st.session_state.conversation_chain = None
+
+# Process the uploaded file
+if uploaded_file is not None:
+    file_path = os.path.join("temp", uploaded_file.name)
+    os.makedirs("temp", exist_ok=True)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+
+    documents = load_document(file_path, loader_type)
+    doc_chunks, embeddings = setup_chunking_and_embedding(documents, chunking_strategy, chunk_size, chunk_overlap, embedding_model)
+    vectorstore = create_vectorstore(doc_chunks, embeddings)
+
+    # Create the conversational retrieval chain
+    llm = ChatGroq(groq_api_key=groq_api_key, model_name='llama-3.3-70b-versatile', temperature=temperature)
+    retriever = vectorstore.as_retriever()
+    memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history", return_messages=True)
+    st.session_state.conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
+
+# User input for asking a question
+user_input = st.text_input("Ask a question:")
+if st.button("Ask Question"):
+    if user_input:
+        with st.chat_message("user"):
+            st.markdown(user_input)
+
+        with st.chat_message("assistant"):
+            response = st.session_state.conversation_chain({"question": user_input})
+            assistant_response = response['answer']
+            st.markdown(assistant_response)
+            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+
+        # Log interaction
+        params = {
+            "loader": loader_type,
+            "chunking_strategy": chunking_strategy,
+            "chunk_size": chunk_size,
+            "chunk_overlap": chunk_overlap,
+            "embedding_model": embedding_model,
+            "temperature": temperature,
+            "top_p": top_p
+        }
+        log_interaction(user_input, assistant_response, params)
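
Note for anyone reviewing or running this commit: the sidebar widgets read three keys from config/config.yaml (loaders, embedding_models, chunking_strategies), but that file is not part of this diff. Below is a minimal sketch of the structure app.py appears to expect; the three key names come from the selectbox calls above, while every list entry is an illustrative guess.

import yaml

# Stand-in for config/config.yaml. Only the three key names are taken from
# app.py; the entries themselves are assumptions, not the repo's real values.
SAMPLE_CONFIG = """\
loaders:
  - PyPDFLoader
  - Docx2txtLoader
embedding_models:
  - sentence-transformers/all-MiniLM-L6-v2
chunking_strategies:
  - recursive
  - character
"""

config = yaml.safe_load(SAMPLE_CONFIG)
# Each value feeds st.sidebar.selectbox directly, so each key must map to a
# non-empty list of option strings.
for key in ("loaders", "embedding_models", "chunking_strategies"):
    assert isinstance(config[key], list) and config[key], f"{key} must be a non-empty list"
print(config["loaders"])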
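
Similarly, log_interaction is imported from src/utils, which this commit does not touch, so its behavior is not visible here. A sketch of an implementation matching the call site log_interaction(user_input, assistant_response, params) follows; the JSONL path and record layout are assumptions, not the repository's actual helper.

import json
import os
from datetime import datetime, timezone

def log_interaction(question: str, answer: str, params: dict,
                    path: str = "logs/interactions.jsonl") -> None:
    # Append one question/answer turn, plus the RAG settings that produced
    # it, as a single JSON line. Path and schema are assumptions.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "question": question,
        "answer": answer,
        "params": params,
    }
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

Two small review observations: top_p is collected in the sidebar and recorded in params but is never passed to ChatGroq, and clicking "Ask Question" before a document has been uploaded invokes st.session_state.conversation_chain while it is still None, which raises a TypeError; a guard before calling the chain would be a cheap follow-up.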