DrishtiSharma committed on
Commit
cfb9d35
·
verified ·
1 Parent(s): 5704aad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -14,6 +14,7 @@ from langchain.chains import create_retrieval_chain
14
  from langchain.chains.combine_documents import create_stuff_documents_chain
15
  from langchain_core.prompts import ChatPromptTemplate
16
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 
17
  import faiss
18
  import tempfile
19
 
@@ -44,7 +45,7 @@ if uploaded_file:
44
  data.to_csv(temp_file.name, index=False, encoding="utf-8")
45
  temp_file.flush()
46
 
47
- # Debugging: Verify the temporary file (Display partially)
48
  st.write("Temporary file path:", temp_file_path)
49
  with open(temp_file_path, "r") as f:
50
  content = f.read()
@@ -54,35 +55,35 @@ if uploaded_file:
54
  # Tabs for LangChain and LlamaIndex
55
  tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
56
 
57
- # LangChain Tab with Proper FAISS Initialization
58
  with tab1:
59
  st.subheader("LangChain Query")
 
60
  try:
61
- # Custom preprocessing: Split CSV rows into smaller chunks
62
  st.write("Processing CSV with a custom loader...")
63
  documents = []
64
  for _, row in data.iterrows():
65
  content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
66
- documents.append({"page_content": content})
67
-
68
- # Debugging: Preview loaded documents
69
- #st.write("Successfully processed documents:")
70
- #if documents:
71
- # st.text(documents[0]["page_content"])
72
 
73
- # Create FAISS VectorStore with proper arguments
74
  langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
75
- docstore = InMemoryDocstore() # Create an in-memory docstore
76
  index_to_docstore_id = {} # Mapping of index to document ID
 
77
  langchain_vector_store = LangChainFAISS(
78
  embedding_function=OpenAIEmbeddings(),
79
  index=langchain_index,
80
  docstore=docstore,
81
  index_to_docstore_id=index_to_docstore_id,
82
  )
 
 
83
  langchain_vector_store.add_documents(documents)
84
 
85
- # LangChain Retrieval Chain
86
  retriever = langchain_vector_store.as_retriever()
87
  system_prompt = (
88
  "You are an assistant for question-answering tasks. "
@@ -97,47 +98,56 @@ if uploaded_file:
97
  question_answer_chain = create_stuff_documents_chain(ChatOpenAI(model="gpt-4o"), prompt)
98
  langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
99
 
100
- # Query input
101
  query = st.text_input("Ask a question about your data (LangChain):")
 
102
  if query:
103
- answer = langchain_rag_chain.invoke({"input": query})
104
- st.write(f"Answer: {answer['answer']}")
 
 
 
 
 
105
  except Exception as e:
106
  st.error(f"Error processing with LangChain: {e}")
107
 
108
- # LlamaIndex Tab
109
  with tab2:
110
  st.subheader("LlamaIndex Query")
 
111
  try:
112
- # Use PagedCSVReader directly on the uploaded file
113
  st.write("Loading file with LlamaIndex PagedCSVReader...")
114
  csv_reader = PagedCSVReader()
115
  docs = csv_reader.load_from_file(temp_file_path)
116
 
117
- # Debugging: Preview loaded documents
118
- st.write("Successfully loaded documents:")
119
- if docs:
120
- st.text(docs[0].text)
121
-
122
- # Initialize FAISS Vector Store
123
  llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
124
  llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
125
 
126
- # Create the ingestion pipeline and process the data
127
  pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
128
  nodes = pipeline.run()
129
 
130
- # Create a query engine
131
  llama_index = VectorStoreIndex(nodes)
132
  query_engine = llama_index.as_query_engine(similarity_top_k=3)
133
 
134
- # Query input
135
- query = st.text_input("Ask a question about your data (LlamaIndex):")
136
- if query:
137
- response = query_engine.query(query)
138
- st.write(f"Answer: {response.response}")
 
 
 
 
 
 
139
  except Exception as e:
140
  st.error(f"Error processing with LlamaIndex: {e}")
 
141
  finally:
142
  # Clean up the temporary file
143
  if 'temp_file_path' in locals() and os.path.exists(temp_file_path):
 
14
  from langchain.chains.combine_documents import create_stuff_documents_chain
15
  from langchain_core.prompts import ChatPromptTemplate
16
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
17
+ from langchain_core.documents import Document # ✅ FIX: Import LangChain Document
18
  import faiss
19
  import tempfile
20
 
 
45
  data.to_csv(temp_file.name, index=False, encoding="utf-8")
46
  temp_file.flush()
47
 
48
+ # Verify the temporary file (Display partial content)
49
  st.write("Temporary file path:", temp_file_path)
50
  with open(temp_file_path, "r") as f:
51
  content = f.read()
 
55
  # Tabs for LangChain and LlamaIndex
56
  tab1, tab2 = st.tabs(["LangChain", "LlamaIndex"])
57
 
58
+ # ✅ LangChain Processing with Proper Document Format
59
  with tab1:
60
  st.subheader("LangChain Query")
61
+
62
  try:
63
+ # ✅ Convert CSV rows into LangChain Document objects (Fix for `dict` error)
64
  st.write("Processing CSV with a custom loader...")
65
  documents = []
66
  for _, row in data.iterrows():
67
  content = "\n".join([f"{col}: {row[col]}" for col in data.columns])
68
+ doc = Document(page_content=content) # Convert to Document object
69
+ documents.append(doc) # Append to list
 
 
 
 
70
 
71
+ # ✅ Create FAISS VectorStore with proper arguments
72
  langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
73
+ docstore = InMemoryDocstore() # In-memory storage for documents
74
  index_to_docstore_id = {} # Mapping of index to document ID
75
+
76
  langchain_vector_store = LangChainFAISS(
77
  embedding_function=OpenAIEmbeddings(),
78
  index=langchain_index,
79
  docstore=docstore,
80
  index_to_docstore_id=index_to_docstore_id,
81
  )
82
+
83
+ # ✅ Add properly formatted documents to FAISS
84
  langchain_vector_store.add_documents(documents)
85
 
86
+ # ✅ LangChain Retrieval Chain
87
  retriever = langchain_vector_store.as_retriever()
88
  system_prompt = (
89
  "You are an assistant for question-answering tasks. "
 
98
  question_answer_chain = create_stuff_documents_chain(ChatOpenAI(model="gpt-4o"), prompt)
99
  langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
100
 
101
+ # ✅ Query Input Field for LangChain
102
  query = st.text_input("Ask a question about your data (LangChain):")
103
+
104
  if query:
105
+ try:
106
+ st.write("Processing your question...")
107
+ answer = langchain_rag_chain.invoke({"input": query})
108
+ st.write(f"**Answer:** {answer['answer']}")
109
+ except Exception as e:
110
+ st.error(f"Error processing query: {e}")
111
+
112
  except Exception as e:
113
  st.error(f"Error processing with LangChain: {e}")
114
 
115
+ # ✅ LlamaIndex Processing
116
  with tab2:
117
  st.subheader("LlamaIndex Query")
118
+
119
  try:
120
+ # Use PagedCSVReader to load CSV
121
  st.write("Loading file with LlamaIndex PagedCSVReader...")
122
  csv_reader = PagedCSVReader()
123
  docs = csv_reader.load_from_file(temp_file_path)
124
 
125
+ # ✅ Create FAISS Vector Store
 
 
 
 
 
126
  llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
127
  llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
128
 
129
+ # ✅ Create ingestion pipeline and process data
130
  pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
131
  nodes = pipeline.run()
132
 
133
+ # ✅ Create a query engine
134
  llama_index = VectorStoreIndex(nodes)
135
  query_engine = llama_index.as_query_engine(similarity_top_k=3)
136
 
137
+ # ✅ Query Input Field for LlamaIndex
138
+ query_llama = st.text_input("Ask a question about your data (LlamaIndex):")
139
+
140
+ if query_llama:
141
+ try:
142
+ st.write("Processing your question...")
143
+ response = query_engine.query(query_llama)
144
+ st.write(f"**Answer:** {response.response}")
145
+ except Exception as e:
146
+ st.error(f"Error processing query: {e}")
147
+
148
  except Exception as e:
149
  st.error(f"Error processing with LlamaIndex: {e}")
150
+
151
  finally:
152
  # Clean up the temporary file
153
  if 'temp_file_path' in locals() and os.path.exists(temp_file_path):