DrishtiSharma committed
Commit f080dd9 · verified · 1 Parent(s): 556425f

Update app.py

Files changed (1):
  app.py +94 -89
app.py CHANGED
@@ -1,5 +1,6 @@
 import streamlit as st
 import pandas as pd
+import io
 import os
 from dotenv import load_dotenv
 from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
@@ -18,6 +19,7 @@ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 import faiss

 # Load environment variables
+load_dotenv()
 os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

 # Global settings for LlamaIndex
@@ -26,96 +28,99 @@ Settings.llm = OpenAI(model="gpt-3.5-turbo")
 Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=EMBED_DIMENSION)

 # Streamlit app
-st.title("Chat w CSV Files - LangChain Vs LlamaIndex ")
+st.title("Chat with CSV Files - LangChain vs LlamaIndex")

 # File uploader
 uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
 if uploaded_file:
-    # Save the uploaded file temporarily
-    temp_file_path = f"temp_{uploaded_file.name}"
-    with open(temp_file_path, "wb") as temp_file:
-        temp_file.write(uploaded_file.getbuffer())
-
-    # Read and preview CSV data
-    data = pd.read_csv(temp_file_path)
-    st.write("Preview of uploaded data:")
-    st.dataframe(data)
-
-    # Tabs
-    tab1, tab2 = st.tabs(["Chat w CSV using LangChain", "Chat w CSV using LlamaIndex"])
-
-    # LangChain Tab
-    with tab1:
-        st.subheader("LangChain Query")
-        loader = CSVLoader(file_path=temp_file_path)
-        docs = loader.load_and_split()
-
-        # Preview the first document
-        if docs:
-            st.write("Preview of a document chunk (LangChain):")
-            st.text(docs[0].page_content)
-
-        # LangChain FAISS VectorStore
-        langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
-        langchain_vector_store = LangChainFAISS(
-            embedding_function=OpenAIEmbeddings(),
-            index=langchain_index,
-        )
-        langchain_vector_store.add_documents(docs)
-
-        # LangChain Retrieval Chain
-        retriever = langchain_vector_store.as_retriever()
-        system_prompt = (
-            "You are an assistant for question-answering tasks. "
-            "Use the following pieces of retrieved context to answer "
-            "the question. If you don't know the answer, say that you "
-            "don't know. Use three sentences maximum and keep the "
-            "answer concise.\n\n{context}"
-        )
-        prompt = ChatPromptTemplate.from_messages(
-            [("system", system_prompt), ("human", "{input}")]
-        )
-        question_answer_chain = create_stuff_documents_chain(ChatOpenAI(), prompt)
-        langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
-
-        # Query input for LangChain
-        query = st.text_input("Ask a question about your data (LangChain):")
-        if query:
-            answer = langchain_rag_chain.invoke({"input": query})
-            st.write(f"Answer: {answer['answer']}")
-
-    # LlamaIndex Tab
-    with tab2:
-        st.subheader("LlamaIndex Query")
-        csv_reader = PagedCSVReader()
-        reader = SimpleDirectoryReader(
-            input_files=[temp_file_path],
-            file_extractor={".csv": csv_reader},
-        )
-        docs = reader.load_data()
-
-        # Preview the first document
-        if docs:
-            st.write("Preview of a document chunk (LlamaIndex):")
-            st.text(docs[0].text)
-
-        # Initialize FAISS Vector Store
-        llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
-        llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
-
-        # Create the ingestion pipeline and process the data
-        pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
-        nodes = pipeline.run()
-
-        # Create a query engine
-        llama_index = VectorStoreIndex(nodes)
-        query_engine = llama_index.as_query_engine(similarity_top_k=3)
-
-        # Query input for LlamaIndex
-        query = st.text_input("Ask a question about your data (LlamaIndex):")
-        if query:
-            response = query_engine.query(query)
-            st.write(f"Answer: {response.response}")
-
-    # Cleanup temporary file
-    os.remove(temp_file_path)
+    try:
+        # Load and preview CSV data using pandas
+        data = pd.read_csv(uploaded_file)
+        st.write("Preview of uploaded data:")
+        st.dataframe(data)
+
+        # Tabs
+        tab1, tab2 = st.tabs(["Chat w CSV using LangChain", "Chat w CSV using LlamaIndex"])
+
+        # LangChain Tab
+        with tab1:
+            st.subheader("LangChain Query")
+            try:
+                # Use CSVLoader directly with file-like object
+                loader = CSVLoader(file_path=io.BytesIO(uploaded_file.getvalue()))
+                docs = loader.load_and_split()
+
+                # Preview the first document
+                if docs:
+                    st.write("Preview of a document chunk (LangChain):")
+                    st.text(docs[0].page_content)
+
+                # LangChain FAISS VectorStore
+                langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
+                langchain_vector_store = LangChainFAISS(
+                    embedding_function=OpenAIEmbeddings(),
+                    index=langchain_index,
+                )
+                langchain_vector_store.add_documents(docs)
+
+                # LangChain Retrieval Chain
+                retriever = langchain_vector_store.as_retriever()
+                system_prompt = (
+                    "You are an assistant for question-answering tasks. "
+                    "Use the following pieces of retrieved context to answer "
+                    "the question. If you don't know the answer, say that you "
+                    "don't know. Use three sentences maximum and keep the "
+                    "answer concise.\n\n{context}"
+                )
+                prompt = ChatPromptTemplate.from_messages(
+                    [("system", system_prompt), ("human", "{input}")]
+                )
+                question_answer_chain = create_stuff_documents_chain(ChatOpenAI(), prompt)
+                langchain_rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+
+                # Query input for LangChain
+                query = st.text_input("Ask a question about your data (LangChain):")
+                if query:
+                    answer = langchain_rag_chain.invoke({"input": query})
+                    st.write(f"Answer: {answer['answer']}")
+            except Exception as e:
+                st.error(f"Error processing with LangChain: {e}")
+
+        # LlamaIndex Tab
+        with tab2:
+            st.subheader("LlamaIndex Query")
+            try:
+                # Use PagedCSVReader for LlamaIndex
+                csv_reader = PagedCSVReader()
+                reader = SimpleDirectoryReader(
+                    input_files=[uploaded_file.name],
+                    file_extractor={".csv": csv_reader},
+                )
+                docs = reader.load_data()
+
+                # Preview the first document
+                if docs:
+                    st.write("Preview of a document chunk (LlamaIndex):")
+                    st.text(docs[0].text)
+
+                # Initialize FAISS Vector Store
+                llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
+                llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
+
+                # Create the ingestion pipeline and process the data
+                pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
+                nodes = pipeline.run()
+
+                # Create a query engine
+                llama_index = VectorStoreIndex(nodes)
+                query_engine = llama_index.as_query_engine(similarity_top_k=3)
+
+                # Query input for LlamaIndex
+                query = st.text_input("Ask a question about your data (LlamaIndex):")
+                if query:
+                    response = query_engine.query(query)
+                    st.write(f"Answer: {response.response}")
+            except Exception as e:
+                st.error(f"Error processing with LlamaIndex: {e}")
+    except Exception as e:
+        st.error(f"Error reading uploaded file: {e}")