DrishtiSharma committed on
Commit
dff1e7c
·
verified ·
1 Parent(s): 7abc67e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -13
app.py CHANGED
@@ -2,26 +2,29 @@ import streamlit as st
2
  import pandas as pd
3
  import os
4
  from dotenv import load_dotenv
 
 
 
 
 
 
5
  from langchain_community.document_loaders.csv_loader import CSVLoader
6
  from langchain_community.vectorstores import FAISS as LangChainFAISS
7
  from langchain.chains import create_retrieval_chain
8
  from langchain.chains.combine_documents import create_stuff_documents_chain
9
  from langchain_core.prompts import ChatPromptTemplate
10
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
11
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
12
- from llama_index.embeddings.openai import OpenAIEmbedding
13
- from llama_index.vector_stores.faiss import FaissVectorStore
14
- from llama_index.llms.openai import OpenAI
15
  import faiss
16
 
17
  # Load environment variables
 
18
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
19
 
20
- # Set global OpenAI parameters
21
  EMBED_DIMENSION = 512
22
  llama_llm = OpenAI(model="gpt-3.5-turbo")
23
  llama_embedding_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=EMBED_DIMENSION)
24
- langchain_llm = ChatOpenAI(model="gpt-4o")
25
 
26
  # Streamlit app
27
  st.title("Streamlit App with LangChain and LlamaIndex")
@@ -39,9 +42,13 @@ if uploaded_file:
39
  # LangChain Tab
40
  with tab1:
41
  st.subheader("LangChain Query")
42
- loader = CSVLoader(file_path=uploaded_file)
43
  docs = loader.load_and_split()
44
 
 
 
 
 
45
  # LangChain FAISS VectorStore
46
  langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
47
  langchain_vector_store = LangChainFAISS(
@@ -74,19 +81,27 @@ if uploaded_file:
74
  # LlamaIndex Tab
75
  with tab2:
76
  st.subheader("LlamaIndex Query")
77
- csv_reader = SimpleDirectoryReader(
78
- input_files=[uploaded_file],
79
- file_extractor={".csv": PagedCSVReader()},
 
 
80
  )
81
- docs = csv_reader.load_data()
82
 
83
- # LlamaIndex FAISS VectorStore
 
 
 
 
84
  llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
85
  llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
 
 
86
  pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
87
  nodes = pipeline.run()
88
 
89
- # LlamaIndex Query Engine
90
  llama_index = VectorStoreIndex(nodes)
91
  query_engine = llama_index.as_query_engine(similarity_top_k=2)
92
 
 
2
  import pandas as pd
3
  import os
4
  from dotenv import load_dotenv
5
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
6
+ from llama_index.readers.file import PagedCSVReader
7
+ from llama_index.embeddings.openai import OpenAIEmbedding
8
+ from llama_index.llms.openai import OpenAI
9
+ from llama_index.vector_stores.faiss import FaissVectorStore
10
+ from llama_index.core.ingestion import IngestionPipeline
11
  from langchain_community.document_loaders.csv_loader import CSVLoader
12
  from langchain_community.vectorstores import FAISS as LangChainFAISS
13
  from langchain.chains import create_retrieval_chain
14
  from langchain.chains.combine_documents import create_stuff_documents_chain
15
  from langchain_core.prompts import ChatPromptTemplate
16
  from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 
 
 
 
17
  import faiss
18
 
19
  # Load environment variables
20
+ load_dotenv()
21
  os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
22
 
23
+ # Global OpenAI and FAISS settings
24
  EMBED_DIMENSION = 512
25
  llama_llm = OpenAI(model="gpt-3.5-turbo")
26
  llama_embedding_model = OpenAIEmbedding(model="text-embedding-3-small", dimensions=EMBED_DIMENSION)
27
+ langchain_llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
28
 
29
  # Streamlit app
30
  st.title("Streamlit App with LangChain and LlamaIndex")
 
42
  # LangChain Tab
43
  with tab1:
44
  st.subheader("LangChain Query")
45
+ loader = CSVLoader(file_path=uploaded_file.name)
46
  docs = loader.load_and_split()
47
 
48
+ # Preview the first document
49
+ st.write("Preview of a document chunk (LangChain):")
50
+ st.text(docs[0].page_content)
51
+
52
  # LangChain FAISS VectorStore
53
  langchain_index = faiss.IndexFlatL2(EMBED_DIMENSION)
54
  langchain_vector_store = LangChainFAISS(
 
81
  # LlamaIndex Tab
82
  with tab2:
83
  st.subheader("LlamaIndex Query")
84
+ # Use PagedCSVReader for CSV loading
85
+ csv_reader = PagedCSVReader()
86
+ reader = SimpleDirectoryReader(
87
+ input_files=[uploaded_file.name],
88
+ file_extractor={".csv": csv_reader},
89
  )
90
+ docs = reader.load_data()
91
 
92
+ # Preview the first document
93
+ st.write("Preview of a document chunk (LlamaIndex):")
94
+ st.text(docs[0].text)
95
+
96
+ # Initialize FAISS Vector Store
97
  llama_faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
98
  llama_vector_store = FaissVectorStore(faiss_index=llama_faiss_index)
99
+
100
+ # Create the ingestion pipeline and process the data
101
  pipeline = IngestionPipeline(vector_store=llama_vector_store, documents=docs)
102
  nodes = pipeline.run()
103
 
104
+ # Create a query engine
105
  llama_index = VectorStoreIndex(nodes)
106
  query_engine = llama_index.as_query_engine(similarity_top_k=2)
107