Nikhil0987 committed on
Commit
6da0fd1
·
verified ·
1 Parent(s): 65a34df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -33
app.py CHANGED
@@ -1,7 +1,7 @@
1
- from langchain.document_loaders import DirectoryLoader
2
  from langchain.text_splitter import CharacterTextSplitter
3
  import os
4
- import pinecone
5
  from langchain.vectorstores import Pinecone
6
  from langchain.embeddings.openai import OpenAIEmbeddings
7
  from langchain.chains import RetrievalQA
@@ -11,14 +11,23 @@ from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
 
14
-
15
  PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
16
  PINECONE_ENV = os.getenv('PINECONE_ENV')
17
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
18
-
19
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
22
  def doc_preprocessing():
23
  loader = DirectoryLoader(
24
  'data/',
@@ -33,38 +42,30 @@ def doc_preprocessing():
33
  docs_split = text_splitter.split_documents(docs)
34
  return docs_split
35
 
36
- @st.cache_resource
37
- def embedding_db():
38
- # we use the openAI embedding model
39
- embeddings = OpenAIEmbeddings()
40
 
41
- # Initialize Pinecone
42
- pc = Pinecone(
43
- api_key=PINECONE_API_KEY,
44
- environment=PINECONE_ENV
45
- )
46
 
47
- docs_split = doc_preprocessing()
48
-
49
- # Check if index exists, create if needed
50
- if 'langchain-demo-indexes' not in pc.list_indexes().names():
51
- pc.create_index(
52
- name='langchain-demo-indexes',
53
- dimension=1536, # Adjust dimension if needed
54
- metric='euclidean',
55
- spec=ServerlessSpec(cloud='aws', region='us-west-2')
56
- )
57
-
58
- doc_db = Pinecone.from_documents(
59
- docs_split,
60
- embeddings,
61
- index_name='langchain-demo-indexes',
62
- client=pc # Pass the Pinecone object
63
- )
64
- return doc_db
65
 
66
- llm = ChatOpenAI()
67
- doc_db = embedding_db()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  def retrieval_answer(query):
70
  qa = RetrievalQA.from_chain_type(
 
1
+ from langchain.document_loaders import DirectoryLoader
2
  from langchain.text_splitter import CharacterTextSplitter
3
  import os
4
+ import pinecone
5
  from langchain.vectorstores import Pinecone
6
  from langchain.embeddings.openai import OpenAIEmbeddings
7
  from langchain.chains import RetrievalQA
 
11
 
12
  load_dotenv()
13
 
 
14
  PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
15
  PINECONE_ENV = os.getenv('PINECONE_ENV')
16
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
 
17
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
18
 
19
 
20
+
21
+ @st.cache_resource
22
+ def embedding_db():
23
+ # we use the openAI embedding model
24
+ embeddings = OpenAIEmbeddings()
25
+
26
+ # Initialize Pinecone: Updated method
27
+ pc = pinecone.init(
28
+ api_key=PINECONE_API_KEY,
29
+ environment=PINECONE_ENV
30
+
31
  def doc_preprocessing():
32
  loader = DirectoryLoader(
33
  'data/',
 
42
  docs_split = text_splitter.split_documents(docs)
43
  return docs_split
44
 
 
 
 
 
45
 
 
 
 
 
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ # docs_split = doc_preprocessing()
49
+
50
+ # # Check if index exists, create if needed
51
+ # if 'langchain-demo-indexes' not in pc.list_indexes().names():
52
+ # pc.create_index(
53
+ # name='langchain-demo-indexes',
54
+ # dimension=1536, # Adjust dimension if needed
55
+ # metric='euclidean',
56
+ # spec=ServerlessSpec(cloud='aws', region='us-west-2')
57
+ # )
58
+
59
+ # doc_db = Pinecone.from_documents(
60
+ # docs_split,
61
+ # embeddings,
62
+ # index_name='langchain-demo-indexes',
63
+ # client=pc # Pass the Pinecone object
64
+ # )
65
+ # return doc_db
66
+
67
+ # llm = ChatOpenAI()
68
+ # doc_db = embedding_db()
69
 
70
  def retrieval_answer(query):
71
  qa = RetrievalQA.from_chain_type(