Nikhil0987 committed on
Commit
ca6013c
·
verified ·
1 Parent(s): c33fb07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -44
app.py CHANGED
@@ -10,84 +10,73 @@ import streamlit as st
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
13
-
14
  PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
15
  PINECONE_ENV = os.getenv('PINECONE_ENV')
16
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
17
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
18
 
19
-
20
-
21
  @st.cache_resource
22
  def embedding_db():
23
- # we use the openAI embedding model
24
  embeddings = OpenAIEmbeddings()
25
-
26
- # Initialize Pinecone: Correct Indentation
27
  pc = pinecone.init(
28
- api_key=PINECONE_API_KEY,
29
- environment=PINECONE_ENV
30
  )
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def doc_preprocessing():
33
  loader = DirectoryLoader(
34
  'data/',
35
- glob='**/*.pdf', # only the PDFs
36
  show_progress=True
37
  )
38
  docs = loader.load()
39
  text_splitter = CharacterTextSplitter(
40
- chunk_size=1000,
41
  chunk_overlap=0
42
  )
43
  docs_split = text_splitter.split_documents(docs)
44
  return docs_split
45
 
46
-
47
-
48
-
49
- # docs_split = doc_preprocessing()
50
-
51
- # # Check if index exists, create if needed
52
- # if 'langchain-demo-indexes' not in pc.list_indexes().names():
53
- # pc.create_index(
54
- # name='langchain-demo-indexes',
55
- # dimension=1536, # Adjust dimension if needed
56
- # metric='euclidean',
57
- # spec=ServerlessSpec(cloud='aws', region='us-west-2')
58
- # )
59
-
60
- # doc_db = Pinecone.from_documents(
61
- # docs_split,
62
- # embeddings,
63
- # index_name='langchain-demo-indexes',
64
- # client=pc # Pass the Pinecone object
65
- # )
66
- # return doc_db
67
-
68
- # llm = ChatOpenAI()
69
- # doc_db = embedding_db()
70
-
71
  def retrieval_answer(query):
72
- chat_model = ChatOpenAI() # Create the LLM instance
73
  qa = RetrievalQA.from_chain_type(
74
- llm=chat_model, # Pass the chat_model instance
75
  chain_type='stuff',
76
  retriever=doc_db.as_retriever(),
77
- )
78
- query = query
79
  result = qa.run(query)
80
  return result
81
 
82
  def main():
83
  st.title("Question and Answering App powered by LLM and Pinecone")
84
-
85
  text_input = st.text_input("Ask your query...")
 
86
  if st.button("Ask Query"):
87
- if len(text_input)>0:
88
  st.info("Your Query: " + text_input)
89
- answer = retrieval_answer(text_input)
 
 
 
 
90
  st.success(answer)
91
 
92
- if __name__ == "__main__":
93
- main()
 
10
  from dotenv import load_dotenv
11
 
12
  load_dotenv()
 
13
  PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
14
  PINECONE_ENV = os.getenv('PINECONE_ENV')
15
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
16
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
17
 
 
 
18
  @st.cache_resource
19
  def embedding_db():
 
20
  embeddings = OpenAIEmbeddings()
 
 
21
  pc = pinecone.init(
22
+ api_key=PINECONE_API_KEY,
23
+ environment=PINECONE_ENV
24
  )
25
 
26
+ # Check if index exists, create if needed
27
+ if 'langchain-demo-indexes' not in pc.list_indexes().names():
28
+ pc.create_index(
29
+ name='langchain-demo-indexes',
30
+ dimension=1536, # Adjust dimension if needed
31
+ metric='euclidean'
32
+ )
33
+
34
+ docs_split = doc_preprocessing() # Make sure this function is defined
35
+ doc_db = Pinecone.from_documents(
36
+ docs_split,
37
+ embeddings,
38
+ index_name='langchain-demo-indexes',
39
+ client=pc
40
+ )
41
+ return doc_db
42
+
43
  def doc_preprocessing():
44
  loader = DirectoryLoader(
45
  'data/',
46
+ glob='**/*.pdf',
47
  show_progress=True
48
  )
49
  docs = loader.load()
50
  text_splitter = CharacterTextSplitter(
51
+ chunk_size=1000,
52
  chunk_overlap=0
53
  )
54
  docs_split = text_splitter.split_documents(docs)
55
  return docs_split
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def retrieval_answer(query):
58
+ chat_model = ChatOpenAI()
59
  qa = RetrievalQA.from_chain_type(
60
+ llm=chat_model,
61
  chain_type='stuff',
62
  retriever=doc_db.as_retriever(),
63
+ )
 
64
  result = qa.run(query)
65
  return result
66
 
67
  def main():
68
  st.title("Question and Answering App powered by LLM and Pinecone")
 
69
  text_input = st.text_input("Ask your query...")
70
+
71
  if st.button("Ask Query"):
72
+ if len(text_input) > 0:
73
  st.info("Your Query: " + text_input)
74
+
75
+ # Potential loading message
76
+ with st.spinner("Processing your query..."):
77
+ doc_db = embedding_db() # Create the embedding database
78
+ answer = retrieval_answer(text_input)
79
  st.success(answer)
80
 
81
+ if __name__ == "__main__":
82
+ main()