mitultiwari committed on
Commit
f2278eb
·
1 Parent(s): b750929

samsara ceo interview pdf

Browse files
chainlit.md CHANGED
@@ -2,4 +2,4 @@
2
 
3
  RAG over a PDF document
4
 
5
- Disclaimer: this is running the query over the pdf document and generating answers using LLM. LLMs can hellucinate and can generate wrong answers.
 
2
 
3
  RAG over a PDF document
4
 
5
+ Disclaimer: this is running the query over the pdf document and generating answers using LLM.
data/Samsara_AG.pdf ADDED
Binary file (476 kB). View file
 
data/musk-v-altman-openai-complaint-sf.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:62f1c2ff5b9127e95afd844cd2b1af0405df5b0a51f3b2e71947cc393542e996
3
- size 1575218
 
 
 
 
data/nvidia_earnings_report.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:536f66d7f1c3413abbf643e0a02bd0aab65639116fe630225f3f93529244658b
3
- size 1074533
 
 
 
 
src/retrieval_lib.py CHANGED
@@ -13,7 +13,11 @@ from langchain_core.output_parsers import StrOutputParser
13
  from langchain_core.runnables import RunnablePassthrough
14
 
15
 
16
- LLM_MODEL_NAME = "gpt-3.5-turbo"
 
 
 
 
17
 
18
 
19
  # load PDF doc and convert to text
@@ -79,8 +83,7 @@ def initialize_index():
79
  # load pdf
80
  cwd = os.path.abspath(os.getcwd())
81
  data_dir = "data"
82
- pdf_file = "nvidia_earnings_report.pdf"
83
- # pdf_file = "musk-v-altman-openai-complaint-sf.pdf"
84
  pdf_path = os.path.join(cwd, data_dir, pdf_file)
85
  print("path: ", pdf_path)
86
  doc = load_pdf_to_text(pdf_path)
@@ -92,8 +95,7 @@ def initialize_index():
92
 
93
  def main():
94
  retriever = initialize_index()
95
- # query = "Who is the E-VP, Operations"
96
- query = "what is the reason for the lawsuit"
97
  retrieved_docs = query_index(retriever, query)
98
  print("retrieved_docs: \n", len(retrieved_docs))
99
  answer_prompt = create_answer_prompt()
 
13
  from langchain_core.runnables import RunnablePassthrough
14
 
15
 
16
+ LLM_MODEL_NAME = "gpt-4-turbo"
17
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
18
+ os.environ["LANGCHAIN_PROJECT"] = f"fcb2b - {uuid4().hex[0:8]}"
19
+
20
+ # LLM_MODEL_NAME = "gpt-3.5-turbo"
21
 
22
 
23
  # load PDF doc and convert to text
 
83
  # load pdf
84
  cwd = os.path.abspath(os.getcwd())
85
  data_dir = "data"
86
+ pdf_file = "Samsara_AG.pdf"
 
87
  pdf_path = os.path.join(cwd, data_dir, pdf_file)
88
  print("path: ", pdf_path)
89
  doc = load_pdf_to_text(pdf_path)
 
95
 
96
  def main():
97
  retriever = initialize_index()
98
+ query = "how to build the best product?"
 
99
  retrieved_docs = query_index(retriever, query)
100
  print("retrieved_docs: \n", len(retrieved_docs))
101
  answer_prompt = create_answer_prompt()