Spaces:

mitultiwari
/

rag-app

Sleeping

mitultiwari committed on May 8, 2024

Commit

f2278eb

1 Parent(s): b750929

samsara ceo interview pdf

Files changed (5) hide show

chainlit.md CHANGED Viewed

@@ -2,4 +2,4 @@
 RAG over a PDF document
-Disclaimer: this is running the query over the pdf document and generating answers using LLM. LLMs can hellucinate and can generate wrong answers.


2
3	RAG over a PDF document
4
5	+ Disclaimer: this is running the query over the pdf document and generating answers using LLM.

data/Samsara_AG.pdf ADDED Viewed

Binary file (476 kB). View file

data/musk-v-altman-openai-complaint-sf.pdf DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:62f1c2ff5b9127e95afd844cd2b1af0405df5b0a51f3b2e71947cc393542e996
-size 1575218

data/nvidia_earnings_report.pdf DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:536f66d7f1c3413abbf643e0a02bd0aab65639116fe630225f3f93529244658b
-size 1074533

src/retrieval_lib.py CHANGED Viewed

@@ -13,7 +13,11 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
-LLM_MODEL_NAME = "gpt-3.5-turbo"
 # load PDF doc and convert to text
@@ -79,8 +83,7 @@ def initialize_index():
     # load pdf
     cwd = os.path.abspath(os.getcwd())
     data_dir = "data"
-    pdf_file = "nvidia_earnings_report.pdf"
-    # pdf_file = "musk-v-altman-openai-complaint-sf.pdf"
     pdf_path = os.path.join(cwd, data_dir, pdf_file)
     print("path: ", pdf_path)
     doc = load_pdf_to_text(pdf_path)
@@ -92,8 +95,7 @@ def initialize_index():
 def main():
     retriever = initialize_index()
-    # query = "Who is the E-VP, Operations"
-    query = "what is the reason for the lawsuit"
     retrieved_docs = query_index(retriever, query)
     print("retrieved_docs: \n", len(retrieved_docs))
     answer_prompt = create_answer_prompt()

 from langchain_core.runnables import RunnablePassthrough
+LLM_MODEL_NAME = "gpt-4-turbo"
+os.environ["LANGCHAIN_TRACING_V2"] = "true"
+os.environ["LANGCHAIN_PROJECT"] = f"fcb2b - {uuid4().hex[0:8]}"
+# LLM_MODEL_NAME = "gpt-3.5-turbo"
 # load PDF doc and convert to text
     # load pdf
     cwd = os.path.abspath(os.getcwd())
     data_dir = "data"
+    pdf_file = "Samsara_AG.pdf"
     pdf_path = os.path.join(cwd, data_dir, pdf_file)
     print("path: ", pdf_path)
     doc = load_pdf_to_text(pdf_path)
 def main():
     retriever = initialize_index()
+    query = "how to build the best product?"
     retrieved_docs = query_index(retriever, query)
     print("retrieved_docs: \n", len(retrieved_docs))
     answer_prompt = create_answer_prompt()