Spaces:
Sleeping
Sleeping
Commit
·
f2278eb
1
Parent(s):
b750929
samsara ceo interview pdf
Browse files
- chainlit.md +1 -1
- data/Samsara_AG.pdf +0 -0
- data/musk-v-altman-openai-complaint-sf.pdf +0 -3
- data/nvidia_earnings_report.pdf +0 -3
- src/retrieval_lib.py +7 -5
chainlit.md
CHANGED
@@ -2,4 +2,4 @@
|
|
2 |
|
3 |
RAG over a PDF document
|
4 |
|
5 |
-
Disclaimer: this is running the query over the pdf document and generating answers using LLM.
|
|
|
2 |
|
3 |
RAG over a PDF document
|
4 |
|
5 |
+
Disclaimer: this is running the query over the pdf document and generating answers using LLM.
|
data/Samsara_AG.pdf
ADDED
Binary file (476 kB). View file
|
|
data/musk-v-altman-openai-complaint-sf.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:62f1c2ff5b9127e95afd844cd2b1af0405df5b0a51f3b2e71947cc393542e996
|
3 |
-
size 1575218
|
|
|
|
|
|
|
|
data/nvidia_earnings_report.pdf
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:536f66d7f1c3413abbf643e0a02bd0aab65639116fe630225f3f93529244658b
|
3 |
-
size 1074533
|
|
|
|
|
|
|
|
src/retrieval_lib.py
CHANGED
@@ -13,7 +13,11 @@ from langchain_core.output_parsers import StrOutputParser
|
|
13 |
from langchain_core.runnables import RunnablePassthrough
|
14 |
|
15 |
|
16 |
-
LLM_MODEL_NAME = "gpt-
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
# load PDF doc and convert to text
|
@@ -79,8 +83,7 @@ def initialize_index():
|
|
79 |
# load pdf
|
80 |
cwd = os.path.abspath(os.getcwd())
|
81 |
data_dir = "data"
|
82 |
-
pdf_file = "
|
83 |
-
# pdf_file = "musk-v-altman-openai-complaint-sf.pdf"
|
84 |
pdf_path = os.path.join(cwd, data_dir, pdf_file)
|
85 |
print("path: ", pdf_path)
|
86 |
doc = load_pdf_to_text(pdf_path)
|
@@ -92,8 +95,7 @@ def initialize_index():
|
|
92 |
|
93 |
def main():
|
94 |
retriever = initialize_index()
|
95 |
-
|
96 |
-
query = "what is the reason for the lawsuit"
|
97 |
retrieved_docs = query_index(retriever, query)
|
98 |
print("retrieved_docs: \n", len(retrieved_docs))
|
99 |
answer_prompt = create_answer_prompt()
|
|
|
13 |
from langchain_core.runnables import RunnablePassthrough
|
14 |
|
15 |
|
16 |
+
LLM_MODEL_NAME = "gpt-4-turbo"
|
17 |
+
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
18 |
+
os.environ["LANGCHAIN_PROJECT"] = f"fcb2b - {uuid4().hex[0:8]}"
|
19 |
+
|
20 |
+
# LLM_MODEL_NAME = "gpt-3.5-turbo"
|
21 |
|
22 |
|
23 |
# load PDF doc and convert to text
|
|
|
83 |
# load pdf
|
84 |
cwd = os.path.abspath(os.getcwd())
|
85 |
data_dir = "data"
|
86 |
+
pdf_file = "Samsara_AG.pdf"
|
|
|
87 |
pdf_path = os.path.join(cwd, data_dir, pdf_file)
|
88 |
print("path: ", pdf_path)
|
89 |
doc = load_pdf_to_text(pdf_path)
|
|
|
95 |
|
96 |
def main():
|
97 |
retriever = initialize_index()
|
98 |
+
query = "how to build the best product?"
|
|
|
99 |
retrieved_docs = query_index(retriever, query)
|
100 |
print("retrieved_docs: \n", len(retrieved_docs))
|
101 |
answer_prompt = create_answer_prompt()
|