Spaces:
Sleeping
Sleeping
gizemsarsinlar
committed on
Commit
•
0e27321
1
Parent(s):
31909e5
Update app.py
Browse files
app.py
CHANGED
@@ -19,6 +19,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
19 |
from langchain_community.document_loaders import ArxivLoader
|
20 |
from langchain_community.document_transformers import LongContextReorder
|
21 |
|
|
|
|
|
22 |
|
23 |
import os
|
24 |
|
@@ -61,19 +63,22 @@ text_splitter = RecursiveCharacterTextSplitter(
|
|
61 |
separators=["\n\n", "\n", ".", ";", ",", " "],
|
62 |
)
|
63 |
|
64 |
-
docs = [
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
]
|
|
|
|
|
|
|
77 |
|
78 |
## Cut the paper short if references is included.
|
79 |
## This is a standard string in papers.
|
|
|
19 |
from langchain_community.document_loaders import ArxivLoader
|
20 |
from langchain_community.document_transformers import LongContextReorder
|
21 |
|
22 |
+
from langchain_community.document_loaders import PyPDFLoader
|
23 |
+
|
24 |
|
25 |
import os
|
26 |
|
|
|
63 |
separators=["\n\n", "\n", ".", ";", ",", " "],
|
64 |
)
|
65 |
|
66 |
+
# docs = [
|
67 |
+
# ArxivLoader(query="1706.03762").load(), ## Attention Is All You Need Paper
|
68 |
+
# ArxivLoader(query="1810.04805").load(), ## BERT Paper
|
69 |
+
# ArxivLoader(query="2005.11401").load(), ## RAG Paper
|
70 |
+
# ArxivLoader(query="2205.00445").load(), ## MRKL Paper
|
71 |
+
# ArxivLoader(query="2310.06825").load(), ## Mistral Paper
|
72 |
+
# ArxivLoader(query="2306.05685").load(), ## LLM-as-a-Judge
|
73 |
+
# ## Some longer papers
|
74 |
+
# ArxivLoader(query="2210.03629").load(), ## ReAct Paper
|
75 |
+
# ArxivLoader(query="2112.10752").load(), ## Latent Stable Diffusion Paper
|
76 |
+
# ArxivLoader(query="2103.00020").load(), ## CLIP Paper
|
77 |
+
# ## TODO: Feel free to add more
|
78 |
+
# ]
|
79 |
+
|
80 |
+
loader = PyPDFLoader("transcript.pdf")
|
81 |
+
docs = loader.load()
|
82 |
|
83 |
## Cut the paper short if references is included.
|
84 |
## This is a standard string in papers.
|