Spaces:
Sleeping
Sleeping
Fixed bugs involving the MVP and updated requirements.txt
Browse files
- app.py +13 -6
- requirements.txt +255 -0
app.py
CHANGED
@@ -11,21 +11,26 @@ from langchain.llms import OpenAI
|
|
11 |
text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
|
12 |
|
13 |
from langchain.llms import HuggingFaceHub
|
14 |
-
flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
|
15 |
-
|
16 |
|
17 |
global qa
|
18 |
|
19 |
-
from langchain.embeddings import HuggingFaceHubEmbeddings
|
20 |
-
embeddings = HuggingFaceHubEmbeddings()
|
|
|
21 |
|
22 |
from langchain.vectorstores import Chroma
|
23 |
|
24 |
from langchain.chains import RetrievalQA
|
|
|
|
|
|
|
25 |
def loading_pdf():
|
26 |
return "Loading..."
|
27 |
def pdf_changes(pdf_doc):
|
28 |
-
loader = OnlinePDFLoader(pdf_doc.name)
|
|
|
29 |
documents = loader.load()
|
30 |
texts = text_splitter.split_documents(documents)
|
31 |
db = Chroma.from_documents(texts, embeddings)
|
@@ -85,9 +90,11 @@ title = """
|
|
85 |
with gr.Blocks(css=css) as demo:
|
86 |
with gr.Column(elem_id="col-container"):
|
87 |
gr.HTML(title)
|
|
|
88 |
|
89 |
with gr.Column():
|
90 |
-
pdf_doc = gr.File(
|
|
|
91 |
with gr.Row():
|
92 |
langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
|
93 |
load_pdf = gr.Button("Load pdf to langchain")
|
|
|
11 |
text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
|
12 |
|
13 |
from langchain.llms import HuggingFaceHub
|
14 |
+
# flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
|
15 |
+
flan_ul2 = OpenAI()
|
16 |
|
17 |
global qa
|
18 |
|
19 |
+
from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
|
20 |
+
# embeddings = HuggingFaceHubEmbeddings()
|
21 |
+
embeddings = OpenAIEmbeddings()
|
22 |
|
23 |
from langchain.vectorstores import Chroma
|
24 |
|
25 |
from langchain.chains import RetrievalQA
|
26 |
+
|
27 |
+
from langchain.document_loaders import PyPDFLoader
|
28 |
+
|
29 |
def loading_pdf():
|
30 |
return "Loading..."
|
31 |
def pdf_changes(pdf_doc):
|
32 |
+
# loader = OnlinePDFLoader(pdf_doc.name)
|
33 |
+
loader = PyPDFLoader(pdf_doc.name)
|
34 |
documents = loader.load()
|
35 |
texts = text_splitter.split_documents(documents)
|
36 |
db = Chroma.from_documents(texts, embeddings)
|
|
|
90 |
with gr.Blocks(css=css) as demo:
|
91 |
with gr.Column(elem_id="col-container"):
|
92 |
gr.HTML(title)
|
93 |
+
# with gr.Blocks() as demo:
|
94 |
|
95 |
with gr.Column():
|
96 |
+
pdf_doc = gr.File()
|
97 |
+
# pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work
|
98 |
with gr.Row():
|
99 |
langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
|
100 |
load_pdf = gr.Button("Load pdf to langchain")
|
requirements.txt
CHANGED
@@ -245,3 +245,258 @@ versioneer==0.29
|
|
245 |
websockets==11.0.3
|
246 |
wrapt==1.16.0
|
247 |
yarl==1.9.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
websockets==11.0.3
|
246 |
wrapt==1.16.0
|
247 |
yarl==1.9.4
|
248 |
+
aiofiles==23.2.1
|
249 |
+
aiohttp==3.9.1
|
250 |
+
aiosignal==1.3.1
|
251 |
+
aiostream==0.5.2
|
252 |
+
altair==5.1.2
|
253 |
+
annotated-types==0.5.0
|
254 |
+
anyio==3.7.1
|
255 |
+
appdirs==1.4.4
|
256 |
+
argcomplete==1.8.1
|
257 |
+
astor==0.8.1
|
258 |
+
asttokens==2.4.1
|
259 |
+
async-timeout==4.0.3
|
260 |
+
asyncer==0.0.2
|
261 |
+
attrs==23.1.0
|
262 |
+
auth0-python==4.4.2
|
263 |
+
Babel==2.8.0
|
264 |
+
backoff==2.2.1
|
265 |
+
beautiful-date==2.2.1
|
266 |
+
beautifulsoup4==4.12.2
|
267 |
+
bidict==0.22.1
|
268 |
+
blessed==1.20.0
|
269 |
+
blinker==1.4
|
270 |
+
Brotli==1.0.9
|
271 |
+
CacheControl==0.12.10
|
272 |
+
cachetools==5.3.1
|
273 |
+
cachy==0.3.0
|
274 |
+
certifi==2023.7.22
|
275 |
+
cffi==1.15.1
|
276 |
+
chardet==4.0.0
|
277 |
+
charset-normalizer==3.2.0
|
278 |
+
cleo==0.8.1
|
279 |
+
click==8.1.7
|
280 |
+
clikit==0.6.2
|
281 |
+
colorama==0.4.4
|
282 |
+
comm==0.2.0
|
283 |
+
command-not-found==0.3
|
284 |
+
contourpy==1.2.0
|
285 |
+
crashtest==0.3.1
|
286 |
+
cryptography==41.0.3
|
287 |
+
cycler==0.12.1
|
288 |
+
dataclasses-json==0.5.14
|
289 |
+
dbus-python==1.2.18
|
290 |
+
debugpy==1.8.0
|
291 |
+
decorator==5.1.1
|
292 |
+
Deprecated==1.2.14
|
293 |
+
distlib==0.3.4
|
294 |
+
distro==1.7.0
|
295 |
+
distro-info==1.1+ubuntu0.1
|
296 |
+
exceptiongroup==1.1.3
|
297 |
+
executing==2.0.1
|
298 |
+
fastapi==0.104.1
|
299 |
+
fastapi-socketio==0.0.10
|
300 |
+
ffmpy==0.3.1
|
301 |
+
filelock==3.6.0
|
302 |
+
filetype==1.2.0
|
303 |
+
fonttools==4.44.3
|
304 |
+
frozenlist==1.4.0
|
305 |
+
fsspec==2023.10.0
|
306 |
+
gcsa==2.1.0
|
307 |
+
gdown==4.7.1
|
308 |
+
git-python==1.0.3
|
309 |
+
gitdb==4.0.11
|
310 |
+
GitPython==3.1.40
|
311 |
+
google-api-core==2.11.1
|
312 |
+
google-api-python-client==2.99.0
|
313 |
+
google-auth==2.23.0
|
314 |
+
google-auth-httplib2==0.1.1
|
315 |
+
google-auth-oauthlib==0.8.0
|
316 |
+
googleapis-common-protos==1.60.0
|
317 |
+
gradio==4.4.1
|
318 |
+
gradio_client==0.7.0
|
319 |
+
graphviz==0.14.2
|
320 |
+
greenlet==2.0.2
|
321 |
+
grpcio==1.58.0
|
322 |
+
gyp==0.1
|
323 |
+
h11==0.14.0
|
324 |
+
html2image==2.0.4.3
|
325 |
+
html5lib==1.1
|
326 |
+
httpcore==0.18.0
|
327 |
+
httplib2==0.20.2
|
328 |
+
httpx==0.25.0
|
329 |
+
huggingface-hub==0.19.4
|
330 |
+
idna==3.4
|
331 |
+
importlib-metadata==6.8.0
|
332 |
+
importlib-resources==6.1.1
|
333 |
+
inquirer==3.1.4
|
334 |
+
ipykernel==6.26.0
|
335 |
+
ipython==8.18.0
|
336 |
+
jedi==0.19.1
|
337 |
+
jeepney==0.7.1
|
338 |
+
Jinja2==3.1.2
|
339 |
+
joblib==1.3.2
|
340 |
+
jsonschema==4.19.2
|
341 |
+
jsonschema-specifications==2023.7.1
|
342 |
+
jupyter_client==8.6.0
|
343 |
+
jupyter_core==5.5.0
|
344 |
+
keyring==21.8.0
|
345 |
+
kiwisolver==1.4.5
|
346 |
+
langchain==0.0.281
|
347 |
+
langsmith==0.0.33
|
348 |
+
launchpadlib==1.10.16
|
349 |
+
Lazify==0.4.0
|
350 |
+
lazr.restfulclient==0.14.4
|
351 |
+
lazr.uri==1.0.6
|
352 |
+
litellm==0.13.2
|
353 |
+
livereload==2.6.3
|
354 |
+
llama-index==0.9.13
|
355 |
+
lockfile==0.12.2
|
356 |
+
Markdown==3.3.6
|
357 |
+
markdown-it-py==3.0.0
|
358 |
+
MarkupSafe==2.0.1
|
359 |
+
marshmallow==3.20.1
|
360 |
+
matplotlib==3.8.2
|
361 |
+
matplotlib-inline==0.1.6
|
362 |
+
mdurl==0.1.2
|
363 |
+
mkdocs==1.1.2
|
364 |
+
more-itertools==8.10.0
|
365 |
+
msgpack==1.0.3
|
366 |
+
multidict==6.0.4
|
367 |
+
mutagen==1.45.1
|
368 |
+
mypy-extensions==1.0.0
|
369 |
+
nest-asyncio==1.5.8
|
370 |
+
netifaces==0.11.0
|
371 |
+
nltk==3.8.1
|
372 |
+
nodeenv==1.8.0
|
373 |
+
numexpr==2.8.5
|
374 |
+
numpy==1.25.2
|
375 |
+
oauthlib==3.2.0
|
376 |
+
open-interpreter==0.1.15
|
377 |
+
openai==1.3.8
|
378 |
+
openapi-schema-pydantic==1.2.4
|
379 |
+
opentelemetry-api==1.20.0
|
380 |
+
opentelemetry-exporter-otlp==1.20.0
|
381 |
+
opentelemetry-exporter-otlp-proto-common==1.20.0
|
382 |
+
opentelemetry-exporter-otlp-proto-grpc==1.20.0
|
383 |
+
opentelemetry-exporter-otlp-proto-http==1.20.0
|
384 |
+
opentelemetry-instrumentation==0.40b0
|
385 |
+
opentelemetry-proto==1.20.0
|
386 |
+
opentelemetry-sdk==1.20.0
|
387 |
+
opentelemetry-semantic-conventions==0.41b0
|
388 |
+
orjson==3.9.10
|
389 |
+
packaging==20.9
|
390 |
+
pandas==2.1.3
|
391 |
+
parso==0.8.3
|
392 |
+
pastel==0.2.1
|
393 |
+
pexpect==4.8.0
|
394 |
+
Pillow==10.1.0
|
395 |
+
pipdeptree==2.2.0
|
396 |
+
pkginfo==1.8.2
|
397 |
+
platformdirs==2.5.1
|
398 |
+
poetry==1.1.12
|
399 |
+
poetry-core==1.0.7
|
400 |
+
prisma==0.10.0
|
401 |
+
prompt-toolkit==3.0.41
|
402 |
+
protobuf==4.24.3
|
403 |
+
psutil==5.9.6
|
404 |
+
ptyprocess==0.7.0
|
405 |
+
pure-eval==0.2.2
|
406 |
+
pyarrow==14.0.1
|
407 |
+
pyasn1==0.5.0
|
408 |
+
pyasn1-modules==0.3.0
|
409 |
+
pycparser==2.21
|
410 |
+
pycryptodomex==3.11.0
|
411 |
+
pydantic==2.3.0
|
412 |
+
pydantic_core==2.6.3
|
413 |
+
pydeck==0.8.1b0
|
414 |
+
pydub==0.25.1
|
415 |
+
PyGithub==2.1.1
|
416 |
+
Pygments==2.16.1
|
417 |
+
PyGObject==3.42.1
|
418 |
+
pyinotify==0.9.6
|
419 |
+
PyJWT==2.8.0
|
420 |
+
pylev==1.2.0
|
421 |
+
PyNaCl==1.5.0
|
422 |
+
pyOpenSSL==23.2.0
|
423 |
+
pyparsing==2.4.7
|
424 |
+
PySocks==1.7.1
|
425 |
+
python-apt==2.4.0+ubuntu2
|
426 |
+
python-dateutil==2.8.2
|
427 |
+
python-dotenv==1.0.0
|
428 |
+
python-editor==1.0.4
|
429 |
+
python-engineio==4.7.0
|
430 |
+
python-graphql-client==0.4.3
|
431 |
+
python-multipart==0.0.6
|
432 |
+
python-socketio==5.9.0
|
433 |
+
pytz==2022.1
|
434 |
+
pytz-deprecation-shim==0.1.0.post0
|
435 |
+
pyxattr==0.7.2
|
436 |
+
PyYAML==6.0.1
|
437 |
+
pyzmq==25.1.1
|
438 |
+
readchar==4.0.5
|
439 |
+
referencing==0.30.2
|
440 |
+
regex==2023.10.3
|
441 |
+
requests==2.31.0
|
442 |
+
requests-oauthlib==1.3.1
|
443 |
+
requests-toolbelt==0.9.1
|
444 |
+
rich==13.6.0
|
445 |
+
rpds-py==0.12.0
|
446 |
+
rsa==4.9
|
447 |
+
screen-resolution-extra==0.0.0
|
448 |
+
SecretStorage==3.3.1
|
449 |
+
semantic-version==2.10.0
|
450 |
+
shellingham==1.4.0
|
451 |
+
six==1.16.0
|
452 |
+
smmap==5.0.1
|
453 |
+
sniffio==1.3.0
|
454 |
+
soupsieve==2.5
|
455 |
+
speedtest-cli==2.1.3
|
456 |
+
SQLAlchemy==2.0.20
|
457 |
+
stack-data==0.6.3
|
458 |
+
starlette==0.27.0
|
459 |
+
streamlit==1.28.1
|
460 |
+
syncer==2.0.3
|
461 |
+
systemd-python==234
|
462 |
+
tenacity==8.2.3
|
463 |
+
termcolor==2.3.0
|
464 |
+
tiktoken==0.4.0
|
465 |
+
tokenizers==0.15.0
|
466 |
+
tokentrim==0.1.13
|
467 |
+
toml==0.10.2
|
468 |
+
tomli==2.0.1
|
469 |
+
tomlkit==0.12.0
|
470 |
+
toolz==0.12.0
|
471 |
+
tornado==6.3.3
|
472 |
+
tqdm==4.66.1
|
473 |
+
traitlets==5.13.0
|
474 |
+
typer==0.9.0
|
475 |
+
typing-inspect==0.9.0
|
476 |
+
typing_extensions==4.8.0
|
477 |
+
tzdata==2023.3
|
478 |
+
tzlocal==4.3.1
|
479 |
+
ubuntu-advantage-tools==8001
|
480 |
+
ubuntu-drivers-common==0.0.0
|
481 |
+
ufw==0.36.1
|
482 |
+
unattended-upgrades==0.1
|
483 |
+
uptrace==1.20.0
|
484 |
+
uritemplate==4.1.1
|
485 |
+
urllib3==2.0.4
|
486 |
+
userpath==1.8.0
|
487 |
+
uvicorn==0.23.2
|
488 |
+
validators==0.22.0
|
489 |
+
virtualenv==20.13.0+ds
|
490 |
+
wadllib==1.3.6
|
491 |
+
watchdog==3.0.0
|
492 |
+
watchfiles==0.20.0
|
493 |
+
wcwidth==0.2.12
|
494 |
+
webencodings==0.5.1
|
495 |
+
websocket-client==1.6.4
|
496 |
+
websockets==11.0.3
|
497 |
+
wget==3.2
|
498 |
+
wrapt==1.15.0
|
499 |
+
xkit==0.0.0
|
500 |
+
yarl==1.9.2
|
501 |
+
yaspin==3.0.1
|
502 |
+
zipp==1.0.0
|