isayahc committed on
Commit
65d65e8
1 Parent(s): 8f173c8

fixed bugs involving MVP and updated requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +13 -6
  2. requirements.txt +255 -0
app.py CHANGED
@@ -11,21 +11,26 @@ from langchain.llms import OpenAI
11
  text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
12
 
13
  from langchain.llms import HuggingFaceHub
14
- flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
15
- # flan_ul2 = OpenAI()
16
 
17
  global qa
18
 
19
- from langchain.embeddings import HuggingFaceHubEmbeddings
20
- embeddings = HuggingFaceHubEmbeddings()
 
21
 
22
  from langchain.vectorstores import Chroma
23
 
24
  from langchain.chains import RetrievalQA
 
 
 
25
  def loading_pdf():
26
  return "Loading..."
27
  def pdf_changes(pdf_doc):
28
- loader = OnlinePDFLoader(pdf_doc.name)
 
29
  documents = loader.load()
30
  texts = text_splitter.split_documents(documents)
31
  db = Chroma.from_documents(texts, embeddings)
@@ -85,9 +90,11 @@ title = """
85
  with gr.Blocks(css=css) as demo:
86
  with gr.Column(elem_id="col-container"):
87
  gr.HTML(title)
 
88
 
89
  with gr.Column():
90
- pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work
 
91
  with gr.Row():
92
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
93
  load_pdf = gr.Button("Load pdf to langchain")
 
11
  text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)
12
 
13
  from langchain.llms import HuggingFaceHub
14
+ # flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
15
+ flan_ul2 = OpenAI()
16
 
17
  global qa
18
 
19
+ from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
20
+ # embeddings = HuggingFaceHubEmbeddings()
21
+ embeddings = OpenAIEmbeddings()
22
 
23
  from langchain.vectorstores import Chroma
24
 
25
  from langchain.chains import RetrievalQA
26
+
27
+ from langchain.document_loaders import PyPDFLoader
28
+
29
  def loading_pdf():
30
  return "Loading..."
31
  def pdf_changes(pdf_doc):
32
+ # loader = OnlinePDFLoader(pdf_doc.name)
33
+ loader = PyPDFLoader(pdf_doc.name)
34
  documents = loader.load()
35
  texts = text_splitter.split_documents(documents)
36
  db = Chroma.from_documents(texts, embeddings)
 
90
  with gr.Blocks(css=css) as demo:
91
  with gr.Column(elem_id="col-container"):
92
  gr.HTML(title)
93
+ # with gr.Blocks() as demo:
94
 
95
  with gr.Column():
96
+ pdf_doc = gr.File()
97
+ # pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work
98
  with gr.Row():
99
  langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
100
  load_pdf = gr.Button("Load pdf to langchain")
requirements.txt CHANGED
@@ -245,3 +245,258 @@ versioneer==0.29
245
  websockets==11.0.3
246
  wrapt==1.16.0
247
  yarl==1.9.4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  websockets==11.0.3
246
  wrapt==1.16.0
247
  yarl==1.9.4
248
+ aiofiles==23.2.1
249
+ aiohttp==3.9.1
250
+ aiosignal==1.3.1
251
+ aiostream==0.5.2
252
+ altair==5.1.2
253
+ annotated-types==0.5.0
254
+ anyio==3.7.1
255
+ appdirs==1.4.4
256
+ argcomplete==1.8.1
257
+ astor==0.8.1
258
+ asttokens==2.4.1
259
+ async-timeout==4.0.3
260
+ asyncer==0.0.2
261
+ attrs==23.1.0
262
+ auth0-python==4.4.2
263
+ Babel==2.8.0
264
+ backoff==2.2.1
265
+ beautiful-date==2.2.1
266
+ beautifulsoup4==4.12.2
267
+ bidict==0.22.1
268
+ blessed==1.20.0
269
+ blinker==1.4
270
+ Brotli==1.0.9
271
+ CacheControl==0.12.10
272
+ cachetools==5.3.1
273
+ cachy==0.3.0
274
+ certifi==2023.7.22
275
+ cffi==1.15.1
276
+ chardet==4.0.0
277
+ charset-normalizer==3.2.0
278
+ cleo==0.8.1
279
+ click==8.1.7
280
+ clikit==0.6.2
281
+ colorama==0.4.4
282
+ comm==0.2.0
283
+ command-not-found==0.3
284
+ contourpy==1.2.0
285
+ crashtest==0.3.1
286
+ cryptography==41.0.3
287
+ cycler==0.12.1
288
+ dataclasses-json==0.5.14
289
+ dbus-python==1.2.18
290
+ debugpy==1.8.0
291
+ decorator==5.1.1
292
+ Deprecated==1.2.14
293
+ distlib==0.3.4
294
+ distro==1.7.0
295
+ distro-info==1.1+ubuntu0.1
296
+ exceptiongroup==1.1.3
297
+ executing==2.0.1
298
+ fastapi==0.104.1
299
+ fastapi-socketio==0.0.10
300
+ ffmpy==0.3.1
301
+ filelock==3.6.0
302
+ filetype==1.2.0
303
+ fonttools==4.44.3
304
+ frozenlist==1.4.0
305
+ fsspec==2023.10.0
306
+ gcsa==2.1.0
307
+ gdown==4.7.1
308
+ git-python==1.0.3
309
+ gitdb==4.0.11
310
+ GitPython==3.1.40
311
+ google-api-core==2.11.1
312
+ google-api-python-client==2.99.0
313
+ google-auth==2.23.0
314
+ google-auth-httplib2==0.1.1
315
+ google-auth-oauthlib==0.8.0
316
+ googleapis-common-protos==1.60.0
317
+ gradio==4.4.1
318
+ gradio_client==0.7.0
319
+ graphviz==0.14.2
320
+ greenlet==2.0.2
321
+ grpcio==1.58.0
322
+ gyp==0.1
323
+ h11==0.14.0
324
+ html2image==2.0.4.3
325
+ html5lib==1.1
326
+ httpcore==0.18.0
327
+ httplib2==0.20.2
328
+ httpx==0.25.0
329
+ huggingface-hub==0.19.4
330
+ idna==3.4
331
+ importlib-metadata==6.8.0
332
+ importlib-resources==6.1.1
333
+ inquirer==3.1.4
334
+ ipykernel==6.26.0
335
+ ipython==8.18.0
336
+ jedi==0.19.1
337
+ jeepney==0.7.1
338
+ Jinja2==3.1.2
339
+ joblib==1.3.2
340
+ jsonschema==4.19.2
341
+ jsonschema-specifications==2023.7.1
342
+ jupyter_client==8.6.0
343
+ jupyter_core==5.5.0
344
+ keyring==21.8.0
345
+ kiwisolver==1.4.5
346
+ langchain==0.0.281
347
+ langsmith==0.0.33
348
+ launchpadlib==1.10.16
349
+ Lazify==0.4.0
350
+ lazr.restfulclient==0.14.4
351
+ lazr.uri==1.0.6
352
+ litellm==0.13.2
353
+ livereload==2.6.3
354
+ llama-index==0.9.13
355
+ lockfile==0.12.2
356
+ Markdown==3.3.6
357
+ markdown-it-py==3.0.0
358
+ MarkupSafe==2.0.1
359
+ marshmallow==3.20.1
360
+ matplotlib==3.8.2
361
+ matplotlib-inline==0.1.6
362
+ mdurl==0.1.2
363
+ mkdocs==1.1.2
364
+ more-itertools==8.10.0
365
+ msgpack==1.0.3
366
+ multidict==6.0.4
367
+ mutagen==1.45.1
368
+ mypy-extensions==1.0.0
369
+ nest-asyncio==1.5.8
370
+ netifaces==0.11.0
371
+ nltk==3.8.1
372
+ nodeenv==1.8.0
373
+ numexpr==2.8.5
374
+ numpy==1.25.2
375
+ oauthlib==3.2.0
376
+ open-interpreter==0.1.15
377
+ openai==1.3.8
378
+ openapi-schema-pydantic==1.2.4
379
+ opentelemetry-api==1.20.0
380
+ opentelemetry-exporter-otlp==1.20.0
381
+ opentelemetry-exporter-otlp-proto-common==1.20.0
382
+ opentelemetry-exporter-otlp-proto-grpc==1.20.0
383
+ opentelemetry-exporter-otlp-proto-http==1.20.0
384
+ opentelemetry-instrumentation==0.40b0
385
+ opentelemetry-proto==1.20.0
386
+ opentelemetry-sdk==1.20.0
387
+ opentelemetry-semantic-conventions==0.41b0
388
+ orjson==3.9.10
389
+ packaging==20.9
390
+ pandas==2.1.3
391
+ parso==0.8.3
392
+ pastel==0.2.1
393
+ pexpect==4.8.0
394
+ Pillow==10.1.0
395
+ pipdeptree==2.2.0
396
+ pkginfo==1.8.2
397
+ platformdirs==2.5.1
398
+ poetry==1.1.12
399
+ poetry-core==1.0.7
400
+ prisma==0.10.0
401
+ prompt-toolkit==3.0.41
402
+ protobuf==4.24.3
403
+ psutil==5.9.6
404
+ ptyprocess==0.7.0
405
+ pure-eval==0.2.2
406
+ pyarrow==14.0.1
407
+ pyasn1==0.5.0
408
+ pyasn1-modules==0.3.0
409
+ pycparser==2.21
410
+ pycryptodomex==3.11.0
411
+ pydantic==2.3.0
412
+ pydantic_core==2.6.3
413
+ pydeck==0.8.1b0
414
+ pydub==0.25.1
415
+ PyGithub==2.1.1
416
+ Pygments==2.16.1
417
+ PyGObject==3.42.1
418
+ pyinotify==0.9.6
419
+ PyJWT==2.8.0
420
+ pylev==1.2.0
421
+ PyNaCl==1.5.0
422
+ pyOpenSSL==23.2.0
423
+ pyparsing==2.4.7
424
+ PySocks==1.7.1
425
+ python-apt==2.4.0+ubuntu2
426
+ python-dateutil==2.8.2
427
+ python-dotenv==1.0.0
428
+ python-editor==1.0.4
429
+ python-engineio==4.7.0
430
+ python-graphql-client==0.4.3
431
+ python-multipart==0.0.6
432
+ python-socketio==5.9.0
433
+ pytz==2022.1
434
+ pytz-deprecation-shim==0.1.0.post0
435
+ pyxattr==0.7.2
436
+ PyYAML==6.0.1
437
+ pyzmq==25.1.1
438
+ readchar==4.0.5
439
+ referencing==0.30.2
440
+ regex==2023.10.3
441
+ requests==2.31.0
442
+ requests-oauthlib==1.3.1
443
+ requests-toolbelt==0.9.1
444
+ rich==13.6.0
445
+ rpds-py==0.12.0
446
+ rsa==4.9
447
+ screen-resolution-extra==0.0.0
448
+ SecretStorage==3.3.1
449
+ semantic-version==2.10.0
450
+ shellingham==1.4.0
451
+ six==1.16.0
452
+ smmap==5.0.1
453
+ sniffio==1.3.0
454
+ soupsieve==2.5
455
+ speedtest-cli==2.1.3
456
+ SQLAlchemy==2.0.20
457
+ stack-data==0.6.3
458
+ starlette==0.27.0
459
+ streamlit==1.28.1
460
+ syncer==2.0.3
461
+ systemd-python==234
462
+ tenacity==8.2.3
463
+ termcolor==2.3.0
464
+ tiktoken==0.4.0
465
+ tokenizers==0.15.0
466
+ tokentrim==0.1.13
467
+ toml==0.10.2
468
+ tomli==2.0.1
469
+ tomlkit==0.12.0
470
+ toolz==0.12.0
471
+ tornado==6.3.3
472
+ tqdm==4.66.1
473
+ traitlets==5.13.0
474
+ typer==0.9.0
475
+ typing-inspect==0.9.0
476
+ typing_extensions==4.8.0
477
+ tzdata==2023.3
478
+ tzlocal==4.3.1
479
+ ubuntu-advantage-tools==8001
480
+ ubuntu-drivers-common==0.0.0
481
+ ufw==0.36.1
482
+ unattended-upgrades==0.1
483
+ uptrace==1.20.0
484
+ uritemplate==4.1.1
485
+ urllib3==2.0.4
486
+ userpath==1.8.0
487
+ uvicorn==0.23.2
488
+ validators==0.22.0
489
+ virtualenv==20.13.0+ds
490
+ wadllib==1.3.6
491
+ watchdog==3.0.0
492
+ watchfiles==0.20.0
493
+ wcwidth==0.2.12
494
+ webencodings==0.5.1
495
+ websocket-client==1.6.4
496
+ websockets==11.0.3
497
+ wget==3.2
498
+ wrapt==1.15.0
499
+ xkit==0.0.0
500
+ yarl==1.9.2
501
+ yaspin==3.0.1
502
+ zipp==1.0.0