File size: 2,774 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
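# Keep these pins identical to the torch versions in requirements.txt:
# NOTE(review): neither marker below matches Intel macOS (sys_platform == "darwin" with platform_machine != "arm64"), so torch is left unpinned there -- confirm this is intended.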
torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64"
torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64"

# optional for chat with PDF
langchain==0.2.6
langchain_experimental==0.0.62
langchain-community==0.2.6
langsmith==0.1.82
langchain-core==0.2.23
langchain-text-splitters==0.2.2
#langchain_huggingface==0.0.3

pypdf>=3.17.1
# avoid textract: it requires an old version of six
#textract==1.6.5
pypdfium2>=4.24.0

# for HF embeddings
sentence_transformers>=3.0.1
# https://github.com/h2oai/instructor-embedding/tree/h2ogpt
# pip wheel .
InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl
# https://github.com/h2oai/sentence-transformers/tree/h2ogpt
# pip wheel .
sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl

# optional: clients for hosted LLM/embedding endpoints (Replicate, Anthropic, Together, OpenAI, Google); each requires an API key
replicate>=0.26.0
anthropic>=0.34.2
langchain-anthropic>=0.1.20
together>=1.1.5
langchain_together==0.1.3
langchain-openai>=0.1.8
langchain-google-genai>=1.0.8
google-generativeai>=0.7.2
google-ai-generativelanguage>=0.6.6
# pydantic version conflict
#mistral_common==1.3.3

llava @ https://h2o-release.s3.amazonaws.com/h2ogpt/llava-1.7.0.dev0-py3-none-any.whl

#langchain_mistralai==0.1.2  # tokenizers<0.16.0, but transformers requires >=0.19
httpx>=0.25.2
httpx-sse>=0.3.1
mistralai>=0.4.0
# pydantic issue, don't need yet
#mistral-common==1.0.2

groq>=0.5.0
langchain-groq>=0.1.5

# local vector db
chromadb==0.4.23

pydantic-settings>=2.1.0

# server vector db
#pymilvus==2.2.8

# weak url support, for when opencv etc. cannot be installed. If you uncomment this one, then comment out the unstructured[local-inference] and unstructured[all-docs] pins below
# unstructured==0.8.1

# strong support for images
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
unstructured[local-inference]==0.12.5
unstructured[all-docs]==0.12.5
docx2txt==0.8
python-docx==1.1.0
#pdf2image==1.16.3
#pytesseract==0.3.10
pillow>=10.2.0
posthog

pdfminer.six==20231228
urllib3
requests_file

#pdf2image==1.16.3
#pytesseract==0.3.10
tabulate>=0.9.0
# FYI pandoc already part of requirements.txt

# Needed for JSONLoader, but installing it causes trouble for some users.
# If installation fails, TRY: apt-get install autoconf libtool
# Unclear how this behaves on Windows/macOS for now.
jq>=1.4.1; platform_machine == "x86_64"

# to check licenses
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses>=4.3.0

# weaviate vector db
# required for httpx for mistralai
weaviate-client==3.26.2

# vllm==0.2.2

# only for gradio>=4
gradio_pdf>=0.0.7

gradio_tools>=0.0.9

# Qdrant - https://qdrant.tech vector database
qdrant-client>=1.8.0

# MIT:
arxiv>=2.1.3