humayoun96 committed
Commit df262b6 · 1 Parent(s): 969c255

Upload 9 files
README.md CHANGED
@@ -1,13 +1,7 @@
- ---
- title: LawGPT
- emoji: 🔥
- colorFrom: purple
- colorTo: red
- sdk: gradio
- sdk_version: 3.38.0
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Question Answer over a PDF document
+ This is a repository for my [YouTube channel](https://www.youtube.com/channel/UCOT01XvBSj12xQsANtTeAcQ); you can build a question-answering app over a PDF document with it.
+
+ #### YouTube video
+ I have created a video explaining this repository; you can watch it [here](https://youtu.be/fNfcEvhwTMA).
+
+ It has a Gradio UI as well; [here](https://youtu.be/XRfHDoROopY) is a video explaining it.
config/__init__.py ADDED
File without changes
config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (145 Bytes)
config/__pycache__/config.cpython-310.pyc ADDED
Binary file (499 Bytes)
config/config.py ADDED
@@ -0,0 +1,18 @@
+ import os
+ import tempfile
+
+ from dotenv import load_dotenv, find_dotenv
+
+ load_dotenv(find_dotenv())
+
+ PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
+ PINECONE_ENVIRONMENT = os.getenv('PINECONE_ENVIRONMENT')
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+
+ OUTPUT_DIR = os.path.join(
+     tempfile.gettempdir(),
+     'rkk-document-gpt',
+     'output'
+ )
+
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
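For orientation (not part of the diff): `config/config.py` only exposes module-level constants read from a `.env` file via `python-dotenv`. A minimal sketch of how a caller could sanity-check that configuration at startup, assuming the package is importable as `config.config`; everything beyond the constant names is illustrative:

```python
# Hypothetical startup check; the constant names come from config/config.py,
# the rest of this snippet is an assumption, not part of the commit.
from config.config import (
    PINECONE_API_KEY,
    PINECONE_ENVIRONMENT,
    OPENAI_API_KEY,
    OUTPUT_DIR,
)

required = {
    'PINECONE_API_KEY': PINECONE_API_KEY,
    'PINECONE_ENVIRONMENT': PINECONE_ENVIRONMENT,
    'OPENAI_API_KEY': OPENAI_API_KEY,
}
missing = [name for name, value in required.items() if not value]
if missing:
    raise RuntimeError(f'Missing values in .env: {", ".join(missing)}')

print('Output directory:', OUTPUT_DIR)  # created by config.py with exist_ok=True
```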
conversation.py ADDED
@@ -0,0 +1,38 @@
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.chat_models import ChatOpenAI
+ from langchain.vectorstores import Pinecone
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.memory import ConversationBufferMemory
+ import pinecone
+
+
+ def create_conversation(query: str, chat_history: list, pinecone_api_key: str, pinecone_environment: str, pinecone_index_name: str, openai_api_key: str) -> tuple:
+     try:
+         pinecone.init(
+             api_key=pinecone_api_key,
+             environment=pinecone_environment,
+         )
+         embeddings = OpenAIEmbeddings(
+             openai_api_key=openai_api_key
+         )
+         db = Pinecone.from_existing_index(
+             index_name=pinecone_index_name,
+             embedding=embeddings
+         )
+         memory = ConversationBufferMemory(
+             memory_key='chat_history',
+             return_messages=False
+         )
+         cqa = ConversationalRetrievalChain.from_llm(
+             llm=ChatOpenAI(temperature=0.0,
+                            openai_api_key=openai_api_key),
+             retriever=db.as_retriever(),
+             memory=memory,
+             get_chat_history=lambda h: h,
+         )
+         result = cqa({'question': query, 'chat_history': chat_history})
+         chat_history.append((query, result['answer']))
+         return '', chat_history
+     except Exception as e:
+         chat_history.append((query, str(e)))
+         return '', chat_history
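A minimal sketch (not part of the commit) of exercising `create_conversation` without the Gradio UI, assuming an index has already been populated (e.g. via `create_indexes` below) and the pinned versions from `requirements.txt` (langchain 0.0.228, pinecone-client 2.2.2) are installed. All keys and names below are placeholders:

```python
from conversation import create_conversation

chat_history = []  # list of (question, answer) tuples, the format gr.Chatbot uses
_, chat_history = create_conversation(
    query='What is this document about?',
    chat_history=chat_history,
    pinecone_api_key='YOUR_PINECONE_API_KEY',   # placeholder
    pinecone_environment='YOUR_PINECONE_ENV',   # placeholder
    pinecone_index_name='your-index-name',      # placeholder
    openai_api_key='YOUR_OPENAI_API_KEY',       # placeholder
)
print(chat_history[-1][1])  # the answer, or the error message if something failed
```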
gradio-ui.py ADDED
@@ -0,0 +1,41 @@
+ import gradio as gr
+
+ from indexes import create_indexes, clear_indexes
+ from conversation import create_conversation
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         pinecone_api_key = gr.components.Textbox(
+             label='Pinecone API key', type='password')
+         pinecone_environment = gr.components.Textbox(
+             label='Pinecone environment')
+         pinecone_index_name = gr.components.Textbox(
+             label='Pinecone index name')
+         openai_api_key = gr.components.Textbox(
+             label='OpenAI API key', type='password')
+     with gr.Row():
+         with gr.Column():
+             file = gr.components.File(
+                 label='Upload your PDF file',
+                 file_count='single',
+                 file_types=['.pdf'])
+             with gr.Row():
+                 upload = gr.components.Button(
+                     value='Upload', variant='primary')
+                 index_clear_btn = gr.components.Button(
+                     value='Clear', variant='stop')
+             label = gr.components.Textbox(label='Status of uploaded file')
+
+     chatbot = gr.Chatbot(label='Talk to the Document')
+     msg = gr.Textbox(label='Ask a question')
+     clear = gr.ClearButton([msg, chatbot])
+
+     upload.click(create_indexes, [
+         file, pinecone_api_key, pinecone_environment, pinecone_index_name, openai_api_key], [label])
+     index_clear_btn.click(clear_indexes, [
+         pinecone_api_key, pinecone_environment, pinecone_index_name], [label, file])
+     msg.submit(create_conversation, [msg, chatbot, pinecone_api_key,
+                pinecone_environment, pinecone_index_name, openai_api_key], [msg, chatbot])
+
+ if __name__ == '__main__':
+     demo.launch()
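Side note (not part of the commit): the UI wires the Upload button to `create_indexes`, the Clear button to `clear_indexes`, and `msg.submit` to `create_conversation`, with the status textbox and chatbot as outputs. `demo.launch()` binds to localhost by default; a hedged sketch of a more explicit entry point for `gradio-ui.py`, using standard Gradio 3.x `launch()` options that this commit does not set:

```python
# Hypothetical replacement for the last two lines of gradio-ui.py; `demo` is the
# gr.Blocks object built above in that file. The launch() options shown are
# assumptions about deployment, not something present in the commit.
if __name__ == '__main__':
    demo.launch(
        server_name='0.0.0.0',  # listen on all interfaces, e.g. inside a container
        server_port=7860,       # Gradio's default port, made explicit
        show_error=True,        # surface exceptions in the UI while debugging
    )
```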
indexes.py ADDED
@@ -0,0 +1,52 @@
+ import tempfile
+ import os
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import DirectoryLoader, TextLoader
+ from langchain.vectorstores import Pinecone
+ from PyPDF2 import PdfReader
+ import pinecone
+
+ from config import config
+
+ def create_indexes(file: tempfile, pinecone_api_key: str, pinecone_environment: str, pinecone_index_name: str, openai_api_key: str) -> str:
+     try:
+         file_path = file.name
+         reader = PyPDFLoader(file_path)
+         documents = reader.load_and_split()
+         embeddings = OpenAIEmbeddings(
+             openai_api_key=openai_api_key
+         )
+         pinecone.init(
+             api_key=pinecone_api_key,
+             environment=pinecone_environment
+         )
+         indexes_list = pinecone.list_indexes()
+         if pinecone_index_name not in indexes_list:
+             pinecone.create_index(
+                 name=pinecone_index_name,
+                 dimension=1536
+             )
+         Pinecone.from_documents(
+             documents=documents,
+             embedding=embeddings,
+             index_name=pinecone_index_name
+         )
+         return 'Document uploaded and index created successfully. You can chat now.'
+     except Exception as e:
+         return str(e)
+
+ def clear_indexes(pinecone_api_key: str, pinecone_environment: str, pinecone_index_name: str) -> str:
+     try:
+         pinecone.init(
+             api_key=pinecone_api_key,
+             environment=pinecone_environment
+         )
+         indexes_list = pinecone.list_indexes()
+         if pinecone_index_name in indexes_list:
+             pinecone.delete_index(name=pinecone_index_name)
+         return 'Indexes cleared.', None
+     except Exception as e:
+         return str(e), None
+
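A minimal sketch (not part of the commit) of indexing a local PDF without the UI. `create_indexes` only reads `file.name`, so any object exposing the path through a `.name` attribute will do; `SimpleNamespace` stands in for Gradio's uploaded-file object here, and all keys, paths, and index names are placeholders:

```python
from types import SimpleNamespace

from indexes import create_indexes, clear_indexes

status = create_indexes(
    file=SimpleNamespace(name='docs/sample.pdf'),   # placeholder path to a local PDF
    pinecone_api_key='YOUR_PINECONE_API_KEY',       # placeholder
    pinecone_environment='YOUR_PINECONE_ENV',       # placeholder
    pinecone_index_name='your-index-name',          # placeholder
    openai_api_key='YOUR_OPENAI_API_KEY',           # placeholder
)
print(status)  # success message from create_indexes, or the error text on failure

# To drop the index again later:
# status, _ = clear_indexes('YOUR_PINECONE_API_KEY', 'YOUR_PINECONE_ENV', 'your-index-name')
```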
requirements.txt ADDED
@@ -0,0 +1,142 @@
+ aiofiles==23.1.0
+ aiohttp==3.8.4
+ aiosignal==1.3.1
+ altair==5.0.1
+ anyio==3.7.1
+ asttokens==2.2.1
+ async-timeout==4.0.2
+ attrs==23.1.0
+ autopep8==2.0.2
+ backcall==0.2.0
+ backoff==2.2.1
+ certifi==2023.5.7
+ charset-normalizer==3.2.0
+ click==8.1.4
+ clickhouse-connect==0.6.6
+ coloredlogs==15.0.1
+ comm==0.1.3
+ contourpy==1.1.0
+ cycler==0.11.0
+ dataclasses-json==0.5.9
+ debugpy==1.6.7
+ decorator==5.1.1
+ dnspython==2.3.0
+ duckdb==0.8.1
+ exceptiongroup==1.1.2
+ executing==1.2.0
+ fastapi==0.100.0
+ ffmpy==0.3.0
+ filelock==3.12.2
+ flatbuffers==23.5.26
+ fonttools==4.40.0
+ frozenlist==1.3.3
+ fsspec==2023.6.0
+ gradio==3.36.1
+ gradio_client==0.2.7
+ greenlet==2.0.2
+ h11==0.14.0
+ hnswlib==0.7.0
+ httpcore==0.17.3
+ httptools==0.6.0
+ httpx==0.24.1
+ huggingface-hub==0.16.4
+ humanfriendly==10.0
+ idna==3.4
+ importlib-metadata==6.8.0
+ importlib-resources==6.0.0
+ ipykernel==6.24.0
+ ipython==8.14.0
+ ipywidgets==8.0.7
+ jedi==0.18.2
+ Jinja2==3.1.2
+ jsonschema==4.18.0
+ jsonschema-specifications==2023.6.1
+ jupyter_client==8.3.0
+ jupyter_core==5.3.1
+ jupyterlab-widgets==3.0.8
+ kiwisolver==1.4.4
+ langchain==0.0.228
+ langchainplus-sdk==0.0.20
+ linkify-it-py==2.0.2
+ loguru==0.7.0
+ lz4==4.3.2
+ markdown-it-py==2.2.0
+ MarkupSafe==2.1.3
+ marshmallow==3.19.0
+ marshmallow-enum==1.5.1
+ matplotlib==3.7.2
+ matplotlib-inline==0.1.6
+ mdit-py-plugins==0.3.3
+ mdurl==0.1.2
+ monotonic==1.6
+ mpmath==1.3.0
+ multidict==6.0.4
+ mypy-extensions==1.0.0
+ nest-asyncio==1.5.6
+ numexpr==2.8.4
+ numpy==1.25.1
+ onnxruntime==1.15.1
+ openai==0.27.8
+ openapi-schema-pydantic==1.2.4
+ orjson==3.9.2
+ overrides==7.3.1
+ packaging==23.1
+ pandas==2.0.3
+ parso==0.8.3
+ pexpect==4.8.0
+ pickleshare==0.7.5
+ Pillow==10.0.0
+ pinecone-client==2.2.2
+ platformdirs==3.8.1
+ posthog==3.0.1
+ prompt-toolkit==3.0.39
+ protobuf==4.23.4
+ psutil==5.9.5
+ ptyprocess==0.7.0
+ pulsar-client==3.2.0
+ pure-eval==0.2.2
+ pycodestyle==2.10.0
+ pydantic==1.10.11
+ pydub==0.25.1
+ Pygments==2.15.1
+ pyparsing==3.0.9
+ PyPDF2==3.0.1
+ python-dateutil==2.8.2
+ python-dotenv==1.0.0
+ python-multipart==0.0.6
+ pytz==2023.3
+ PyYAML==6.0
+ pyzmq==25.1.0
+ referencing==0.29.1
+ regex==2023.6.3
+ requests==2.31.0
+ rpds-py==0.8.10
+ semantic-version==2.10.0
+ six==1.16.0
+ sniffio==1.3.0
+ SQLAlchemy==2.0.18
+ stack-data==0.6.2
+ starlette==0.27.0
+ sympy==1.12
+ tenacity==8.2.2
+ tiktoken==0.4.0
+ tokenizers==0.13.3
+ tomli==2.0.1
+ toolz==0.12.0
+ tornado==6.3.2
+ tqdm==4.65.0
+ traitlets==5.9.0
+ typing-inspect==0.9.0
+ typing_extensions==4.7.1
+ tzdata==2023.3
+ uc-micro-py==1.0.2
+ urllib3==2.0.3
+ uvicorn==0.22.0
+ watchfiles==0.19.0
+ wcwidth==0.2.6
+ websockets==11.0.3
+ widgetsnbextension==4.0.8
+ yarl==1.9.2
+ zipp==3.16.0
+ zstandard==0.21.0
+ pypdf==3.12.2