nnpy committed on
Commit
a018f2d
·
verified ·
1 Parent(s): 51c0917

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +96 -0
  3. getting_real_basecamp.pdf +3 -0
  4. requirements.txt +146 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ getting_real_basecamp.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import gradio as gr
4
+ import os
5
+ import google.generativeai as genai
6
+ from langchain.vectorstores import Chroma
7
+ from PyPDF2 import PdfReader
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
10
+ from groq import Groq
11
+
12
# Location of the PDF that gets indexed at start-up.
file_path = './getting_real_basecamp.pdf'

# Google Generative AI SDK: the key is mandatory, so a missing
# GOOGLE_API_KEY raises KeyError right here at import time.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Groq chat client; the key is looked up with .get(), so None is passed
# through to the client when GROQ_API_KEY is not set.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
19
+
20
def loader_data(file_path):
    """Index the text of a PDF in Chroma and return a retriever over it.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The object produced by ``Chroma.from_texts(...).as_retriever()``,
        built from chunks created with ``chunk_size=3000`` and
        ``chunk_overlap=0``.
    """
    pdf_reader = PdfReader(file_path)
    # Join pages with a newline so the last word of one page is not fused
    # with the first word of the next, and fall back to "" for pages that
    # yield no text. A single join also avoids repeated string re-allocation.
    content = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
    texts = text_splitter.split_text(content)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    return Chroma.from_texts(texts, embeddings).as_retriever()


# Built once at import time; get_response() queries this retriever.
db = loader_data(file_path)
33
+
34
def format_history(query, history):
    """Build the chat-completion message list for the current turn.

    Args:
        query: The user's newest message.
        history: Iterable of earlier turns; element ``[0]`` of each turn is
            used as the user message and element ``[1]`` as the assistant
            reply.

    Returns:
        A list of ``{"role", "content"}`` dicts: the system prompt, then the
        earlier turns in order, then ``query`` as the final user message.
    """
    # NOTE(review): the prompt is reproduced as it appears in the diff view,
    # where leading indentation of the inner lines is not visible -- confirm
    # against the original file.
    system_prompt = """You are docGPT, a chatbot designed to help users with their document-related queries. Initially you have contents of `getting_real_basecamp` book.\nSimply call the function "query_document" with the search_query parameter to get the relevent contents from the document.
- query_document: Get the answer to a question from a given document. It'll return the most relevant content from the document. Always use this function if the user is asking about the document content or related to that.
- parameters:
- search_query: string (required) - Use keywords to search the document.

If you need to use function or you want any information from the book, Use following format to respond. Make sure the argument in the function call tag can be parsed as a JSON object.
<query_document>{"search_query": "value"}</query_document>

If you don't want to use the function, just don't include any function call tags in the response. NEVER told user about the function call (That's a secret, only for you.).
"""
    messages = [{'role': 'system', 'content': system_prompt}]
    for turn in history:
        messages.extend((
            {"role": 'user', 'content': turn[0]},
            {"role": 'assistant', 'content': turn[1]},
        ))
    messages.append({"role": 'user', 'content': query})
    return messages
51
+
52
def check_for_function_call(req):
    """Extract the payload of a ``<query_document>`` tag from model output.

    Args:
        req: Text returned by the chat model (may be empty or ``None``).

    Returns:
        The raw text between the first ``<query_document>`` tag and the
        nearest following ``</query_document>`` tag, or ``None`` when the
        text contains no such well-formed pair.
    """
    if not req:
        return None
    # Rely on the regex alone: a plain substring pre-check also passes when
    # the closing tag appears before the opening one, in which case the
    # search finds nothing and calling ``.group`` on the missing match would
    # raise AttributeError.
    match = re.search(r'<query_document>(.*?)</query_document>', req, re.DOTALL)
    if match is None:
        return None
    return match.group(1)
59
+
60
_CHAT_MODEL = "mixtral-8x7b-32768"


def _complete(messages):
    """Run one non-streaming chat completion and return the reply text."""
    completion = client.chat.completions.create(
        messages=messages,
        model=_CHAT_MODEL,
        stream=False,
    )
    return completion.choices[0].message.content


def _search_query_from(fn_call):
    """Return the search string carried by a function-call tag payload."""
    try:
        args = json.loads(fn_call)
    except json.JSONDecodeError:
        # The payload is model-generated and not guaranteed to be valid
        # JSON; use the raw tag text as the query instead of failing.
        return fn_call.strip()
    if isinstance(args, dict) and isinstance(args.get("search_query"), str):
        return args["search_query"]
    # Valid JSON but not the expected {"search_query": "..."} shape.
    return fn_call.strip()


def get_response(message, history):
    """Answer one chat turn, consulting the document retriever on request.

    The model is asked once. If its reply contains a ``<query_document>``
    tag, the tag's search query is run against ``db``, the retrieved
    content is appended to the conversation as an extra user message, and
    the model is asked again; that second reply is returned. Otherwise the
    first reply is returned unchanged.

    Args:
        message: The user's newest message.
        history: Earlier turns, passed through to ``format_history``.

    Returns:
        The text of the model's final reply.
    """
    msg = format_history(message, history)
    response = _complete(msg)
    print('#############')
    print(response)
    print('$$$$$$$$$$$$$$$$')

    fn_call = check_for_function_call(response)
    if fn_call is None:
        return response

    print("Function call found: ", fn_call)
    res = db.get_relevant_documents(_search_query_from(fn_call))
    print("query response: ", res)
    msg.append(
        {
            "role": "user",
            "content": "This is the function call response (NOT USER): " + str(res) + "Take this to user and answer the question based on it."
        }
    )
    return _complete(msg)
91
+
92
demo = gr.ChatInterface(get_response, title='DocGPT', description="Chat with getting_real_basecamp document")

if __name__ == "__main__":
    # The login pair can be overridden through the environment so real
    # credentials need not live in the source; the defaults keep the
    # previous test/test behaviour when the variables are unset.
    demo.launch(
        auth=(
            os.environ.get("DOCGPT_USERNAME", "test"),
            os.environ.get("DOCGPT_PASSWORD", "test"),
        ),
        show_api=False,
    )
96
+
getting_real_basecamp.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a369da3ab9d824af8eddc9bfbaa6f8d9ae4a6cc3981f0bb92c2b19e46a563af
3
+ size 5118368
requirements.txt ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.3
3
+ aiosignal==1.3.1
4
+ altair==5.2.0
5
+ annotated-types==0.6.0
6
+ anyio==4.3.0
7
+ asgiref==3.7.2
8
+ async-timeout==4.0.3
9
+ attrs==23.2.0
10
+ backoff==2.2.1
11
+ bcrypt==4.1.2
12
+ build==1.1.1
13
+ cachetools==5.3.3
14
+ certifi==2024.2.2
15
+ charset-normalizer==3.3.2
16
+ chroma-hnswlib==0.7.3
17
+ chromadb==0.4.24
18
+ click==8.1.7
19
+ colorama==0.4.6
20
+ coloredlogs==15.0.1
21
+ contourpy==1.2.0
22
+ cycler==0.12.1
23
+ dataclasses-json==0.6.4
24
+ Deprecated==1.2.14
25
+ distro==1.9.0
26
+ exceptiongroup==1.2.0
27
+ fastapi==0.110.0
28
+ ffmpy==0.3.2
29
+ filelock==3.13.1
30
+ flatbuffers==23.5.26
31
+ fonttools==4.49.0
32
+ frozenlist==1.4.1
33
+ fsspec==2024.2.0
34
+ google-ai-generativelanguage==0.4.0
35
+ google-api-core==2.17.1
36
+ google-auth==2.28.1
37
+ google-generativeai==0.3.2
38
+ googleapis-common-protos==1.62.0
39
+ gradio==4.19.2
40
+ gradio_client==0.10.1
41
+ greenlet==3.0.3
42
+ groq==0.4.1
43
+ grpcio==1.62.0
44
+ grpcio-status==1.62.0
45
+ h11==0.14.0
46
+ httpcore==1.0.4
47
+ httptools==0.6.1
48
+ httpx==0.27.0
49
+ huggingface-hub==0.21.3
50
+ humanfriendly==10.0
51
+ idna==3.6
52
+ importlib-metadata==6.11.0
53
+ importlib_resources==6.1.2
54
+ Jinja2==3.1.3
55
+ jsonpatch==1.33
56
+ jsonpointer==2.4
57
+ jsonschema==4.21.1
58
+ jsonschema-specifications==2023.12.1
59
+ kiwisolver==1.4.5
60
+ kubernetes==29.0.0
61
+ langchain==0.1.9
62
+ langchain-community==0.0.24
63
+ langchain-core==0.1.28
64
+ langchain-google-genai==0.0.9
65
+ langsmith==0.1.10
66
+ markdown-it-py==3.0.0
67
+ MarkupSafe==2.1.5
68
+ marshmallow==3.21.0
69
+ matplotlib==3.8.3
70
+ mdurl==0.1.2
71
+ mmh3==4.1.0
72
+ monotonic==1.6
73
+ mpmath==1.3.0
74
+ multidict==6.0.5
75
+ mypy-extensions==1.0.0
76
+ numpy==1.26.4
77
+ oauthlib==3.2.2
78
+ onnxruntime==1.17.1
79
+ opentelemetry-api==1.23.0
80
+ opentelemetry-exporter-otlp-proto-common==1.23.0
81
+ opentelemetry-exporter-otlp-proto-grpc==1.23.0
82
+ opentelemetry-instrumentation==0.44b0
83
+ opentelemetry-instrumentation-asgi==0.44b0
84
+ opentelemetry-instrumentation-fastapi==0.44b0
85
+ opentelemetry-proto==1.23.0
86
+ opentelemetry-sdk==1.23.0
87
+ opentelemetry-semantic-conventions==0.44b0
88
+ opentelemetry-util-http==0.44b0
89
+ orjson==3.9.15
90
+ overrides==7.7.0
91
+ packaging==23.2
92
+ pandas==2.2.1
93
+ pillow==10.2.0
94
+ posthog==3.4.2
95
+ proto-plus==1.23.0
96
+ protobuf==4.25.3
97
+ pulsar-client==3.4.0
98
+ pyasn1==0.5.1
99
+ pyasn1-modules==0.3.0
100
+ pydantic==2.6.3
101
+ pydantic_core==2.16.3
102
+ pydub==0.25.1
103
+ Pygments==2.17.2
104
+ pyparsing==3.1.1
105
+ pypdf==4.0.2
106
+ PyPDF2==3.0.1
107
+ PyPika==0.48.9
108
+ pyproject_hooks==1.0.0
109
+ python-dateutil==2.9.0
110
+ python-dotenv==1.0.1
111
+ python-multipart==0.0.9
112
+ pytz==2024.1
113
+ PyYAML==6.0.1
114
+ referencing==0.33.0
115
+ requests==2.31.0
116
+ requests-oauthlib==1.3.1
117
+ rich==13.7.1
118
+ rpds-py==0.18.0
119
+ rsa==4.9
120
+ ruff==0.3.0
121
+ semantic-version==2.10.0
122
+ shellingham==1.5.4
123
+ six==1.16.0
124
+ sniffio==1.3.1
125
+ SQLAlchemy==2.0.27
126
+ starlette==0.36.3
127
+ sympy==1.12
128
+ tenacity==8.2.3
129
+ tokenizers==0.15.2
130
+ tomli==2.0.1
131
+ tomlkit==0.12.0
132
+ toolz==0.12.1
133
+ tqdm==4.66.2
134
+ typer==0.9.0
135
+ typing-inspect==0.9.0
136
+ typing_extensions==4.10.0
137
+ tzdata==2024.1
138
+ urllib3==2.2.1
139
+ uvicorn==0.27.1
140
+ uvloop==0.19.0
141
+ watchfiles==0.21.0
142
+ websocket-client==1.7.0
143
+ websockets==11.0.3
144
+ wrapt==1.16.0
145
+ yarl==1.9.4
146
+ zipp==3.17.0