Spaces:

nnpy
/

DocGPT

Sleeping

App Files Files Community

nnpy committed on Mar 1, 2024

Commit

a018f2d

verified ·

1 Parent(s): 51c0917

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
app.py +96 -0
getting_real_basecamp.pdf +3 -0
requirements.txt +146 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+getting_real_basecamp.pdf filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import json
+import re
+import gradio as gr
+import os
+import google.generativeai as genai
+from langchain.vectorstores import Chroma
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from groq import Groq
+# Configure the Google Generative AI SDK. Indexing os.environ raises KeyError
+# at import time when GOOGLE_API_KEY is not set.
+genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
+# Groq client used for the chat completions below. os.environ.get() yields
+# None when GROQ_API_KEY is unset; how Groq reacts to a None key is not
+# visible here -- TODO confirm.
+client = Groq(
+    api_key=os.environ.get("GROQ_API_KEY"),
+)
+# Relative path of the PDF that is indexed at start-up.
+file_path = './getting_real_basecamp.pdf'
+def loader_data(file_path):
+    pdf_reader = PdfReader(file_path)
+    content = ''
+    for page in pdf_reader.pages:
+        content += page.extract_text()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=0)
+    texts = text_splitter.split_text(content)
+    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+    vector_store = Chroma.from_texts(texts, embeddings).as_retriever()
+    return vector_store
+# Build the retriever once at import time; get_response() reads this global.
+db = loader_data(file_path)
+def format_history(query, history):
+    msg = []
+    msg.append({'role': 'system', 'content': """You are docGPT, a chatbot designed to help users with their document-related queries. Initially you have contents of `getting_real_basecamp` book.\nSimply call the function "query_document" with the search_query parameter to get the relevent contents from the document.
+- query_document: Get the answer to a question from a given document. It'll return the most relevant content from the document. Always use this function if the user is asking about the document content or related to that.
+    - parameters:
+        - search_query: string (required) - Use keywords to search the document.
+If you need to use function or you want any information from the book, Use following format to respond. Make sure the argument in the function call tag can be parsed as a JSON object.
+<query_document>{"search_query": "value"}</query_document>
+If you don't want to use the function, just don't include any function call tags in the response. NEVER told user about the function call (That's a secret, only for you.).
+"""})
+    for i in history:
+        msg.append({"role": 'user', 'content': i[0]})
+        msg.append({"role": 'assistant', 'content': i[1]})
+    msg.append({"role": 'user', 'content': query})
+    return msg
+def check_for_function_call(req):
+    if "<query_document>" in req and "</query_document>" in req:
+        reg = re.compile(r'<query_document>(.*?)</query_document>', re.DOTALL)
+        match = reg.search(req)
+        fn_call = match.group(1)
+        return fn_call
+    return None
+def get_response(message, history):
+    msg = format_history(message, history)
+    chat_completion = client.chat.completions.create(
+    messages=msg,
+        model="mixtral-8x7b-32768",
+        stream=False
+    )
+    response = chat_completion.choices[0].message.content
+    print('#############')
+    print(response)
+    print('$$$$$$$$$$$$$$$$')
+    fn_call = check_for_function_call(response)
+    if fn_call is not None:
+        print("Function call found: ", fn_call)
+        fn_args = json.loads(fn_call)
+        res = db.get_relevant_documents(fn_args["search_query"])
+        print("query response: ", res)
+        msg.append(
+            {
+                "role": "user",
+                "content": "This is the function call response (NOT USER): " + str(res) + "Take this to user and answer the question based on it."
+            }
+        )
+        response = client.chat.completions.create(
+            messages=msg,
+            model="mixtral-8x7b-32768",
+            stream=False
+        ).choices[0].message.content
+        return response
+    else:
+        return response
+# Gradio chat UI; every submitted message is routed through get_response().
+demo = gr.ChatInterface(get_response, title='DocGPT', description="Chat with getting_real_basecamp document")
+if __name__ == "__main__":
+    # NOTE(review): the login credentials are hard-coded as ("test", "test");
+    # consider reading them from the environment before exposing this app.
+    # show_api=False presumably hides Gradio's API page -- TODO confirm.
+    demo.launch(auth=("test", "test"), show_api=False)

getting_real_basecamp.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a369da3ab9d824af8eddc9bfbaa6f8d9ae4a6cc3981f0bb92c2b19e46a563af
+size 5118368

requirements.txt ADDED Viewed

	@@ -0,0 +1,146 @@

+aiofiles==23.2.1
+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.3.0
+asgiref==3.7.2
+async-timeout==4.0.3
+attrs==23.2.0
+backoff==2.2.1
+bcrypt==4.1.2
+build==1.1.1
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.3
+chromadb==0.4.24
+click==8.1.7
+colorama==0.4.6
+coloredlogs==15.0.1
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses-json==0.6.4
+Deprecated==1.2.14
+distro==1.9.0
+exceptiongroup==1.2.0
+fastapi==0.110.0
+ffmpy==0.3.2
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.49.0
+frozenlist==1.4.1
+fsspec==2024.2.0
+google-ai-generativelanguage==0.4.0
+google-api-core==2.17.1
+google-auth==2.28.1
+google-generativeai==0.3.2
+googleapis-common-protos==1.62.0
+gradio==4.19.2
+gradio_client==0.10.1
+greenlet==3.0.3
+groq==0.4.1
+grpcio==1.62.0
+grpcio-status==1.62.0
+h11==0.14.0
+httpcore==1.0.4
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.21.3
+humanfriendly==10.0
+idna==3.6
+importlib-metadata==6.11.0
+importlib_resources==6.1.2
+Jinja2==3.1.3
+jsonpatch==1.33
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+kubernetes==29.0.0
+langchain==0.1.9
+langchain-community==0.0.24
+langchain-core==0.1.28
+langchain-google-genai==0.0.9
+langsmith==0.1.10
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.0
+matplotlib==3.8.3
+mdurl==0.1.2
+mmh3==4.1.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+numpy==1.26.4
+oauthlib==3.2.2
+onnxruntime==1.17.1
+opentelemetry-api==1.23.0
+opentelemetry-exporter-otlp-proto-common==1.23.0
+opentelemetry-exporter-otlp-proto-grpc==1.23.0
+opentelemetry-instrumentation==0.44b0
+opentelemetry-instrumentation-asgi==0.44b0
+opentelemetry-instrumentation-fastapi==0.44b0
+opentelemetry-proto==1.23.0
+opentelemetry-sdk==1.23.0
+opentelemetry-semantic-conventions==0.44b0
+opentelemetry-util-http==0.44b0
+orjson==3.9.15
+overrides==7.7.0
+packaging==23.2
+pandas==2.2.1
+pillow==10.2.0
+posthog==3.4.2
+proto-plus==1.23.0
+protobuf==4.25.3
+pulsar-client==3.4.0
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pydantic==2.6.3
+pydantic_core==2.16.3
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.1
+pypdf==4.0.2
+PyPDF2==3.0.1
+PyPika==0.48.9
+pyproject_hooks==1.0.0
+python-dateutil==2.9.0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.33.0
+requests==2.31.0
+requests-oauthlib==1.3.1
+rich==13.7.1
+rpds-py==0.18.0
+rsa==4.9
+ruff==0.3.0
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+SQLAlchemy==2.0.27
+starlette==0.36.3
+sympy==1.12
+tenacity==8.2.3
+tokenizers==0.15.2
+tomli==2.0.1
+tomlkit==0.12.0
+toolz==0.12.1
+tqdm==4.66.2
+typer==0.9.0
+typing-inspect==0.9.0
+typing_extensions==4.10.0
+tzdata==2024.1
+urllib3==2.2.1
+uvicorn==0.27.1
+uvloop==0.19.0
+watchfiles==0.21.0
+websocket-client==1.7.0
+websockets==11.0.3
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0