AmrGharieb committed on
Commit
db36f02
·
1 Parent(s): 7ceb632

first deployment

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ gte_large/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv, find_dotenv
2
+ from langchain.chains import LLMChain
3
+ import streamlit as st
4
+ from decouple import config
5
+ from langchain.llms import OpenAI
6
+ from langchain.document_loaders import PyPDFLoader
7
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
8
+ from langchain.vectorstores import Chroma
9
+ from langchain.retrievers.document_compressors import LLMChainExtractor
10
+ from langchain.retrievers import ContextualCompressionRetriever
11
+ from langchain.retrievers.self_query.base import SelfQueryRetriever
12
+ from langchain.chains import RetrievalQA
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ from langchain.evaluation.qa import QAGenerateChain
15
+ from langchain.chains import RetrievalQA
16
+ from langchain.chat_models import ChatOpenAI
17
+ from langchain.document_loaders import CSVLoader
18
+ from langchain.indexes import VectorstoreIndexCreator
19
+ from langchain.vectorstores import DocArrayInMemorySearch
20
+ from langchain.prompts import ChatPromptTemplate
21
+ from langchain.document_loaders.generic import GenericLoader
22
+ from langchain.document_loaders.parsers import OpenAIWhisperParser
23
+ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
24
+ from langchain.prompts import PromptTemplate
25
+ from langchain.memory import ConversationBufferMemory
26
+ from langchain.chains import ConversationalRetrievalChain
27
+ import time
28
+ from htmlTemplates import css, bot_template, user_template
29
+ from pathlib import Path
30
+ import pathlib
31
+ import platform
32
# Name of the host operating system as reported by the standard library.
plt = platform.system()
if plt == 'Linux':
    # On Linux, make pathlib.WindowsPath refer to PosixPath.
    # NOTE(review): presumably this lets data that was saved on Windows and
    # references WindowsPath be loaded on a Linux host -- confirm.
    pathlib.WindowsPath = pathlib.PosixPath

_ = load_dotenv(find_dotenv())  # read local .env file
37
+
38
+
39
def timeit(func):
    """Decorate ``func`` so that every call prints how long it took.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time to stdout, and returns the result
        of ``func`` unchanged. The wrapper keeps the metadata of ``func``
        (``__name__``, ``__doc__``, ...).
    """
    # Imported here so the block does not rely on a new file-level import.
    import functools

    @functools.wraps(func)  # without this every decorated function is named "wrapper"
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement cannot go negative or
        # jump when the system clock is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(
            f"Function {func.__name__} took {elapsed} seconds to execute.")
        return result
    return wrapper
48
+
49
+
50
@timeit
def get_llm():
    """Create the OpenAI LLM used by the app.

    Returns:
        An ``OpenAI`` instance constructed with ``temperature=0.0``.
    """
    llm = OpenAI(temperature=0.0)
    return llm
53
+
54
+
55
@timeit
def get_memory():
    """Create the conversation memory for a chat.

    Returns:
        A ``ConversationBufferMemory`` that stores its history under the
        ``chat_history`` key and is constructed with ``return_messages=True``.
    """
    memory_options = {
        "memory_key": "chat_history",
        "return_messages": True,
    }
    return ConversationBufferMemory(**memory_options)
61
+
62
+
63
@timeit
def generate_response(question, vectordb, llm, memory, chat_history):
    """Answer ``question`` with a retrieval chain and render the conversation.

    Builds a ``ConversationalRetrievalChain`` over ``vectordb``, runs it on
    the question, and hands the chain output to ``handle_userinput``.

    Args:
        question: The user's question text.
        vectordb: Vector store; its ``as_retriever`` method supplies the
            retriever for the chain.
        llm: Language model passed to the chain.
        memory: Conversation memory attached to the chain.
        chat_history: Sent to the chain under the ``"chat_history"`` input key.
            NOTE(review): the chain also has ``memory`` attached, which
            presumably supplies the history itself -- confirm whether this
            argument still has any effect.

    Returns:
        None. The result is displayed by ``handle_userinput``.
    """
    template = """Use the provided context to answer the user's question.
you are honest petroleum engineer specialist in hydraulic fracture stimulation and reservoir engineering.
when you asked about code numer like SPE-19***-MS or any thing like that it's a paper search for it and give an introduction.
If you don't know the answer, respond with "Sorry Sir, I do not know".
Context: {context}
Question: {question}
Answer:
"""

    prompt = PromptTemplate(
        template=template,
        input_variables=['question', 'context'])

    # The search parameters must travel inside ``search_kwargs``. Passed as
    # top-level keyword arguments (as before) they are not forwarded to the
    # MMR search, so the intended k=5 / fetch_k=10 never took effect.
    retriever = vectordb.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5, "fetch_k": 10},
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": prompt}
    )

    response = qa_chain({"question": question, "chat_history": chat_history})
    handle_userinput(response)
87
+
88
+
89
@timeit
def create_embeding_function():
    """Create the sentence-transformer embedding function used by the app.

    Returns:
        A ``SentenceTransformerEmbeddings`` instance for the
        ``thenlper/gte-large`` model.
    """
    # Alternative models the author left disabled: "all-mpnet-base-v2",
    # "all-MiniLM-L6-v2", "jinaai/jina-embeddings-v2-base-en" and
    # "jinaai/jina-embeddings-v2-small-en".
    return SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
104
+
105
+
106
@timeit
def get_vector_db(embedding_function):
    """Open the Chroma vector store persisted in the ``gte_large`` directory.

    Args:
        embedding_function: Embedding function handed to ``Chroma``.

    Returns:
        The ``Chroma`` instance.
    """
    persist_dir = str(Path('gte_large'))
    return Chroma(
        persist_directory=persist_dir,
        embedding_function=embedding_function,
    )
111
+
112
+
113
def handle_userinput(user_question):
    """Store the chain's chat history in session state and render it.

    Args:
        user_question: Despite the name, this is the mapping returned by the
            QA chain; only its ``'chat_history'`` entry is read. Each entry
            is expected to expose a ``content`` attribute.

    Returns:
        None. Messages are written to the Streamlit page.
    """
    # Imported here so the block does not rely on a new file-level import.
    import html

    response = user_question
    # The previous "initialise to [] if missing" step was dropped: the value
    # was unconditionally overwritten by this assignment anyway.
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        # Even positions are rendered with the user template, odd positions
        # with the bot template.
        template = user_template if i % 2 == 0 else bot_template
        # The text is injected into raw HTML (unsafe_allow_html=True), so it
        # must be escaped; otherwise a question or answer containing markup
        # would be interpreted by the browser.
        safe_text = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_text),
                 unsafe_allow_html=True)
127
+
128
+
129
if __name__ == "__main__":

    st.set_page_config(
        page_title="Hydraulic Fracture Stimulation Chat", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)
    st.title("Hydraulic Fracture Stimulation Chat")
    st.write(
        "This is a chatbot that can answer questions related to petroleum engineering specially in hydraulic fracture stimulation.")

    # Streamlit re-executes this script on every user interaction. The heavy
    # objects below are therefore kept in session state (the same pattern
    # already used for the memory) instead of being rebuilt on each rerun.

    # get embedding function
    if 'embeding_function' not in st.session_state:
        st.session_state['embeding_function'] = create_embeding_function()
    embeding_function = st.session_state['embeding_function']

    # get vector db
    if 'vector_db' not in st.session_state:
        st.session_state['vector_db'] = get_vector_db(embeding_function)
    vector_db = st.session_state['vector_db']

    # get llm
    if 'llm' not in st.session_state:
        st.session_state['llm'] = get_llm()
    llm = st.session_state['llm']

    # get memory
    if 'memory' not in st.session_state:
        st.session_state['memory'] = get_memory()
    memory = st.session_state['memory']

    # chat history passed explicitly to the chain; starts empty on every run
    chat_history = []

    prompt_question = st.chat_input("Please ask a question:")
    if prompt_question:
        generate_response(question=prompt_question, vectordb=vector_db,
                          llm=llm, memory=memory, chat_history=chat_history)
gte_large/67650f97-d36f-459f-af8f-ff25b716cd81/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3e93c7d71a7def052f3331a5611bcab93e76407c291c29158bd308fd8ac1a7
3
+ size 8472000
gte_large/67650f97-d36f-459f-af8f-ff25b716cd81/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:623782542530941eccea3efc3e80bfb767b479b3cf65a6d85dec916277b0b962
3
+ size 100
gte_large/67650f97-d36f-459f-af8f-ff25b716cd81/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da42581592e740de75828a7ee71890e66dbe3d16307ecb9e5d1a841bfeb52b9
3
+ size 113989
gte_large/67650f97-d36f-459f-af8f-ff25b716cd81/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2388906e8c52245056cf576860661b5752a625a42c8c422e9918af4a3029c86c
3
+ size 8000
gte_large/67650f97-d36f-459f-af8f-ff25b716cd81/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efd5680b5399e4190cf6fe79b8f4441981427a9c6edb0a50f206617c377875c
3
+ size 16976
gte_large/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c0d5ea2f5f5dfd09b5b0f30fb438331aba46bd53d56b74cc79e88481d4ac25
3
+ size 21815296
htmlTemplates.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet for the chat bubbles used by bot_template and user_template.
# The <style> element is now explicitly closed; previously the closing tag
# was missing, leaving the markup unterminated.
css = '''
<style>
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
</style>
'''
27
+
28
# HTML snippet for a message shown with the "bot" style. The "{{MSG}}"
# placeholder is replaced with the message text by the code that renders it.
bot_template = '''
<div class="chat-message bot">
<div class="avatar">
<img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
</div>
<div class="message">{{MSG}}</div>
</div>
'''

# HTML snippet for a message shown with the "user" style. Uses the same
# "{{MSG}}" placeholder as bot_template.
user_template = '''
<div class="chat-message user">
<div class="avatar">
<img src="https://th.bing.com/th/id/OIP.xXHQ5dk4qJH74WMGNezDjwHaHa?rs=1&pid=ImgDetMain">
</div>
<div class="message">{{MSG}}</div>
</div>
'''
requirements.txt ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.6
2
+ aiosignal==1.3.1
3
+ altair==5.1.2
4
+ annotated-types==0.6.0
5
+ anyio==3.7.1
6
+ asttokens==2.4.1
7
+ async-timeout==4.0.3
8
+ attrs==23.1.0
9
+ backoff==2.2.1
10
+ bcrypt==4.0.1
11
+ blinker==1.7.0
12
+ Brotli==1.1.0
13
+ cachetools==5.3.2
14
+ certifi==2023.7.22
15
+ cffi==1.16.0
16
+ charset-normalizer==3.3.2
17
+ Chroma==0.2.0
18
+ chroma-hnswlib==0.7.3
19
+ chromadb==0.4.17
20
+ click==8.1.7
21
+ colorama==0.4.6
22
+ coloredlogs==15.0.1
23
+ comm==0.2.0
24
+ cryptography==41.0.5
25
+ ctransformers==0.2.27
26
+ dataclasses-json==0.6.2
27
+ debugpy==1.8.0
28
+ decorator==5.1.1
29
+ Deprecated==1.2.14
30
+ distro==1.8.0
31
+ exceptiongroup==1.1.3
32
+ executing==2.0.1
33
+ fastapi==0.104.1
34
+ filelock==3.13.1
35
+ flatbuffers==23.5.26
36
+ frozenlist==1.4.0
37
+ fsspec==2023.10.0
38
+ gitdb==4.0.11
39
+ GitPython==3.1.40
40
+ google-auth==2.23.4
41
+ googleapis-common-protos==1.61.0
42
+ greenlet==3.0.1
43
+ grpcio==1.59.2
44
+ h11==0.14.0
45
+ httpcore==1.0.2
46
+ httptools==0.6.1
47
+ httpx==0.25.1
48
+ huggingface-hub==0.19.3
49
+ humanfriendly==10.0
50
+ idna==3.4
51
+ importlib-metadata==6.8.0
52
+ importlib-resources==6.1.1
53
+ ipykernel==6.26.0
54
+ ipython==8.17.2
55
+ jedi==0.19.1
56
+ Jinja2==3.1.2
57
+ joblib==1.3.2
58
+ jsonpatch==1.33
59
+ jsonpointer==2.4
60
+ jsonschema==4.19.2
61
+ jsonschema-specifications==2023.11.1
62
+ jupyter_client==8.6.0
63
+ jupyter_core==5.5.0
64
+ kubernetes==28.1.0
65
+ langchain==0.0.336
66
+ langsmith==0.0.64
67
+ markdown-it-py==3.0.0
68
+ MarkupSafe==2.1.3
69
+ marshmallow==3.20.1
70
+ matplotlib-inline==0.1.6
71
+ mdurl==0.1.2
72
+ monotonic==1.6
73
+ mpmath==1.3.0
74
+ multidict==6.0.4
75
+ mutagen==1.47.0
76
+ mypy-extensions==1.0.0
77
+ nest-asyncio==1.5.8
78
+ networkx==3.2.1
79
+ nltk==3.8.1
80
+ numpy==1.26.2
81
+ oauthlib==3.2.2
82
+ onnxruntime==1.16.2
83
+ openai==1.3.0
84
+ opentelemetry-api==1.21.0
85
+ opentelemetry-exporter-otlp-proto-common==1.21.0
86
+ opentelemetry-exporter-otlp-proto-grpc==1.21.0
87
+ opentelemetry-proto==1.21.0
88
+ opentelemetry-sdk==1.21.0
89
+ opentelemetry-semantic-conventions==0.42b0
90
+ overrides==7.4.0
91
+ packaging==23.2
92
+ pandas==2.1.3
93
+ parso==0.8.3
94
+ Pillow==10.1.0
95
+ platformdirs==4.0.0
96
+ posthog==3.0.2
97
+ prompt-toolkit==3.0.41
98
+ protobuf==4.25.1
99
+ psutil==5.9.6
100
+ pulsar-client==3.3.0
101
+ pure-eval==0.2.2
102
+ py-cpuinfo==9.0.0
103
+ pyarrow==14.0.1
104
+ pyasn1==0.5.0
105
+ pyasn1-modules==0.3.0
106
+ pycparser==2.21
107
+ pycryptodomex==3.19.0
108
+ pydantic==2.5.1
109
+ pydantic_core==2.14.3
110
+ pydeck==0.8.1b0
111
+ pydub==0.25.1
112
+ Pygments==2.16.1
113
+ pypdf==3.17.1
114
+ PyPika==0.48.9
115
+ pyreadline3==3.4.1
116
+ python-dateutil==2.8.2
117
+ python-decouple==3.8
118
+ python-dotenv==1.0.0
119
+ pytz==2023.3.post1
120
+ PyYAML==6.0.1
121
+ pyzmq==25.1.1
122
+ referencing==0.31.0
123
+ regex==2023.10.3
124
+ requests==2.31.0
125
+ requests-oauthlib==1.3.1
126
+ rich==13.7.0
127
+ rpds-py==0.12.0
128
+ rsa==4.9
129
+ safetensors==0.4.0
130
+ scikit-learn==1.3.2
131
+ scipy==1.11.3
132
+ sentence-transformers==2.2.2
133
+ sentencepiece==0.1.99
134
+ six==1.16.0
135
+ smmap==5.0.1
136
+ sniffio==1.3.0
137
+ SQLAlchemy==2.0.23
138
+ stack-data==0.6.3
139
+ starlette==0.27.0
140
+ streamlit==1.28.2
141
+ sympy==1.12
142
+ tenacity==8.2.3
143
+ threadpoolctl==3.2.0
144
+ tiktoken==0.5.1
145
+ tokenizers==0.15.0
146
+ toml==0.10.2
147
+ toolz==0.12.0
148
+ torch==2.1.1
149
+ torchvision==0.16.1
150
+ tornado==6.3.3
151
+ tqdm==4.66.1
152
+ traitlets==5.13.0
153
+ transformers==4.35.2
154
+ typer==0.9.0
155
+ typing-inspect==0.9.0
156
+ typing_extensions==4.8.0
157
+ tzdata==2023.3
158
+ tzlocal==5.2
159
+ urllib3==1.26.18
160
+ uvicorn==0.24.0.post1
161
+ validators==0.22.0
162
+ watchdog==3.0.0
163
+ watchfiles==0.21.0
164
+ wcwidth==0.2.10
165
+ websocket-client==1.6.4
166
+ websockets==12.0
167
+ wrapt==1.16.0
168
+ yarl==1.9.2
169
+ yt-dlp==2023.11.16
170
+ zipp==3.17.0