Koshti10 commited on
Commit
2927735
1 Parent(s): cdc9be2

Upload 9 files

Browse files
Files changed (6) hide show
  1. app.py +65 -12
  2. app_drive.py +176 -0
  3. driveapi/drive.py +71 -0
  4. driveapi/drive_database.py +31 -0
  5. driveapi/service.py +30 -0
  6. lc_base/chain.py +9 -38
app.py CHANGED
@@ -2,15 +2,25 @@
2
 
3
  import gradio as gr
4
  import time
5
- from lc_base.chain import openai_chain
6
  import os
7
- from lc_base.logs import save_log
 
 
 
 
 
 
 
 
 
 
8
 
9
  dir = os.path.join("outputs", "combined", "policy_eu_asia_usa", "faiss_index")
10
  # dir = os.path.join("outputs", "policy", "1", "faiss_index")
11
 
12
- title = """<h1 align="center">Chat</h1>"""
13
- description = """<br><br><h3 align="center">This is a literature chat model, which can currently answer questions to AI Policies provided.</h3>"""
14
 
15
  def save_api_key(api_key):
16
  os.environ['OPENAI_API_KEY'] = str(api_key)
@@ -20,18 +30,44 @@ def user(user_message, history):
20
  return "", history + [[user_message, None]]
21
 
22
  def respond(message, chat_history):
 
 
23
  question = str(message)
24
  chain = openai_chain(inp_dir=dir)
 
25
  start_time = time.time()
26
- output = chain.get_response(query=question, k=100, model_name="gpt-4-1106-preview", type="stuff")
 
27
  print(output)
28
- time_taken = time.time() - start_time
29
- save_log(file_path='logs/policy_combined.csv', query=question, response=output, model_name="gpt-4-1106-preview", time_taken=time_taken, inp="Policy", data="Policy/1")
 
 
 
 
30
  bot_message = output
31
  chat_history.append((message, bot_message))
 
32
  time.sleep(2)
33
  return " ", chat_history
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
36
  gr.HTML(title)
37
 
@@ -40,15 +76,32 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
40
 
41
  chatbot = gr.Chatbot(height=750)
42
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
43
- show_label=False, container=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
 
46
 
47
- gr.Examples([
48
- ["What are the challenges and opportunities of AI in supply chain management?"],
49
- ["What does these documents talk about?"],
50
 
51
- ], inputs=msg, label= "Click on any example to copy in the chatbox"
 
 
52
  )
53
 
54
  gr.HTML(description)
 
2
 
3
  import gradio as gr
4
  import time
5
+ import datetime
6
  import os
7
+
8
+ from lc_base.chain import openai_chain
9
+ from driveapi.drive import upload_chat_to_drive
10
+
11
+ # global time_diff, model_name, search_type
12
+ time_diff = 0
13
+ model_name="gpt-3.5-turbo-1106"
14
+ search_type = "stuff"
15
+ input_question = ""
16
+ model_response = ""
17
+ user_feedback = ""
18
 
19
  dir = os.path.join("outputs", "combined", "policy_eu_asia_usa", "faiss_index")
20
  # dir = os.path.join("outputs", "policy", "1", "faiss_index")
21
 
22
+ title = """<h1 align="center">ResearchBuddy</h1>"""
23
+ description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
24
 
25
  def save_api_key(api_key):
26
  os.environ['OPENAI_API_KEY'] = str(api_key)
 
30
  return "", history + [[user_message, None]]
31
 
32
def respond(message, chat_history):
    """Answer `message` with the retrieval chain and append the exchange to the chat.

    Side effects: updates the module-level diagnostics (time_diff,
    model_response, input_question) that save_feedback later uploads.
    """
    global time_diff, model_response, input_question

    question = str(message)
    qa = openai_chain(inp_dir=dir)

    started = time.time()
    answer = qa.get_response(query=question, k=10, model_name=model_name, type=search_type)
    print(answer)

    # Stash diagnostics so the feedback handler can log this exchange.
    time_diff = time.time() - started
    model_response = answer
    input_question = question

    chat_history.append((message, answer))

    time.sleep(2)
    return " ", chat_history
53
 
54
def save_feedback(feedback):
    """Record the user's Yes/No rating and upload the latest exchange to Drive.

    Only uploads when the user actively picked "Yes" or "No"; the neutral
    default ("🤔") sends nothing.
    """
    global user_feedback
    user_feedback = feedback

    curr_date = datetime.datetime.now()
    file_name = f"chat_{curr_date.day}_{curr_date.month}_{curr_date.hour}_{curr_date.minute}.csv"
    log_data = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, user_feedback],
    ]

    # Original check mixed `user_feedback` and `feedback` (same value);
    # test one name consistently with a membership check.
    if user_feedback in ("Yes", "No"):
        upload_chat_to_drive(log_data, file_name)
67
+
68
def default_feedback():
    """Reset the rating widget to the neutral 'no opinion' emoji."""
    return "🤔"
70
+
71
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
72
  gr.HTML(title)
73
 
 
76
 
77
  chatbot = gr.Chatbot(height=750)
78
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
79
+ show_label=False, container=False)
80
+
81
+ with gr.Row():
82
+ with gr.Column():
83
+ gr.Examples([
84
+ ["Explain these documents to me in simpler terms."],
85
+ ["What does these documents talk about?"],
86
+
87
+ ], inputs=msg, label= "Click on any example to copy in the chatbox"
88
+ )
89
+
90
+ with gr.Column():
91
+ feedback_radio = gr.Radio(
92
+ choices=["Yes", "No", "🤔"],
93
+ value=["🤔"],
94
+ label="Did you like the latest response?",
95
+ info="Selecting Yes/No will send the following diagnostic data - Question, Response, Time Taken",
96
+ )
97
 
98
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
99
+ msg.submit(default_feedback, outputs=[feedback_radio])
100
 
 
 
 
101
 
102
+ feedback_radio.change(
103
+ fn=save_feedback,
104
+ inputs=[feedback_radio]
105
  )
106
 
107
  gr.HTML(description)
app_drive.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Application file for Gradio App for OpenAI Model
2
+
3
+ import gradio as gr
4
+ import time
5
+ import datetime
6
+ import os
7
+
8
+ from lc_base.chain import openai_chain
9
+ from driveapi.drive import upload_chat_to_drive
10
+ from driveapi.drive_database import create_chroma_db
11
+
12
+ # global time_diff, model_name, search_type
13
+ time_diff = 0
14
+ # model_name="gpt-3.5-turbo-1106"
15
+ model_name = "gpt-4-1106-preview"
16
+ search_type = "stuff"
17
+ input_question = ""
18
+ model_response = ""
19
+ user_feedback = ""
20
+
21
+ dir = ""
22
+ title = """<h1 align="center">ResearchBuddy</h1>"""
23
+ description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
24
+
25
def save_api_key(api_key):
    """Store the OpenAI API key in the process environment and echo it back."""
    key = str(api_key)
    os.environ['OPENAI_API_KEY'] = key
    return f"API Key saved in the environment: {key}"
28
+
29
def save_drive_link(drive_link):
    """Store the shared Google Drive folder link in the process environment.

    Returns None so the bound Gradio textbox keeps its value.
    """
    os.environ['DRIVE_LINK'] = str(drive_link)
    # Fixed copy-paste bug: the original message said "API Key saved".
    print(f"Drive link saved in the environment: {drive_link}")
    return None
33
+
34
def create_data_from_drive():
    """Build the vector database from the shared drive and expose it globally."""
    global db
    db = create_chroma_db()
    return "Processing Completed - You can start the chat now!"
38
+
39
def user(user_message, history):
    """Append the user's message to the history with an empty bot slot."""
    updated = history + [[user_message, None]]
    return "", updated
41
+
42
def respond(message, chat_history):
    """Answer `message` via the drive-backed QA chain and extend the chat.

    Side effects: updates the module-level diagnostics (time_diff,
    model_response, input_question) used by the feedback loggers.
    """
    global time_diff, model_response, input_question

    print("Database is ...................")
    print(type(db))

    question = str(message)
    qa = openai_chain(inp_dir=dir)

    # The query is currently the raw question (an earlier prompt-prefix
    # experiment was abandoned).
    query = question

    started = time.time()
    answer = qa.get_response_from_drive(query=query, database=db, k=10, model_name=model_name, type=search_type)
    print(answer)

    # Stash diagnostics so the feedback handlers can log this exchange.
    time_diff = time.time() - started
    model_response = answer
    input_question = question

    chat_history.append((message, answer))

    time.sleep(2)
    return " ", chat_history
75
+
76
def save_feedback(feedback):
    """Record the user's rating; upload the latest exchange unless neutral."""
    global user_feedback
    user_feedback = feedback

    # One CSV per feedback event, timestamped to the second.
    now = datetime.datetime.now()
    file_name = f"chat_{now.day}_{now.month}_{now.hour}_{now.minute}_{now.second}.csv"
    rows = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, user_feedback],
    ]

    # "🤔" is the no-opinion default; only real ratings get uploaded.
    if user_feedback != "🤔":
        upload_chat_to_drive(rows, file_name)
89
+
90
def default_feedback():
    """Reset the rating widget back to the neutral 'no opinion' emoji."""
    return "🤔"
92
+
93
def text_feedback(feedback):
    """Upload free-text feedback together with the latest exchange to Drive.

    Bug fix: the original declared `global text_feedback` and assigned the
    string to it, clobbering this very function object after the first call.
    The value is only used locally, so no global is needed at all.
    """
    curr_date = datetime.datetime.now()
    file_name = f"chat_{curr_date.day}_{curr_date.month}_{curr_date.hour}_{curr_date.minute}_{curr_date.second}.csv"
    log_data = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, feedback],
    ]

    upload_chat_to_drive(log_data, file_name)
105
+
106
# UI layout and event wiring for the drive-backed chat app.
# NOTE(review): indentation reconstructed from the diff rendering — verify
# against the original file before applying.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
    gr.HTML(title)

    # NOTE(review): `global` at module level is a no-op in Python; `db` is
    # actually created by create_data_from_drive(). Presumably kept as a
    # reminder — confirm and consider removing.
    global db

    # Credentials row: API key and shared-drive link, saved on Enter.
    with gr.Row():
        with gr.Column():
            api_key_input = gr.Textbox(lines=1, label="Enter your OpenAI API Key, then press Enter...")

        with gr.Column():
            drive_link_input = gr.Textbox(lines=1, label="Enter your shared drive link, then press Enter...")

    with gr.Row():
        process_files_input = gr.Button(value="Process files")

    with gr.Row():
        status_message = gr.Text(label="Status", value="Click - Process Files")

    api_key_input.submit(save_api_key, [api_key_input])
    drive_link_input.submit(fn=save_drive_link, inputs=[drive_link_input])
    # NOTE(review): `drive_link_check` is assigned but never used.
    drive_link_check = os.environ.get("DRIVE_LINK")
    process_files_input.click(fn=create_data_from_drive, outputs=status_message)

    chatbot = gr.Chatbot(height=750)
    msg = gr.Textbox(label="Send a message", placeholder="Send a message",
                     show_label=False, container=False)

    # Clickable example prompts that copy themselves into the textbox.
    with gr.Row():
        with gr.Column():
            gr.Examples([
                ["Explain these documents to me in simpler terms."],
                ["What does these documents talk about?"],
                ["Give the key topics covered in these documents in less than 10 words."],
                ["What are the key findings in these documents?"],
            ], inputs=msg, label= "Click on any example to copy in the chatbox"
            )

    # Feedback widgets: a 1-6 rating radio (🤔 = send nothing) plus a
    # free-text comment box.
    with gr.Row():
        with gr.Column():
            feedback_radio = gr.Radio(
                choices=["1", "2", "3", "4", "5", "6", "🤔"],
                value=["🤔"],
                label="How would you rate the current response?",
                info="Choosing a number sends the following diagnostic data to the developer - Question, Response, Time Taken. Let it be 🤔 to not send any data.",
            )

        with gr.Column():
            feedback_text = gr.Textbox(lines=1, label="Additional comments on the current response...")

    # Each submitted message answers the question and resets the rating.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(default_feedback, outputs=[feedback_radio])

    feedback_radio.change(
        fn=save_feedback,
        inputs=[feedback_radio]
    )

    feedback_text.submit(
        fn=text_feedback,
        inputs=[feedback_text]
    )

    gr.HTML(description)


chat.queue()
chat.launch()
driveapi/drive.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ADDING GOOGLE DRIVE SUPPORT
2
+
3
+ import io
4
+ import os
5
+ import csv
6
+ import PyPDF2
7
+
8
+ from google.oauth2 import service_account
9
+ from googleapiclient.discovery import build
10
+ from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
11
+ from driveapi.service import get_credentials
12
+
13
+ credentials_info = get_credentials()
14
+ credentials = service_account.Credentials.from_service_account_info(credentials_info)
15
+ service = build('drive', 'v3', credentials=credentials)
16
+
17
+ logs_id = os.environ.get('LOGS_ID')
18
+
19
+ # Save Logs
20
def upload_chat_to_drive(chat_history, file_name):
    """Serialize `chat_history` rows as CSV and upload them to the logs folder.

    The upload is converted to a Google Sheet by the target MIME type.
    """
    # Render the rows as CSV entirely in memory.
    buffer = io.StringIO()
    csv.writer(buffer).writerows(chat_history)
    buffer.seek(0)

    file_metadata = {
        'name': file_name,
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [logs_id],
    }

    media = MediaIoBaseUpload(buffer, mimetype='text/csv')
    service.files().create(body=file_metadata, media_body=media, fields='id').execute()
37
+
38
+
39
+ ## Read PDF files
40
def download_file(file_id):
    """Download a Drive file's bytes and return them as a seekable BytesIO."""
    # A fresh service client per call (mirrors the module-level one).
    drive = build('drive', 'v3', credentials=credentials)
    request = drive.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    buffer.seek(0)
    return buffer
50
+
51
+ # Function to process a PDF file
52
def process_pdf(file_stream):
    """Extract and concatenate the text of every page in a PDF stream.

    Args:
        file_stream: binary file-like object containing the PDF.

    Returns:
        All page text joined into a single string.
    """
    pdf_reader = PyPDF2.PdfReader(file_stream)
    # Iterate pages directly instead of indexing via range(len(...)); also
    # guard against extract_text() yielding None for image-only pages
    # (behavior of older PyPDF2 releases).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
59
+
60
def drive_content(shared_folder_id):
    """Concatenate the extracted text of every file in a shared Drive folder."""
    # Enumerate the folder's direct children.
    listing = service.files().list(
        q=f"'{shared_folder_id}' in parents",
        fields="files(id, name, mimeType)",
    ).execute()
    entries = listing.get('files', [])

    content = ''
    for entry in entries:
        print(f"Processing file: {entry['name']}")
        stream = download_file(entry['id'])
        content += str(process_pdf(stream))

    return content
driveapi/drive_database.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Change this in gradio
2
+ import os
3
+ from driveapi.drive import drive_content
4
+ from driveapi.service import get_shared_folder_id
5
+
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.vectorstores import FAISS
9
+
10
+ # drive_shared_link = os.environ.get('DRIVE_LINK')
11
+ # shared_folder_id = get_shared_folder_id(drive_shared_link)
12
+
13
+ def create_chroma_db():
14
+ drive_shared_link = os.environ.get('DRIVE_LINK')
15
+ if drive_shared_link == None:
16
+ return ""
17
+ shared_folder_id = get_shared_folder_id(drive_shared_link)
18
+ raw_text = drive_content(shared_folder_id)
19
+ embedding = OpenAIEmbeddings()
20
+
21
+ text_splitter = CharacterTextSplitter(
22
+ separator = "\n",
23
+ chunk_size = 1000,
24
+ chunk_overlap = 200,
25
+ length_function = len,
26
+ )
27
+ texts = text_splitter.split_text(raw_text)
28
+ print('Length of text: ' + str(len(raw_text)))
29
+ db = FAISS.from_texts(texts, embedding)
30
+
31
+ return db
driveapi/service.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def get_credentials():
    """Assemble Google service-account credential info from the environment.

    Reads GOOGLE_CLIENT_EMAIL, GOOGLE_PRIVATE_KEY and GOOGLE_TOKEN_URI and
    returns a dict suitable for
    service_account.Credentials.from_service_account_info().
    """
    # The key is stored with literal "\n" sequences; restore real newlines.
    private_key = os.environ.get('GOOGLE_PRIVATE_KEY').replace('\\n', '\n')

    return {
        'type': 'service_account',
        'client_email': os.environ.get('GOOGLE_CLIENT_EMAIL'),
        'private_key': private_key,
        'token_uri': os.environ.get('GOOGLE_TOKEN_URI'),
        'scopes': ['https://www.googleapis.com/auth/drive'],
    }
+
20
+
21
def get_shared_folder_id(drive_shared_link):
    """Extract the folder ID from a Google Drive shared-folder URL.

    Bug fix: the original searched only for '?usp=sharing'; when absent,
    str.find returned -1 and the slice silently dropped the last character
    of the folder ID. It also produced garbage when '/folders/' was missing.
    """
    marker = '/folders/'
    start = drive_shared_link.find(marker)
    if start == -1:
        # Not a folder link; nothing to extract.
        return ""
    start += len(marker)
    # Strip any query string, not just '?usp=sharing'.
    end = drive_shared_link.find('?', start)
    if end == -1:
        end = len(drive_shared_link)
    return str(drive_shared_link[start:end])
28
+
29
+
30
+
lc_base/chain.py CHANGED
@@ -27,43 +27,14 @@ class openai_chain():
27
 
28
  return response
29
 
 
 
 
30
 
31
-
32
- """
33
- TODO
34
-
35
- 1) Map_Reduce - 7 mins just to process a 27 page report
36
- 1.5) Map_reduce on smaller reports -
37
- 2) Stuff - 30 secs to process and give the output
38
-
39
- Check condition? <4K use #stuff else use #Map_reduce
40
-
41
- Potential Errors
42
-
43
- Explain the key points of this report in detail.
44
-
45
- I'm sorry, but I can't provide a detailed explanation of the key points of the report as the relevant portion of the document you provided does not contain any specific information about the report or its key points. It mainly talks about the services provided by Boston Consulting Group (BCG) and how they aim to help clients and make a positive impact in the world. If you have access to the full report, please provide more specific information or context, and I'll be happy to assist you further.
46
-
47
- Preprocess data - remove \n or blank spaces
48
-
49
-
50
- def remove_newlines(serie):
51
- serie = serie.str.replace('\n', ' ')
52
- serie = serie.str.replace('\\n', ' ')
53
- serie = serie.str.replace(' ', ' ')
54
- serie = serie.str.replace(' ', ' ')
55
- return serie
56
-
57
- llmsherpa -> nlmatics for PDF reading into subsections
58
- Use GPT-4? -> check speed
59
- Use two modes -> summarizer wholedoc + chat top3doc
60
-
61
- compare summarizing answers by stuff and map_reduce
62
-
63
- use top6 across the paper -> 500 each
64
- top 40 -> 100 each
65
 
66
- Take consulting reports -> find top 10 challenges
67
- Take government reports from eu, china, india -> ask questions about the challenges based on countries
68
- Why EU is agging behind in AI? and so on..
69
- """
 
27
 
28
  return response
29
 
30
+ def get_response_from_drive(self, query, database, k=3, type="stuff", model_name="gpt-3.5-turbo"):
31
+ # Get relevant docs
32
+ docs = database.similarity_search(query, k=k)
33
 
34
+ # Create chain
35
+ chain = load_qa_chain(ChatOpenAI(model=model_name), chain_type=type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ #Get Response
38
+ response = chain.run(input_documents=docs, question=query)
39
+
40
+ return response