Koshti10 commited on
Commit
2927735
1 Parent(s): cdc9be2

Upload 9 files

Browse files
Files changed (6) hide show
  1. app.py +65 -12
  2. app_drive.py +176 -0
  3. driveapi/drive.py +71 -0
  4. driveapi/drive_database.py +31 -0
  5. driveapi/service.py +30 -0
  6. lc_base/chain.py +9 -38
app.py CHANGED
@@ -2,15 +2,25 @@
2
 
3
  import gradio as gr
4
  import time
5
- from lc_base.chain import openai_chain
6
  import os
7
- from lc_base.logs import save_log
 
 
 
 
 
 
 
 
 
 
8
 
9
  dir = os.path.join("outputs", "combined", "policy_eu_asia_usa", "faiss_index")
10
  # dir = os.path.join("outputs", "policy", "1", "faiss_index")
11
 
12
- title = """<h1 align="center">Chat</h1>"""
13
- description = """<br><br><h3 align="center">This is a literature chat model, which can currently answer questions to AI Policies provided.</h3>"""
14
 
15
  def save_api_key(api_key):
16
  os.environ['OPENAI_API_KEY'] = str(api_key)
@@ -20,18 +30,44 @@ def user(user_message, history):
20
  return "", history + [[user_message, None]]
21
 
22
  def respond(message, chat_history):
 
 
23
  question = str(message)
24
  chain = openai_chain(inp_dir=dir)
 
25
  start_time = time.time()
26
- output = chain.get_response(query=question, k=100, model_name="gpt-4-1106-preview", type="stuff")
 
27
  print(output)
28
- time_taken = time.time() - start_time
29
- save_log(file_path='logs/policy_combined.csv', query=question, response=output, model_name="gpt-4-1106-preview", time_taken=time_taken, inp="Policy", data="Policy/1")
 
 
 
 
30
  bot_message = output
31
  chat_history.append((message, bot_message))
 
32
  time.sleep(2)
33
  return " ", chat_history
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
36
  gr.HTML(title)
37
 
@@ -40,15 +76,32 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
40
 
41
  chatbot = gr.Chatbot(height=750)
42
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
43
- show_label=False, container=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
 
46
 
47
- gr.Examples([
48
- ["What are the challenges and opportunities of AI in supply chain management?"],
49
- ["What does these documents talk about?"],
50
 
51
- ], inputs=msg, label= "Click on any example to copy in the chatbox"
 
 
52
  )
53
 
54
  gr.HTML(description)
 
2
 
3
  import gradio as gr
4
  import time
5
+ import datetime
6
  import os
7
+
8
+ from lc_base.chain import openai_chain
9
+ from driveapi.drive import upload_chat_to_drive
10
+
11
+ # global time_diff, model_name, search_type
12
+ time_diff = 0
13
+ model_name="gpt-3.5-turbo-1106"
14
+ search_type = "stuff"
15
+ input_question = ""
16
+ model_response = ""
17
+ user_feedback = ""
18
 
19
  dir = os.path.join("outputs", "combined", "policy_eu_asia_usa", "faiss_index")
20
  # dir = os.path.join("outputs", "policy", "1", "faiss_index")
21
 
22
+ title = """<h1 align="center">ResearchBuddy</h1>"""
23
+ description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
24
 
25
  def save_api_key(api_key):
26
  os.environ['OPENAI_API_KEY'] = str(api_key)
 
30
  return "", history + [[user_message, None]]
31
 
32
def respond(message, chat_history):
    """Answer `message` with the retrieval chain and append the exchange to the chat.

    Side effects: updates the module-level diagnostics (time_diff,
    model_response, input_question) that save_feedback later uploads.
    """
    global time_diff, model_response, input_question

    question = str(message)
    qa = openai_chain(inp_dir=dir)

    started = time.time()
    answer = qa.get_response(query=question, k=10, model_name=model_name, type=search_type)
    print(answer)

    # Stash diagnostics so the feedback handler can log this exchange.
    time_diff = time.time() - started
    model_response = answer
    input_question = question

    chat_history.append((message, answer))

    time.sleep(2)
    return " ", chat_history
53
 
54
def save_feedback(feedback):
    """Record the user's Yes/No rating and upload the latest exchange to Drive.

    Only uploads when the user actively picked "Yes" or "No"; the neutral
    default ("🤔") sends nothing.
    """
    global user_feedback
    user_feedback = feedback

    curr_date = datetime.datetime.now()
    file_name = f"chat_{curr_date.day}_{curr_date.month}_{curr_date.hour}_{curr_date.minute}.csv"
    log_data = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, user_feedback],
    ]

    # Original check mixed `user_feedback` and `feedback` (same value);
    # test one name consistently with a membership check.
    if user_feedback in ("Yes", "No"):
        upload_chat_to_drive(log_data, file_name)
67
+
68
def default_feedback():
    """Reset the rating widget to the neutral 'no opinion' emoji."""
    return "🤔"
70
+
71
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
72
  gr.HTML(title)
73
 
 
76
 
77
  chatbot = gr.Chatbot(height=750)
78
  msg = gr.Textbox(label="Send a message", placeholder="Send a message",
79
+ show_label=False, container=False)
80
+
81
+ with gr.Row():
82
+ with gr.Column():
83
+ gr.Examples([
84
+ ["Explain these documents to me in simpler terms."],
85
+ ["What does these documents talk about?"],
86
+
87
+ ], inputs=msg, label= "Click on any example to copy in the chatbox"
88
+ )
89
+
90
+ with gr.Column():
91
+ feedback_radio = gr.Radio(
92
+ choices=["Yes", "No", "🤔"],
93
+ value=["🤔"],
94
+ label="Did you like the latest response?",
95
+ info="Selecting Yes/No will send the following diagnostic data - Question, Response, Time Taken",
96
+ )
97
 
98
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
99
+ msg.submit(default_feedback, outputs=[feedback_radio])
100
 
 
 
 
101
 
102
+ feedback_radio.change(
103
+ fn=save_feedback,
104
+ inputs=[feedback_radio]
105
  )
106
 
107
  gr.HTML(description)
app_drive.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Application file for Gradio App for OpenAI Model
2
+
3
+ import gradio as gr
4
+ import time
5
+ import datetime
6
+ import os
7
+
8
+ from lc_base.chain import openai_chain
9
+ from driveapi.drive import upload_chat_to_drive
10
+ from driveapi.drive_database import create_chroma_db
11
+
12
+ # global time_diff, model_name, search_type
13
+ time_diff = 0
14
+ # model_name="gpt-3.5-turbo-1106"
15
+ model_name = "gpt-4-1106-preview"
16
+ search_type = "stuff"
17
+ input_question = ""
18
+ model_response = ""
19
+ user_feedback = ""
20
+
21
+ dir = ""
22
+ title = """<h1 align="center">ResearchBuddy</h1>"""
23
+ description = """<br><br><h3 align="center">This is a GPT based Research Buddy to assist in navigating new research topics.</h3>"""
24
+
25
def save_api_key(api_key):
    """Store the OpenAI API key in the process environment and echo it back."""
    key = str(api_key)
    os.environ['OPENAI_API_KEY'] = key
    return f"API Key saved in the environment: {key}"
28
+
29
def save_drive_link(drive_link):
    """Store the shared Google Drive folder link in the process environment.

    Returns None so the bound Gradio textbox keeps its value.
    """
    os.environ['DRIVE_LINK'] = str(drive_link)
    # Fixed copy-paste bug: the original message said "API Key saved".
    print(f"Drive link saved in the environment: {drive_link}")
    return None
33
+
34
def create_data_from_drive():
    """Build the vector database from the shared drive and expose it globally."""
    global db
    db = create_chroma_db()
    return "Processing Completed - You can start the chat now!"
38
+
39
def user(user_message, history):
    """Append the user's message to the history with an empty bot slot."""
    updated = history + [[user_message, None]]
    return "", updated
41
+
42
def respond(message, chat_history):
    """Answer `message` via the drive-backed QA chain and extend the chat.

    Side effects: updates the module-level diagnostics (time_diff,
    model_response, input_question) used by the feedback loggers.
    """
    global time_diff, model_response, input_question

    print("Database is ...................")
    print(type(db))

    question = str(message)
    qa = openai_chain(inp_dir=dir)

    # The query is currently the raw question (an earlier prompt-prefix
    # experiment was abandoned).
    query = question

    started = time.time()
    answer = qa.get_response_from_drive(query=query, database=db, k=10, model_name=model_name, type=search_type)
    print(answer)

    # Stash diagnostics so the feedback handlers can log this exchange.
    time_diff = time.time() - started
    model_response = answer
    input_question = question

    chat_history.append((message, answer))

    time.sleep(2)
    return " ", chat_history
75
+
76
def save_feedback(feedback):
    """Record the user's rating; upload the latest exchange unless neutral."""
    global user_feedback
    user_feedback = feedback

    # One CSV per feedback event, timestamped to the second.
    now = datetime.datetime.now()
    file_name = f"chat_{now.day}_{now.month}_{now.hour}_{now.minute}_{now.second}.csv"
    rows = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, user_feedback],
    ]

    # "🤔" is the no-opinion default; only real ratings get uploaded.
    if user_feedback != "🤔":
        upload_chat_to_drive(rows, file_name)
89
+
90
def default_feedback():
    """Reset the rating widget back to the neutral 'no opinion' emoji."""
    return "🤔"
92
+
93
def text_feedback(feedback):
    """Upload free-text feedback together with the latest exchange to Drive.

    Bug fix: the original declared `global text_feedback` and assigned the
    string to it, clobbering this very function object after the first call.
    The value is only used locally, so no global is needed at all.
    """
    curr_date = datetime.datetime.now()
    file_name = f"chat_{curr_date.day}_{curr_date.month}_{curr_date.hour}_{curr_date.minute}_{curr_date.second}.csv"
    log_data = [
        ["Question", "Response", "Model", "Time", "Feedback"],
        [input_question, model_response, model_name, time_diff, feedback],
    ]

    upload_chat_to_drive(log_data, file_name)
105
+
106
# UI layout and event wiring for the drive-backed chat app.
# NOTE(review): indentation reconstructed from the diff rendering — verify
# against the original file before applying.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate")) as chat:
    gr.HTML(title)

    # NOTE(review): `global` at module level is a no-op in Python; `db` is
    # actually created by create_data_from_drive(). Presumably kept as a
    # reminder — confirm and consider removing.
    global db

    # Credentials row: API key and shared-drive link, saved on Enter.
    with gr.Row():
        with gr.Column():
            api_key_input = gr.Textbox(lines=1, label="Enter your OpenAI API Key, then press Enter...")

        with gr.Column():
            drive_link_input = gr.Textbox(lines=1, label="Enter your shared drive link, then press Enter...")

    with gr.Row():
        process_files_input = gr.Button(value="Process files")

    with gr.Row():
        status_message = gr.Text(label="Status", value="Click - Process Files")

    api_key_input.submit(save_api_key, [api_key_input])
    drive_link_input.submit(fn=save_drive_link, inputs=[drive_link_input])
    # NOTE(review): `drive_link_check` is assigned but never used.
    drive_link_check = os.environ.get("DRIVE_LINK")
    process_files_input.click(fn=create_data_from_drive, outputs=status_message)

    chatbot = gr.Chatbot(height=750)
    msg = gr.Textbox(label="Send a message", placeholder="Send a message",
                     show_label=False, container=False)

    # Clickable example prompts that copy themselves into the textbox.
    with gr.Row():
        with gr.Column():
            gr.Examples([
                ["Explain these documents to me in simpler terms."],
                ["What does these documents talk about?"],
                ["Give the key topics covered in these documents in less than 10 words."],
                ["What are the key findings in these documents?"],
            ], inputs=msg, label= "Click on any example to copy in the chatbox"
            )

    # Feedback widgets: a 1-6 rating radio (🤔 = send nothing) plus a
    # free-text comment box.
    with gr.Row():
        with gr.Column():
            feedback_radio = gr.Radio(
                choices=["1", "2", "3", "4", "5", "6", "🤔"],
                value=["🤔"],
                label="How would you rate the current response?",
                info="Choosing a number sends the following diagnostic data to the developer - Question, Response, Time Taken. Let it be 🤔 to not send any data.",
            )

        with gr.Column():
            feedback_text = gr.Textbox(lines=1, label="Additional comments on the current response...")

    # Each submitted message answers the question and resets the rating.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(default_feedback, outputs=[feedback_radio])

    feedback_radio.change(
        fn=save_feedback,
        inputs=[feedback_radio]
    )

    feedback_text.submit(
        fn=text_feedback,
        inputs=[feedback_text]
    )

    gr.HTML(description)


chat.queue()
chat.launch()
driveapi/drive.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ADDING GOOGLE DRIVE SUPPORT
2
+
3
+ import io
4
+ import os
5
+ import csv
6
+ import PyPDF2
7
+
8
+ from google.oauth2 import service_account
9
+ from googleapiclient.discovery import build
10
+ from googleapiclient.http import MediaIoBaseUpload, MediaIoBaseDownload
11
+ from driveapi.service import get_credentials
12
+
13
+ credentials_info = get_credentials()
14
+ credentials = service_account.Credentials.from_service_account_info(credentials_info)
15
+ service = build('drive', 'v3', credentials=credentials)
16
+
17
+ logs_id = os.environ.get('LOGS_ID')
18
+
19
+ # Save Logs
20
def upload_chat_to_drive(chat_history, file_name):
    """Serialize `chat_history` rows as CSV and upload them to the logs folder.

    The upload is converted to a Google Sheet by the target MIME type.
    """
    # Render the rows as CSV entirely in memory.
    buffer = io.StringIO()
    csv.writer(buffer).writerows(chat_history)
    buffer.seek(0)

    file_metadata = {
        'name': file_name,
        'mimeType': 'application/vnd.google-apps.spreadsheet',
        'parents': [logs_id],
    }

    media = MediaIoBaseUpload(buffer, mimetype='text/csv')
    service.files().create(body=file_metadata, media_body=media, fields='id').execute()
37
+
38
+
39
+ ## Read PDF files
40
def download_file(file_id):
    """Download a Drive file's bytes and return them as a seekable BytesIO."""
    # A fresh service client per call (mirrors the module-level one).
    drive = build('drive', 'v3', credentials=credentials)
    request = drive.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    buffer.seek(0)
    return buffer
50
+
51
+ # Function to process a PDF file
52
def process_pdf(file_stream):
    """Extract and concatenate the text of every page in a PDF stream.

    Args:
        file_stream: binary file-like object containing the PDF.

    Returns:
        All page text joined into a single string.
    """
    pdf_reader = PyPDF2.PdfReader(file_stream)
    # Iterate pages directly instead of indexing via range(len(...)); also
    # guard against extract_text() yielding None for image-only pages
    # (behavior of older PyPDF2 releases).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
59
+
60
def drive_content(shared_folder_id):
    """Concatenate the extracted text of every file in a shared Drive folder."""
    # Enumerate the folder's direct children.
    listing = service.files().list(
        q=f"'{shared_folder_id}' in parents",
        fields="files(id, name, mimeType)",
    ).execute()
    entries = listing.get('files', [])

    content = ''
    for entry in entries:
        print(f"Processing file: {entry['name']}")
        stream = download_file(entry['id'])
        content += str(process_pdf(stream))

    return content
driveapi/drive_database.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Change this in gradio
2
+ import os
3
+ from driveapi.drive import drive_content
4
+ from driveapi.service import get_shared_folder_id
5
+
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.vectorstores import FAISS
9
+
10
+ # drive_shared_link = os.environ.get('DRIVE_LINK')
11
+ # shared_folder_id = get_shared_folder_id(drive_shared_link)
12
+
13
+ def create_chroma_db():
14
+ drive_shared_link = os.environ.get('DRIVE_LINK')
15
+ if drive_shared_link == None:
16
+ return ""
17
+ shared_folder_id = get_shared_folder_id(drive_shared_link)
18
+ raw_text = drive_content(shared_folder_id)
19
+ embedding = OpenAIEmbeddings()
20
+
21
+ text_splitter = CharacterTextSplitter(
22
+ separator = "\n",
23
+ chunk_size = 1000,
24
+ chunk_overlap = 200,
25
+ length_function = len,
26
+ )
27
+ texts = text_splitter.split_text(raw_text)
28
+ print('Length of text: ' + str(len(raw_text)))
29
+ db = FAISS.from_texts(texts, embedding)
30
+
31
+ return db
driveapi/service.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def get_credentials():
    """Assemble Google service-account credential info from the environment.

    Reads GOOGLE_CLIENT_EMAIL, GOOGLE_PRIVATE_KEY and GOOGLE_TOKEN_URI and
    returns a dict suitable for
    service_account.Credentials.from_service_account_info().
    """
    # The key is stored with literal "\n" sequences; restore real newlines.
    private_key = os.environ.get('GOOGLE_PRIVATE_KEY').replace('\\n', '\n')

    return {
        'type': 'service_account',
        'client_email': os.environ.get('GOOGLE_CLIENT_EMAIL'),
        'private_key': private_key,
        'token_uri': os.environ.get('GOOGLE_TOKEN_URI'),
        'scopes': ['https://www.googleapis.com/auth/drive'],
    }
+
20
+
21
def get_shared_folder_id(drive_shared_link):
    """Extract the folder ID from a Google Drive shared-folder URL.

    Bug fix: the original searched only for '?usp=sharing'; when absent,
    str.find returned -1 and the slice silently dropped the last character
    of the folder ID. It also produced garbage when '/folders/' was missing.
    """
    marker = '/folders/'
    start = drive_shared_link.find(marker)
    if start == -1:
        # Not a folder link; nothing to extract.
        return ""
    start += len(marker)
    # Strip any query string, not just '?usp=sharing'.
    end = drive_shared_link.find('?', start)
    if end == -1:
        end = len(drive_shared_link)
    return str(drive_shared_link[start:end])
28
+
29
+
30
+
lc_base/chain.py CHANGED
@@ -27,43 +27,14 @@ class openai_chain():
27
 
28
  return response
29
 
 
 
 
30
 
31
-
32
- """
33
- TODO
34
-
35
- 1) Map_Reduce - 7 mins just to process a 27 page report
36
- 1.5) Map_reduce on smaller reports -
37
- 2) Stuff - 30 secs to process and give the output
38
-
39
- Check condition? <4K use #stuff else use #Map_reduce
40
-
41
- Potential Errors
42
-
43
- Explain the key points of this report in detail.
44
-
45
- I'm sorry, but I can't provide a detailed explanation of the key points of the report as the relevant portion of the document you provided does not contain any specific information about the report or its key points. It mainly talks about the services provided by Boston Consulting Group (BCG) and how they aim to help clients and make a positive impact in the world. If you have access to the full report, please provide more specific information or context, and I'll be happy to assist you further.
46
-
47
- Preprocess data - remove \n or blank spaces
48
-
49
-
50
- def remove_newlines(serie):
51
- serie = serie.str.replace('\n', ' ')
52
- serie = serie.str.replace('\\n', ' ')
53
- serie = serie.str.replace(' ', ' ')
54
- serie = serie.str.replace(' ', ' ')
55
- return serie
56
-
57
- llmsherpa -> nlmatics for PDF reading into subsections
58
- Use GPT-4? -> check speed
59
- Use two modes -> summarizer wholedoc + chat top3doc
60
-
61
- compare summarizing answers by stuff and map_reduce
62
-
63
- use top6 across the paper -> 500 each
64
- top 40 -> 100 each
65
 
66
- Take consulting reports -> find top 10 challenges
67
- Take government reports from eu, china, india -> ask questions about the challenges based on countries
68
- Why EU is agging behind in AI? and so on..
69
- """
 
27
 
28
  return response
29
 
30
+ def get_response_from_drive(self, query, database, k=3, type="stuff", model_name="gpt-3.5-turbo"):
31
+ # Get relevant docs
32
+ docs = database.similarity_search(query, k=k)
33
 
34
+ # Create chain
35
+ chain = load_qa_chain(ChatOpenAI(model=model_name), chain_type=type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ #Get Response
38
+ response = chain.run(input_documents=docs, question=query)
39
+
40
+ return response