michaelmc1618 committed on
Commit a6b2b74 · verified · 1 Parent(s): 07a3b9b

Update app.py

Files changed (1):
  1. app.py +142 -197

app.py CHANGED
@@ -1,33 +1,25 @@
  import os
- import tempfile
- import torch
- import yt_dlp as youtube_dl
  import gradio as gr
- from transformers import pipeline, AutoTokenizer, AutoModelForMaskedLM, AutoProcessor, AutoModelForSpeechSeq2Seq
  from huggingface_hub import InferenceClient
  from datasets import load_dataset
  import fitz  # PyMuPDF
- from transformers.pipelines.audio_utils import ffmpeg_read
-
- # Constants for Whisper ASR
- MODEL_NAME = "openai/whisper-large-v3"
- BATCH_SIZE = 8
- FILE_LIMIT_MB = 1000
- YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
-
- device = 0 if torch.cuda.is_available() else "cpu"

- # Load the Whisper model and processor
- processor = AutoProcessor.from_pretrained(MODEL_NAME)
- model_s2s = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_NAME)

- # Load the BERT model and tokenizer
- tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
- model = AutoModelForMaskedLM.from_pretrained("google-bert/bert-base-uncased")
-
- # Create the fill-mask pipeline
- pipe = pipeline("fill-mask", model=model, tokenizer=tokenizer)

  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

  def respond(
@@ -48,123 +40,49 @@ def respond(
      messages.append({"role": "user", "content": message})

-     try:
-         response = ""
-         for message in client.chat_completion(
-             messages,
-             max_tokens=max_tokens,
-             stream=True,
-             temperature=temperature,
-             top_p=top_p,
-         ):
-             token = message.choices[0].delta.content
-             if token is not None:
-                 response += token
-             yield response, history + [(message, response)]
-     except Exception as e:
-         print(f"Error during chat completion: {e}")
-         yield "An error occurred during the chat completion.", history

  def generate_case_outcome(prosecutor_response, defense_response):
-     prompt = f"Prosecutor's arguments: {prosecutor_response}\n\nDefense's arguments: {defense_response}\n\nProvide details on who won the case and why. Provide reasons for your decision and provide a link to the source of the case."
      evaluation = ""
-     try:
-         for message in client.chat_completion(
-             [{"role": "system", "content": "You are a legal expert evaluating the details of the case presented by the prosecution and the defense."},
-              {"role": "user", "content": prompt}],
-             max_tokens=512,
-             stream=True,
-             temperature=0.6,
-             top_p=0.95,
-         ):
-             token = message.choices[0].delta.content
-             if token is not None:
-                 evaluation += token
-     except Exception as e:
-         print(f"Error during case outcome generation: {e}")
-         return "An error occurred during the case outcome generation."
      return evaluation

- def determine_outcome(outcome):
-     prosecutor_count = outcome.split().count("Prosecutor")
-     defense_count = outcome.split().count("Defense")
-     if prosecutor_count > defense_count:
          return "Prosecutor Wins"
-     elif defense_count > prosecutor_count:
          return "Defense Wins"
      else:
          return "No clear winner"

- def transcribe(inputs, task):
-     if inputs is None:
-         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-     inputs = processor(inputs, return_tensors="pt", sampling_rate=16000).to(device)
-     with torch.no_grad():
-         generated_ids = model_s2s.generate(inputs["input_features"])
-     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-     return transcription
-
- def _return_yt_html_embed(yt_url):
-     video_id = yt_url.split("?v=")[-1]
-     HTML_str = (
-         f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-         " </center>"
-     )
-     return HTML_str
-
- def download_yt_audio(yt_url, filename):
-     info_loader = youtube_dl.YoutubeDL()
-
-     try:
-         info = info_loader.extract_info(yt_url, download=False)
-     except youtube_dl.utils.DownloadError as err:
-         raise gr.Error(str(err))
-
-     file_length = info["duration_string"]
-     file_h_m_s = file_length.split(":")
-     file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-
-     if len(file_h_m_s) == 1:
-         file_h_m_s.insert(0, 0)
-     if len(file_h_m_s) == 2:
-         file_h_m_s.insert(0, 0)
-     file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-
-     if file_length_s > YT_LENGTH_LIMIT_S:
-         yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-         file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-         raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-
-     ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
-
-     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-         try:
-             ydl.download([yt_url])
-         except youtube_dl.utils.ExtractorError as err:
-             raise gr.Error(str(err))
-
- def yt_transcribe(yt_url, task, max_filesize=75.0):
-     html_embed_str = _return_yt_html_embed(yt_url)
-
-     with tempfile.TemporaryDirectory() as tmpdirname:
-         filepath = os.path.join(tmpdirname, "video.mp4")
-         download_yt_audio(yt_url, filepath)
-         with open(filepath, "rb") as f:
-             inputs = f.read()
-
-     inputs = ffmpeg_read(inputs, processor.feature_extractor.sampling_rate)
-     inputs = {"array": inputs, "sampling_rate": processor.feature_extractor.sampling_rate}
-
-     inputs = processor(inputs, return_tensors="pt", sampling_rate=16000).to(device)
-     with torch.no_grad():
-         generated_ids = model_s2s.generate(inputs["input_features"])
-     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-     return html_embed_str, transcription
-
- # Custom CSS for white background and black text for input and output boxes
  custom_css = """
  body {
      background-color: #ffffff;
@@ -253,17 +171,63 @@ def chat_between_bots(system_message1, system_message2, max_tokens, temperature,
      response2 = response2[:max_length]

      outcome = generate_case_outcome(response1, response2)
-     winner = determine_outcome(outcome)

-     return response1, response2, history1, history2, shared_history, outcome

  def get_top_10_cases():
-     prompt = "List 10 high-profile legal cases that have received significant media attention and are currently ongoing. Just a list of case names and numbers."
      response = ""
      for message in client.chat_completion(
-         [{"role": "system", "content": "You are a legal research expert, able to provide information about high-profile legal cases."},
-          {"role": "user", "content": prompt}],
-         max_tokens=512,
          stream=True,
          temperature=0.6,
          top_p=0.95,
@@ -271,14 +235,8 @@ def get_top_10_cases():
          token = message.choices[0].delta.content
          if token is not None:
              response += token
-     return response
-
- def add_message(history, message):
-     for x in message["files"]:
-         history.append(((x,), None))
-     if message["text"] is not None:
-         history.append((message["text"], None))
-     return history, gr.MultimodalTextbox(value=None, interactive=True)

  def print_like_dislike(x: gr.LikeData):
      print(x.index, x.value, x.liked)
@@ -290,32 +248,22 @@ def save_conversation(history1, history2, shared_history):
      return history1, history2, shared_history

  def ask_about_case_outcome(shared_history, question):
-     prompt = f"Case Outcome: {shared_history}\n\nQuestion: {question}\n\nAnswer:"
-     response = ""
-     for message in client.chat_completion(
-         [{"role": "system", "content": "You are a legal expert answering questions based on the case outcome provided."},
-          {"role": "user", "content": prompt}],
-         max_tokens=512,
-         stream=True,
-         temperature=0.6,
-         top_p=0.95,
-     ):
-         token = message.choices[0].delta.content
-         if token is not None:
-             response += token
-     return response

  with gr.Blocks(css=custom_css) as demo:
      history1 = gr.State([])
      history2 = gr.State([])
      shared_history = gr.State([])
      top_10_cases = gr.State("")

      with gr.Tab("Argument Evaluation"):
          with gr.Row():
              with gr.Column(scale=1):
                  top_10_btn = gr.Button("Give me the top 10 cases")
-                 top_10_output = gr.Textbox(label="Top 10 Cases", interactive=False, elem_classes=["scroll-box"])
                  top_10_btn.click(get_top_10_cases, outputs=top_10_output)
              with gr.Column(scale=2):
                  message = gr.Textbox(label="Case to Argue")
@@ -336,56 +284,53 @@ with gr.Blocks(css=custom_css) as demo:
          with gr.Column(scale=1):
              defense_score_color = gr.HTML()

-         outcome = gr.Textbox(label="Outcome", interactive=False, elem_classes=["scroll-box"])

          with gr.Row():
              submit_btn = gr.Button("Argue")
              clear_btn = gr.Button("Clear and Reset")
              save_btn = gr.Button("Save Conversation")

-         submit_btn.click(chat_between_bots, inputs=[system_message1, system_message2, max_tokens, temperature, top_p, history1, history2, shared_history, message], outputs=[prosecutor_response, defense_response, history1, history2, shared_history, outcome])
-         clear_btn.click(reset_conversation, outputs=[history1, history2, shared_history, prosecutor_response, defense_response, outcome])
          save_btn.click(save_conversation, inputs=[history1, history2, shared_history], outputs=[history1, history2, shared_history])
-
-     with gr.Tab("Practice Arguments"):
-         mf_transcribe = gr.Interface(
-             fn=transcribe,
-             inputs=[
-                 gr.Audio(type="filepath", label="Record or Upload Audio"),
-                 gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-             ],
-             outputs="text",
-             layout="horizontal",
-             title="Practice Legal Arguments - Microphone",
-             description=(
-                 "Practice your legal arguments by recording them through your microphone or uploading an audio file. The arguments will be transcribed for review."
-             ),
-             allow_flagging="never",
-         )
-
-         yt_transcribe = gr.Interface(
-             fn=yt_transcribe,
-             inputs=[
-                 gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-                 gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
-             ],
-             outputs=["html", "text"],
-             layout="horizontal",
-             title="Practice Legal Arguments - YouTube",
-             description=(
-                 "Practice your legal arguments by providing a YouTube video link. The arguments will be transcribed for review."
-             ),
-             allow_flagging="never",
          )

-     gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Microphone", "YouTube"])

-     with gr.Tab("Case Outcome Chat"):
-         case_question = gr.Textbox(label="Ask a Question about the Case Outcome")
-         case_answer = gr.Textbox(label="Answer", interactive=False, elem_classes=["scroll-box"])
-         ask_case_btn = gr.Button("Ask")

-         ask_case_btn.click(ask_about_case_outcome, inputs=[shared_history, case_question], outputs=case_answer)

  demo.queue()
  demo.launch()
 
  import os
+ os.system('pip install transformers')
+ os.system('pip install datasets')
+ os.system('pip install gradio')
+ os.system('pip install minijinja')
+ os.system('pip install PyMuPDF')
+
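Note: `os.system('pip install ...')` at import time ignores failures and reinstalls on every restart; on Hugging Face Spaces these dependencies are normally declared once in `requirements.txt`. If installing from code anyway, a minimal sketch that at least surfaces errors (the helper name is illustrative):

    import subprocess
    import sys

    def ensure_installed(*packages):
        # Run pip via the current interpreter; check=True raises on a non-zero
        # exit code, which os.system silently ignores.
        subprocess.run([sys.executable, "-m", "pip", "install", *packages], check=True)

    ensure_installed("transformers", "datasets", "gradio", "minijinja", "PyMuPDF")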
  import gradio as gr
  from huggingface_hub import InferenceClient
+ from transformers import pipeline
  from datasets import load_dataset
  import fitz  # PyMuPDF

+ # Load dataset
+ dataset = load_dataset("ibunescu/qa_legal_dataset_train")

+ # Different pipelines for different tasks
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
+ summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
+ mask_filling_pipeline = pipeline("fill-mask", model="nlpaueb/legal-bert-base-uncased")

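Note: the three pipelines have different calling conventions; a quick sketch with invented inputs:

    # Extractive QA returns a dict with "answer", "score", "start", "end".
    qa = qa_pipeline(question="Who signed the contract?",
                     context="The contract was signed by Alice Smith on 1 March 2020.")
    print(qa["answer"])

    # Summarization returns a list of dicts with "summary_text".
    long_text = "The court heard arguments from both parties. " * 20
    print(summarization_pipeline(long_text, max_length=30, min_length=5)[0]["summary_text"])

    # Fill-mask uses a BERT-style [MASK] token and returns ranked candidates.
    print(mask_filling_pipeline("The defendant was found [MASK] of all charges.")[0]["token_str"])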
+ # Inference client for chat completion
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

  def respond(
      messages.append({"role": "user", "content": message})

+     response = ""
+     for chunk in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = chunk.choices[0].delta.content
+         if token is not None:
+             response += token
+         yield response, history + [(message, response)]
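Note: the loop above accumulates streamed deltas into a growing string and yields after every token so the UI can render partial output. (The loop variable is renamed here from `message` to `chunk`, since reusing `message` shadowed the user's message that the yield appends to the history.) The same pattern as a standalone sketch, with an illustrative prompt:

    from huggingface_hub import InferenceClient

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
    text = ""
    for chunk in client.chat_completion(
        [{"role": "user", "content": "Summarize Miranda v. Arizona in one sentence."}],
        max_tokens=64,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content  # may be None for control chunks
        if delta is not None:
            text += delta
    print(text)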
 
  def generate_case_outcome(prosecutor_response, defense_response):
+     prompt = f"Prosecutor's Argument: {prosecutor_response}\nDefense Attorney's Argument: {defense_response}\n\nEvaluate both arguments, point out the strengths and weaknesses, and determine who won the case. Provide reasons for your decision."
      evaluation = ""
+     for message in client.chat_completion(
+         [{"role": "system", "content": "You are a legal expert evaluating the arguments presented by the prosecution and the defense."},
+          {"role": "user", "content": prompt}],
+         max_tokens=512,
+         stream=True,
+         temperature=0.6,
+         top_p=0.95,
+     ):
+         token = message.choices[0].delta.content
+         if token is not None:
+             evaluation += token
      return evaluation

+ def determine_winner(outcome):
+     if "Prosecutor" in outcome and "Defense" in outcome:
+         if outcome.count("Prosecutor") > outcome.count("Defense"):
+             return "Prosecutor Wins"
+         else:
+             return "Defense Wins"
+     elif "Prosecutor" in outcome:
          return "Prosecutor Wins"
+     elif "Defense" in outcome:
          return "Defense Wins"
      else:
          return "No clear winner"
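Note: `determine_winner` is a case-sensitive mention-counting heuristic over the generated evaluation, not a structured verdict. Its behavior on made-up inputs:

    assert determine_winner("The Prosecutor led; the Defense faltered. Prosecutor wins.") == "Prosecutor Wins"
    assert determine_winner("Only the Defense presented evidence.") == "Defense Wins"
    assert determine_winner("Neither side prevailed.") == "No clear winner"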
 
+ # Custom CSS for a clean layout
  custom_css = """
  body {
      background-color: #ffffff;

      response2 = response2[:max_length]

      outcome = generate_case_outcome(response1, response2)
+     winner = determine_winner(outcome)

+     return response1, response2, history1, history2, shared_history, outcome, winner
+
+ def extract_text_from_pdf(pdf_file):
+     text = ""
+     doc = fitz.open(pdf_file)
+     for page in doc:
+         text += page.get_text()
+     return text
+
+ def ask_about_pdf(pdf_text, question):
+     result = qa_pipeline(question=question, context=pdf_text)
+     return result['answer']
+
+ def update_pdf_gallery_and_extract_text(pdf_files):
+     if len(pdf_files) > 0:
+         pdf_text = extract_text_from_pdf(pdf_files[0].name)
+     else:
+         pdf_text = ""
+     return pdf_files, pdf_text
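Note: `extract_text_from_pdf` concatenates the text of every page via PyMuPDF, and `ask_about_pdf` runs extractive QA over that text. Hypothetical usage (the path is illustrative):

    text = extract_text_from_pdf("filings/complaint.pdf")
    print(ask_about_pdf(text, "Who is the plaintiff?"))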
 
  def get_top_10_cases():
+     # Here, I'm generating a list of 10 example cases. In a real-world scenario, you'd fetch this data from a database or another source.
+     cases = [
+         {"name": "Smith v. Jones", "number": "CA12345"},
+         {"name": "Johnson v. State", "number": "CA67890"},
+         {"name": "Doe v. Roe", "number": "CA11223"},
+         {"name": "Brown v. Davis", "number": "CA44556"},
+         {"name": "Williams v. Taylor", "number": "CA77889"},
+         {"name": "Miller v. Anderson", "number": "CA99100"},
+         {"name": "Davis v. Martin", "number": "CA22334"},
+         {"name": "Garcia v. Clark", "number": "CA55667"},
+         {"name": "Rodriguez v. Lewis", "number": "CA88990"},
+         {"name": "Martinez v. Lee", "number": "CA10112"}
+     ]
+     return "\n".join([f"{case['name']} - Case Number: {case['number']}" for case in cases])
+
+ def add_message(history, message):
+     for x in message["files"]:
+         history.append(((x,), None))
+     if message["text"] is not None:
+         history.append((message["text"], None))
+     return history, gr.MultimodalTextbox(value=None, interactive=False)
+
+ def bot(history):
+     system_message = "You are a helpful assistant."
+     messages = [{"role": "system", "content": system_message}]
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
      response = ""
      for message in client.chat_completion(
+         messages,
+         max_tokens=150,
          stream=True,
          temperature=0.6,
          top_p=0.95,
      ):
          token = message.choices[0].delta.content
          if token is not None:
              response += token
+     history[-1][1] = response
+     yield history
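Note: `bot` rebuilds the full message list from the chat history on each call and streams the reply into the last row. A hypothetical offline walk-through (no UI; rows use the [user, assistant] list form the Chatbot component passes back):

    history = [["What is habeas corpus?", None]]
    for partial in bot(history):
        pass  # each yield carries the reply streamed so far
    print(history[-1][1])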
 
  def print_like_dislike(x: gr.LikeData):
      print(x.index, x.value, x.liked)

      return history1, history2, shared_history

  def ask_about_case_outcome(shared_history, question):
+     result = qa_pipeline(question=question, context=shared_history)
+     return result['answer']
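Note: `shared_history` is initialized as a list state, while `qa_pipeline` expects a plain-string context, so the call above likely needs a flattening step first; a hypothetical helper:

    def case_outcome_context(shared_history):
        # Join list-shaped history into a single string before using it as QA context.
        if isinstance(shared_history, list):
            return "\n".join(str(turn) for turn in shared_history)
        return shared_history or ""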
 
  with gr.Blocks(css=custom_css) as demo:
      history1 = gr.State([])
      history2 = gr.State([])
      shared_history = gr.State([])
+     pdf_files = gr.State([])
+     pdf_text = gr.State("")
      top_10_cases = gr.State("")

      with gr.Tab("Argument Evaluation"):
          with gr.Row():
              with gr.Column(scale=1):
                  top_10_btn = gr.Button("Give me the top 10 cases")
+                 top_10_output = gr.Markdown(elem_classes=["scroll-box"])
                  top_10_btn.click(get_top_10_cases, outputs=top_10_output)
              with gr.Column(scale=2):
                  message = gr.Textbox(label="Case to Argue")

          with gr.Column(scale=1):
              defense_score_color = gr.HTML()

+         shared_argument = gr.Textbox(label="Case Outcome", interactive=True, elem_classes=["scroll-box"])
+         winner = gr.Textbox(label="Winner", interactive=False, elem_classes=["scroll-box"])

          with gr.Row():
              submit_btn = gr.Button("Argue")
              clear_btn = gr.Button("Clear and Reset")
              save_btn = gr.Button("Save Conversation")

+         submit_btn.click(chat_between_bots, inputs=[system_message1, system_message2, max_tokens, temperature, top_p, history1, history2, shared_history, message], outputs=[prosecutor_response, defense_response, history1, history2, shared_history, shared_argument, winner])
+         clear_btn.click(reset_conversation, outputs=[history1, history2, shared_history, prosecutor_response, defense_response, shared_argument, winner])
          save_btn.click(save_conversation, inputs=[history1, history2, shared_history], outputs=[history1, history2, shared_history])
+
+         # Section for asking about the case outcome
+         with gr.Row():
+             case_question = gr.Textbox(label="Ask a Question about the Case Outcome")
+             case_answer = gr.Textbox(label="Answer", interactive=False, elem_classes=["scroll-box"])
+             ask_case_btn = gr.Button("Ask")
+
+         ask_case_btn.click(ask_about_case_outcome, inputs=[shared_history, case_question], outputs=case_answer)
+
+     with gr.Tab("PDF Management"):
+         pdf_upload = gr.File(label="Upload Case Files (PDF)", file_types=[".pdf"])
+         pdf_gallery = gr.Gallery(label="PDF Gallery")
+         pdf_view = gr.Textbox(label="PDF Content", interactive=False, elem_classes=["scroll-box"])
+         pdf_question = gr.Textbox(label="Ask a Question about the PDF")
+         pdf_answer = gr.Textbox(label="Answer", interactive=False, elem_classes=["scroll-box"])
+         pdf_upload_btn = gr.Button("Update PDF Gallery")
+         pdf_ask_btn = gr.Button("Ask")
+
+         pdf_upload_btn.click(update_pdf_gallery_and_extract_text, inputs=[pdf_upload], outputs=[pdf_gallery, pdf_text])
+         pdf_text.change(fn=lambda x: x, inputs=pdf_text, outputs=pdf_view)
+         pdf_ask_btn.click(ask_about_pdf, inputs=[pdf_text, pdf_question], outputs=pdf_answer)
+
+     with gr.Tab("Chatbot"):
+         chatbot = gr.Chatbot(
+             [],
+             elem_id="chatbot",
+             bubble_full_width=False
          )

+         chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)

+         chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
+         bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
+         bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

+         chatbot.like(print_like_dislike, None, None)

  demo.queue()
  demo.launch()