Spaces:

lakshyaag
/

w3d1-legal-summarizer-app

Paused

App Files Files Community

lakshyaag committed on Jun 15, 2024

Commit

b8cf9e8

1 Parent(s): fe97ada

update w3d1 space

Browse files

Files changed (5) hide show

.chainlit/config.toml +1 -1
Dockerfile +2 -3
README.md +3 -3
app.py +100 -24
requirements.txt +2 -1

.chainlit/config.toml CHANGED Viewed

@@ -35,7 +35,7 @@ multi_modal = true
 [UI]
 # Name of the app and chatbot.
-name = "Chatbot"
 # Show the readme while the conversation is empty.
 show_readme_as_default = true

 [UI]
 # Name of the app and chatbot.
+name = "Legal Summarizer Chatbot"
 # Show the readme while the conversation is empty.
 show_readme_as_default = true

Dockerfile CHANGED Viewed

@@ -4,8 +4,7 @@ USER user
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
-COPY ./requirements.txt ~/app/requirements.txt
-RUN pip install -r requirements.txt
-COPY . .
 CMD ["chainlit", "run", "app.py", "--port", "7860"]

 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 WORKDIR $HOME/app
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install -r $HOME/app/requirements.txt
 COPY --chown=user . $HOME/app
 CMD ["chainlit", "run", "app.py", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Legal Summarizer App
-emoji: 🤖
-colorFrom: pink
-colorTo: yellow
 sdk: docker
 pinned: false
 app_port: 7860

 ---
 title: Legal Summarizer App
+emoji: ⚖️
+colorFrom: red
+colorTo: green
 sdk: docker
 pinned: false
 app_port: 7860

app.py CHANGED Viewed

@@ -1,42 +1,118 @@
 import os
-from peft import PeftModel, PeftConfig
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import chainlit as cl  # importing chainlit for our app
-from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
-from dotenv import load_dotenv
-load_dotenv()
-model_name = "lakshyaag/llama38binstruct_summarize"
-config = PeftConfig.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-base_model = AutoModelForCausalLM.from_pretrained(
-    "NousResearch/Meta-Llama-3-8B-Instruct"
-)
-model = PeftModel.from_pretrained(base_model, model_name)
 # Prompt Templates
-system_template = """Your task is to provide a summary of the provided document."""
-user_template = """{input}"""
 @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
-    inputs = tokenizer.encode(
-        message,
-        return_tensors="pt",
-    )
-    outputs = model.generate(
-        inputs,
-        max_length=100,
     )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
 if __name__ == "__main__":

 import os
+import chainlit as cl  # importing chainlit for our app
+import torch
+from transformers import (
+    AutoTokenizer,
+    AutoConfig,
+    AutoModelForCausalLM,
+    BitsAndBytesConfig,
+)
+import bitsandbytes as bnb
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 # Prompt Templates
+INSTRUCTION_PROMPT_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+Please convert the following legal content into a human-readable summary<|eot_id|><|start_header_id|>user<|end_header_id|>
+[LEGAL_DOC]
+{input}
+[END_LEGAL_DOC]<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+"""
+RESPONSE_TEMPLATE = """
+{summary}<|eot_id|>
+"""
+def create_prompt(sample, include_response=False):
+    """
+    Build the full prompt string for one dataset row.
+    Parameters:
+      - sample: dict representing row of dataset. Must contain the key
+        "original_text"; must also contain "reference_summary" when
+        `include_response` is true.
+      - include_response: bool
+    Functionality:
+      Formats INSTRUCTION_PROMPT_TEMPLATE with the row's original text.
+      If `include_response` is true, the reference summary (via
+      RESPONSE_TEMPLATE) and the closing "<|end_of_text|>" marker are
+      appended, giving a complete example.
+      If it is false, the prompt stops right after the assistant header so
+      the model can continue from there (useful for prompting and testing).
+    Returns:
+      - full_prompt: str
+    Raises:
+      - KeyError: if a required key is missing from `sample`.
+    """
+    full_prompt = INSTRUCTION_PROMPT_TEMPLATE.format(input=sample["original_text"])
+    if include_response:
+        full_prompt += RESPONSE_TEMPLATE.format(summary=sample["reference_summary"])
+        # Only a complete example is terminated; an inference prompt must stay
+        # open, otherwise the model sees end-of-text before it has answered.
+        full_prompt += "<|end_of_text|>"
+    return full_prompt
+@cl.on_chat_start
+async def start_chat():
+    """
+    Load the 4-bit quantized summarization model and its tokenizer, then
+    store both in the Chainlit user session under the keys "model" and
+    "tokenizer" so the message handler can retrieve them.
+    """
+    # NOTE(review): this hook loads the full model every time it runs; if it
+    # fires once per chat session, consider caching the model at module level.
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_compute_dtype=torch.float16,
+    )
+    model_id = "lakshyaag/llama38binstruct_summarize"
+    # model and tokenizer share one download cache inside the working directory
+    cache_dir = os.path.join(os.getcwd(), ".cache")
+    # device_map="auto" already places the quantized weights on the available
+    # device(s). Do not call model.to("cuda") afterwards: transformers raises
+    # ValueError when .to() is used on a 4-bit/8-bit bitsandbytes model.
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        quantization_config=bnb_config,
+        device_map="auto",
+        cache_dir=cache_dir,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
+    # reuse the EOS token for padding
+    tokenizer.pad_token = tokenizer.eos_token
+    tokenizer.padding_side = "right"
+    cl.user_session.set("model", model)
+    cl.user_session.set("tokenizer", tokenizer)
 @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
+    """
+    Summarize the legal text in `message` and send the summary back to the chat.
+    Parameters:
+      - message: incoming Chainlit message; its `content` is the text to summarize
+    Functionality:
+      Reads the model and tokenizer stored in the user session, wraps the
+      message text in INSTRUCTION_PROMPT_TEMPLATE, generates up to 256 new
+      tokens with sampling, and sends only the newly generated text as a
+      new chat message.
+    """
+    model = cl.user_session.get("model")
+    tokenizer = cl.user_session.get("tokenizer")
+    # wrap the raw message text (not the Message object) in the instruction template
+    prompt = INSTRUCTION_PROMPT_TEMPLATE.format(input=message.content)
+    # convert str input into tokenized input; the template already begins with
+    # <|begin_of_text|>, so the tokenizer must not prepend a second BOS token
+    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
+    # send the tokenized inputs to whichever device the model was loaded on
+    model_inputs = encoded_input.to(model.device)
+    # generate response and set desired generation parameters
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=256,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id,
     )
+    # generate() returns prompt + continuation; keep only the continuation
+    prompt_length = model_inputs["input_ids"].shape[-1]
+    new_token_ids = generated_ids[0][prompt_length:]
+    # decode output from tokenized output to str output
+    decoded_output = tokenizer.decode(new_token_ids, skip_special_tokens=False)
+    # cut at the end-of-turn marker and drop any trailing end-of-text marker
+    response = decoded_output.split("<|eot_id|>")[0].replace("<|end_of_text|>", "").strip()
+    # replies are sent as a new Message; Message has no `reply` method
+    await cl.Message(content=response).send()
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,5 +1,6 @@
 chainlit==0.7.700
 transformers==4.41.2
-peft==0.11.1
 tiktoken==0.5.1
 python-dotenv==1.0.0

 chainlit==0.7.700
 transformers==4.41.2
+bitsandbytes==0.43.1
+accelerate==0.31.0
 tiktoken==0.5.1
 python-dotenv==1.0.0