from langchain.chat_models import ChatAnthropic, ChatOpenAI
from langchain import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from transformers import AutoTokenizer, pipeline
import streamlit as st
from dotenv import load_dotenv
import PyPDF2
import torch
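
# load_dotenv() pulls API keys from a local .env file; assuming the models
# wired up below, OPENAI_API_KEY is needed for ChatOpenAI (and
# ANTHROPIC_API_KEY if the ChatAnthropic line is re-enabled).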
load_dotenv()


class LegalExpert:
    def __init__(self):
        # Build the chat prompt: system instructions followed by the user's question.
        self.system_prompt = self.get_system_prompt()
        self.user_prompt = HumanMessagePromptTemplate.from_template("{question}")

        full_prompt_template = ChatPromptTemplate.from_messages(
            [self.system_prompt, self.user_prompt]
        )

        # Falcon instruct model as a local text-generation pipeline.
        model_name = "tiiuae/falcon-7b-instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        falcon_pipeline = pipeline(
            "text-generation",
            model=model_name,
            tokenizer=tokenizer,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            device_map="auto",
        )
        # Wrap the raw transformers pipeline so LangChain can treat it as an LLM.
        self.falcon_llm = HuggingFacePipeline(pipeline=falcon_pipeline)
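        # Note: device_map="auto" requires the accelerate package, and
        # falcon-7b in float16 needs on the order of 14 GB of GPU memory.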

        # FLAN-T5 is a seq2seq model, so it needs the text2text-generation task.
        model_name = "google/flan-t5-xl"
        flan_pipeline = pipeline(
            "text2text-generation",
            model=model_name,
            tokenizer=AutoTokenizer.from_pretrained(model_name),
        )
        self.huggingface_llm = HuggingFacePipeline(pipeline=flan_pipeline)
        self.openai_gpt4_llm = ChatOpenAI(temperature=0, max_tokens=256)
        # self.chat = ChatAnthropic()
        # Swap self.falcon_llm or self.openai_gpt4_llm in here to try another model.
        self.chain = LLMChain(llm=self.huggingface_llm, prompt=full_prompt_template)

    def get_system_prompt(self):
        system_prompt = """
        You are a Canadian Legal Expert.
        Under no circumstances do you give legal advice.
        You are adept at explaining the law in layman's terms, and you can provide context for legal questions.
        You may draw on knowledge beyond the provided context, but do not add information that is not directly relevant to the question or the provided context.
        You speak {language}.

        ### CONTEXT
        {context}
        ### END OF CONTEXT
        """
        return SystemMessagePromptTemplate.from_template(system_prompt)

    def run_chain(self, language, context, question):
        return self.chain.run(
            language=language, context=context, question=question
        )
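

# Minimal usage sketch outside Streamlit (assumes the models above can be
# loaded locally and the relevant API keys are set):
#
#   expert = LegalExpert()
#   answer = expert.run_chain(
#       language="English",
#       context="Section 91 of the Constitution Act, 1867 ...",
#       question="What does this section cover?",
#   )
#   print(answer)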


def retrieve_pdf_text(pdf_file):
    # Concatenate the text of every page; extract_text() can return None for
    # pages without a text layer, so fall back to an empty string.
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""
    return text
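
# Note: the extracted text is injected into the prompt verbatim, so a very
# long PDF can overflow the model's context window; chunking or truncating
# the text before prompting is a common guard, omitted here for simplicity.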


# create a streamlit app
st.title("Document Explainer (that does not give advice)")

if "LegalExpert" not in st.session_state:
    st.session_state.LegalExpert = LegalExpert()

# create an upload widget for a PDF
pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])

# if a PDF file is uploaded, extract its text once and cache it in the session
if pdf_file:
    if "context" not in st.session_state:
        st.session_state.context = retrieve_pdf_text(pdf_file)

# button that clears the cached context and any previous response
if st.button("Clear context"):
    st.session_state.pop("context", None)
    st.session_state.pop("legal_response", None)

# if there's context, proceed
if "context" in st.session_state:
    # language dropdown and question input
    language = st.selectbox("Language", ["English", "Français"])
    question = st.text_input("Ask a question")

    # run the model against the uploaded document
    if st.button("Run"):
        legal_response = st.session_state.LegalExpert.run_chain(
            language=language, context=st.session_state.context, question=question
        )
        print(f"legal_response: {legal_response}")
        # store (or overwrite) the latest response
        st.session_state.legal_response = legal_response

# display the response
if "legal_response" in st.session_state:
    st.write(st.session_state.legal_response)