# Spaces: Sleeping  (HuggingFace Spaces status banner captured during export;
# commented out so the file remains valid Python)
##########################################################################
# app.py - Pennwick Honeybee Robot
#
# HuggingFace Spaces application to provide honeybee expertise
# with open-source models
#
# Mike Pastor February 23, 2024
from datetime import datetime

import streamlit as st
from streamlit.components.v1 import html
# from dotenv import load_dotenv
from PyPDF2 import PdfReader
from PIL import Image
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Local file
from htmlTemplates import css, bot_template, user_template
################################################################################## | |
# Admin flags | |
DISPLAY_DIALOG_LINES = 6 | |
SESSION_STARTED = False | |
# MODEL_NAME="deepset/roberta-base-squad2" | |
# MODEL_NAME="BEE-spoke-data/TinyLlama-3T-1.1bee" | |
# MODEL_NAME='HuggingFaceH4/zephyr-7b-beta' | |
############################################################## | |
# Our model and tokenizer | |
# | |
# MODEL_NAME = "facebook/blenderbot-400M-distill" | |
MODEL_NAME = "facebook/blenderbot-3B" | |
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME) | |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) | |
################################################################################## | |
def process_user_question(user_question): | |
# if not SESSION_STARTED: | |
# print('No Session') | |
# st.write( 'Please upload and analyze your PDF files first!') | |
# return | |
if user_question == None: | |
print('question is null') | |
return | |
if user_question == '': | |
print('question is blank') | |
return | |
if st == None: | |
print('session is null') | |
return | |
if st.session_state == None: | |
print('session STATE is null') | |
return | |
print('question is: ', user_question) | |
print('\nsession is: ', st) | |
################################################################# | |
# Track the overall time for training & submission preparation | |
# # | |
from datetime import datetime | |
global_now = datetime.now() | |
global_current_time = global_now.strftime("%H:%M:%S") | |
print("# app.py Starting up... - Current Time =", global_current_time) | |
st.write(('Question: ' + user_question ), unsafe_allow_html=True) | |
# input_text = input('Say something--> ') | |
print( 'history--> ', st.session_state.history_string) | |
################################################################ | |
# Tokenize the user prompt and conversation history | |
inputs = tokenizer.encode_plus( st.session_state.history_string, user_question, return_tensors="pt" ) | |
# st.write('Len of inputs= ', len( inputs)) | |
# Generate a response | |
outputs = model.generate( **inputs ) | |
# decode the response | |
response = tokenizer.decode( outputs[0], skip_special_tokens=True).strip() | |
# append history | |
st.session_state.conversation_history.append(user_question) | |
st.session_state.conversation_history.append(response) | |
# st.session_state.history_string = "/n".join(st.session_state.conversation_history) | |
st.session_state.history_string = "<br>".join( st.session_state.conversation_history ) | |
st.write( 'Response: ', response) | |
# Mission Complete! | |
################################################################################## | |
global_later = datetime.now() | |
st.write("Total query execute Time =", (global_later - global_now), global_later) | |
################################################################################# | |
def main(): | |
print('Pennwick Starting up...\n') | |
################################################################## | |
# Initial conversation tracking | |
if not hasattr(st.session_state, "conversation_history"): | |
st.session_state.conversation_history = [] | |
if not hasattr(st.session_state, "history_string"): | |
st.session_state.history_string = "\n".join(st.session_state.conversation_history) | |
# Load the environment variables - if any | |
# load_dotenv() | |
st.set_page_config(page_title="Pennwick Honeybee Robot", | |
page_icon="./HoneybeeLogo.ico") | |
st.write(css, unsafe_allow_html=True) | |
st.image("./HoneybeeLogo.png", width=96) | |
st.header(f"Pennwick Honeybee Robot - BETA VERSION") | |
print('Prepared page...\n') | |
user_question = None | |
user_question = st.text_input("Ask the Open Source - "+MODEL_NAME+" - Model any question about Honeybees...") | |
if user_question != None: | |
print('calling process question', user_question) | |
process_user_question(user_question) | |
html_history_string = "" | |
if len( st.session_state.history_string ) > 100: | |
html_history_string = st.session_state.history_string[-100:] | |
else: | |
html_history_string = st.session_state.history_string | |
html(html_history_string , height=150, scrolling=True) | |
# st.write( user_template, unsafe_allow_html=True) | |
# st.write(user_template.replace( "{{MSG}}", "Hello robot!"), unsafe_allow_html=True) | |
# st.write(bot_template.replace( "{{MSG}}", "Hello human!"), unsafe_allow_html=True) | |
# | |
# with st.sidebar: | |
# | |
# st.subheader("Which documents would you like to analyze?") | |
# st.subheader("(no data is saved beyond the session)") | |
# | |
# pdf_docs = st.file_uploader( | |
# "Upload your PDF documents here and click on 'Analyze'", accept_multiple_files=True) | |
# | |
# # Upon button press | |
# if st.button("Analyze these files"): | |
# with st.spinner("Processing..."): | |
# ################################################################# | |
# # Track the overall time for file processing into Vectors | |
# # # | |
# from datetime import datetime | |
# global_now = datetime.now() | |
# global_current_time = global_now.strftime("%H:%M:%S") | |
# st.write("Vectorizing Files - Current Time =", global_current_time) | |
# | |
# # get pdf text | |
# raw_text = extract_pdf_text(pdf_docs) | |
# # st.write(raw_text) | |
# | |
# # # get the text chunks | |
# text_chunks = extract_bitesize_pieces(raw_text) | |
# # st.write(text_chunks) | |
# | |
# # # create vector store | |
# vectorstore = prepare_embedding_vectors(text_chunks) | |
# | |
# # # create conversation chain | |
# st.session_state.conversation = prepare_conversation(vectorstore) | |
# | |
# SESSION_STARTED = True | |
# | |
# # Mission Complete! | |
# global_later = datetime.now() | |
# st.write("Files Vectorized - Total EXECUTION Time =", | |
# (global_later - global_now), global_later) | |
# | |
if __name__ == '__main__': | |
main() | |