|
import html
from urllib.parse import quote

import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import BartTokenizer, BartForConditionalGeneration
|
|
|
|
|
class BartGenerator:
    """Pair a BART tokenizer with its conditional-generation model.

    Both objects are loaded from the same ``model_name`` via
    ``from_pretrained`` and exposed as ``self.tokenizer`` and
    ``self.generator``.
    """

    def __init__(self, model_name):
        """Load the tokenizer and the seq2seq model identified by ``model_name``."""
        self.tokenizer = BartTokenizer.from_pretrained(model_name)
        self.generator = BartForConditionalGeneration.from_pretrained(model_name)

    def tokenize(self, query, max_length=1024):
        """Encode ``query`` as a one-item batch of PyTorch tensors.

        Args:
            query: Text to encode.
            max_length: Upper bound on the encoded length, in tokens.

        Returns:
            The tokenizer output (indexable by ``"input_ids"``).
        """
        # truncation=True is what makes max_length take effect; per the
        # Transformers docs, max_length on its own does not shorten the input.
        return self.tokenizer(
            [query],
            max_length=max_length,
            truncation=True,
            return_tensors="pt",
        )

    def generate(self, query, min_length=20, max_length=40, temperature=None):
        """Generate a single decoded answer for ``query``.

        Args:
            query: Prompt text passed through ``tokenize``.
            min_length: Minimum generated length (coerced to ``int``).
            max_length: Maximum generated length (coerced to ``int``).
            temperature: Optional sampling temperature. When ``None`` (the
                default) no temperature is passed and decoding uses
                ``num_beams=1`` without sampling. When given, sampling is
                switched on and the value is passed through as a ``float``
                (fractional values are no longer truncated).

        Returns:
            The first decoded sequence, with special tokens removed.
        """
        inputs = self.tokenize(query)
        generation_kwargs = {
            "num_beams": 1,
            "min_length": int(min_length),
            "max_length": int(max_length),
        }
        # The temperature used to be read from an undefined local name (it
        # only resolved through a module-level global). It is now an explicit,
        # optional argument.
        if temperature is not None:
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = float(temperature)
        ids = self.generator.generate(inputs["input_ids"], **generation_kwargs)
        decoded = self.tokenizer.batch_decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]
|
|
|
@st.experimental_singleton
def init_models():
    """Build the sentence encoder and the BART answer generator.

    Returns:
        A ``(retriever, generator)`` tuple: a ``SentenceTransformer`` loaded
        from "flax-sentence-embeddings/all_datasets_v3_mpnet-base" and a
        ``BartGenerator`` loaded from "vblagoje/bart_lfqa".
    """
    # The encoder is constructed first, then the generator.
    return (
        SentenceTransformer("flax-sentence-embeddings/all_datasets_v3_mpnet-base"),
        BartGenerator("vblagoje/bart_lfqa"),
    )
|
|
|
# Pinecone API key, read from Streamlit's secrets under "PINECONE_KEY".
PINECONE_KEY = st.secrets["PINECONE_KEY"]


@st.experimental_singleton
def init_pinecone():
    """Initialise the Pinecone client and return the "history-qa" index handle."""
    pinecone.init(environment="us-west1-gcp", api_key=PINECONE_KEY)
    qa_index = pinecone.Index("history-qa")
    return qa_index
|
|
|
# Shared handles used by the search flow further down: the query encoder,
# the answer generator and the Pinecone index.
retriever, generator = init_models()
index = init_pinecone()
|
|
|
def display_answer(answer):
    """Render the generated answer as grey text inside a Bootstrap container.

    The markup is rendered with ``unsafe_allow_html=True``, so ``answer`` is
    HTML-escaped first. Characters such as ``<`` or ``&`` in the generated
    text are shown literally instead of being interpreted as markup.

    Args:
        answer: Text to display (converted with ``str`` before escaping).

    Returns:
        Whatever ``st.markdown`` returns.
    """
    safe_answer = html.escape(str(answer))
    return st.markdown(f"""
    <div class="container-fluid">
        <div class="row align-items-start">
            <div class="col-md-12 col-sm-12">
                <span style="color: #808080;">
                    {safe_answer}
                </span>
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)
|
|
|
def display_context(title, context, url):
    """Render one retrieved passage as a linked title plus grey excerpt.

    The markup is rendered with ``unsafe_allow_html=True``, so all three
    values are HTML-escaped before interpolation. The ``href`` value is also
    wrapped in double quotes so that spaces or quote characters in ``url``
    cannot end the attribute early.

    Args:
        title: Link text.
        context: Passage text shown under the link.
        url: Link target.

    Returns:
        Whatever ``st.markdown`` returns.
    """
    safe_title = html.escape(str(title))
    safe_context = html.escape(str(context))
    # quote=True also escapes " and ', which matters inside the attribute.
    safe_url = html.escape(str(url), quote=True)
    return st.markdown(f"""
    <div class="container-fluid">
        <div class="row align-items-start">
            <div class="col-md-12 col-sm-12">
                <a href="{safe_url}">{safe_title}</a>
                <br>
                <span style="color: #808080;">
                    <small>{safe_context}</small>
                </span>
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)
|
|
|
# CSS that hides the elements matched by #MainMenu and the <footer> tag.
hide_streamlit_style = (
    "\n"
    "<style>\n"
    "#MainMenu {visibility: hidden;}\n"
    "footer {visibility: hidden;}\n"
    "</style>\n"
)
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
|
|
# Landing-page copy. Spelling of the user-facing text is corrected
# ("e-resource", "museums", "useful"); wording is otherwise unchanged.
st.write("""
# Jua Historia Yetu
### An AI Powered Search Engine for East African History and Tourism!

This is an AI powered system designed to help learn about our history, heroes, cultures and tourist destinations.

The system generates a Human-like response to questions asked and points users to where they
can get more information on what they would like to know.
It is intended to act as a one-stop search engine for all things East Africa including the people, history, culture, wildlife and tourist destinations.
It can be of use to locals, tourists, students or anyone who would like to learn about The East African Community.
The data is to be sourced from the EAC e-resource database, member nations' museums, archives and relevant tourism bodies.

Once queried, the system generates a short answer that the user can quickly read through and also points the user to
some resources they might find useful. The user can click on the links to learn more.
""")
|
|
|
# Load the Bootstrap 4.0.0 stylesheet whose classes the display_* helpers use.
# The package spec in the URL had been replaced by email-obfuscation residue
# ("[email protected]"); it is restored to "bootstrap@4.0.0", the release the
# integrity hash below belongs to.
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)
|
|
|
def format_query(query, context):
    """Build the generator prompt from a question and its retrieved matches.

    Args:
        query: The user's question.
        context: Iterable of matches; each is indexed as
            ``match["metadata"]["passage_text"]``.

    Returns:
        ``"question: <query> context: <passages>"`` where every passage is
        prefixed with ``"<P> "`` and passages are separated by one space.
    """
    passages = " ".join(
        f"<P> {match['metadata']['passage_text']}" for match in context
    )
    return f"question: {query} context: {passages}"
|
|
|
|
|
# Settings for the search flow below.
# top_k: number of matches requested from the index query.
top_k = 5
# min_length / max_length: forwarded to generator.generate().
min_length = 1
max_length = 150
# temperature: defined here but not passed explicitly by the search flow.
temperature = 3.5
|
|
|
# Sidebar with sample questions. Each question is followed by two newlines,
# and the texts are kept exactly as written (including duplicates).
st.sidebar.write(
    "\n## Here are some questions you can try out:\n### Copy and paste to test\n"
    + "".join(
        f"{question}\n\n"
        for question in (
            "who was the first person on the moon?",
            "Which was the first radio station at Auburn University",
            "where is Damastown located",
            "What is the Lohanipur Torso ",
            "when was The Coliseum Theatre opened",
            "Who invented the tatoo machine",
            "whats th erecipe for Corn chowder",
            "when was the Tamil Methodist Church built",
            "when was the first electric power system built?",
            "How was the first wireless message sent?",
            "what was the war of currents?",
            "what was NASAs most expensive project?",
            "What brands of smokoing paper are manufactured by Miguel y Costas",
            "what influenced the naming Holy Forty Martyrs Church",
            "When was the world first power system built",
            "which is the largest island within the Halifax Harbour",
            "Who was Joseph Monier",
            "who were the Karadjordjevic dynasty",
            "how many royal tombs were excavated at Tillia Tepe",
            "What did the HEICO company manufacture",
            "tell me about The Battle of Antietam",
            "Which was the smallest microbrewery in the United States",
            "when did queen marie recieve the bran castle",
            "Whe was York Township founded",
            "When did the United Nations Security Council reform the security sector",
            "When was Magandang Umaga Po first aired",
            "when was Mae Lan District formed",
            "what is Voice over Internet Protocol",
            "When was InfluxDB developed",
            "When was the Semanário Económico newspaper started",
            "who owned Kasteln Castle",
            "when was The Steinbach Haus built",
            "when was the Guerrero ship in Africa",
            "tell me about the Guerrero ship",
            "When was the Companhia Paulista de Trens Metropolitanos rilway built",
            "When was the lincoln mall demolished",
            "where is Damastown located",
            "when was solo diving first practiced",
            "when was Consumers Credit Union History Consumers Credit Union was founded",
            "Who built the castle of Daroynk",
            "What is the prime meridian",
            "Which was the first radio station at Auburn University",
            "What are the origins of feminist music",
            "What were the earliest insecticides to be used",
            "who were the Drevlians",
            "Who were the founders of A.F.C. Euro Kickers",
            "when was the camera-on-a-chip developed",
        )
    )
)
|
|
|
# --- Search flow: read a question, retrieve passages, generate an answer ---
st.write("If you encounter an error, search again.")
query = st.text_input("Search!", "")

# Skip retrieval and generation for empty or whitespace-only input.
if query.strip():
    with st.spinner(text="Wait a sec 🚀🚀🚀"):
        # Encode the question and fetch the top_k matches with their metadata.
        xq = retriever.encode([query]).tolist()
        xc = index.query(xq, top_k=int(top_k), include_metadata=True)
        # From here on `query` holds the full generator prompt.
        query = format_query(query, xc["matches"])

    with st.spinner(text="Just a minute ✍️✍️✍️"):
        answer = generator.generate(query, min_length=min_length, max_length=max_length)

    st.write("#### System generated response:")
    display_answer(answer)
    st.write("#### Here are some resources you might find relevant:")

    for m in xc["matches"]:
        title = m["metadata"]["article_title"]
        # Percent-encode the title so characters such as '?', '#', '%' or
        # quotes cannot change the meaning of the URL.
        url = "https://en.wikipedia.org/wiki/" + quote(title.replace(" ", "_"))
        context = m["metadata"]["passage_text"]
        display_context(title, context, url)