File size: 7,064 Bytes
9f6ab40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import html
from urllib.parse import quote

import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import BartTokenizer, BartForConditionalGeneration
class BartGenerator:
    """Pair a BART tokenizer with its conditional-generation model.

    Both pieces are loaded with ``from_pretrained(model_name)`` and used
    together to turn a text prompt into a generated answer string.
    """

    def __init__(self, model_name):
        """Load the tokenizer and the generation model for ``model_name``."""
        self.tokenizer = BartTokenizer.from_pretrained(model_name)
        self.generator = BartForConditionalGeneration.from_pretrained(model_name)

    def tokenize(self, query, max_length=1024):
        """Tokenize ``query`` as a one-item batch of PyTorch tensors.

        Args:
            query: Prompt text to encode.
            max_length: Upper bound on the encoded length, in tokens.

        Returns:
            The tokenizer output (indexed by ``"input_ids"`` in ``generate``).
        """
        # Truncation is requested explicitly so that ``max_length`` is an
        # actual limit rather than relying on the tokenizer's fallback.
        return self.tokenizer(
            [query],
            max_length=max_length,
            truncation=True,
            return_tensors="pt",
        )

    def generate(self, query, min_length=20, max_length=40, temperature=None):
        """Generate an answer string for ``query``.

        Args:
            query: Prompt text passed to ``tokenize``.
            min_length: Minimum generated length; coerced with ``int``.
            max_length: Maximum generated length; coerced with ``int``.
            temperature: Optional sampling temperature. When ``None`` (the
                default) decoding is deterministic with ``num_beams=1``.
                When given, sampling is switched on and the value is
                forwarded as a float, so fractional values are preserved.

        Returns:
            The first decoded sequence, with special tokens removed.
        """
        inputs = self.tokenize(query)

        # Temperature is only forwarded together with do_sample=True, and
        # never looked up from an enclosing/global scope.
        sampling_kwargs = {}
        if temperature is not None:
            sampling_kwargs = {
                "do_sample": True,
                "temperature": float(temperature),
            }

        ids = self.generator.generate(
            inputs["input_ids"],
            num_beams=1,
            min_length=int(min_length),
            max_length=int(max_length),
            **sampling_kwargs,
        )
        decoded = self.tokenizer.batch_decode(
            ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return decoded[0]
@st.experimental_singleton
def init_models():
    """Build the retriever/generator pair used by the app.

    Returns:
        A ``(retriever, generator)`` tuple: a ``SentenceTransformer`` for
        encoding queries and a ``BartGenerator`` for writing answers.
    """
    # "multi-qa-mpnet-base-cos-v1" was noted as an alternative retriever.
    return (
        SentenceTransformer("flax-sentence-embeddings/all_datasets_v3_mpnet-base"),
        BartGenerator("vblagoje/bart_lfqa"),
    )
# Pinecone API key, read from Streamlit's secrets store under "PINECONE_KEY".
PINECONE_KEY = st.secrets["PINECONE_KEY"]
@st.experimental_singleton
def init_pinecone():
    """Initialise the Pinecone client and return the ``history-qa`` index.

    Uses ``PINECONE_KEY`` as the API key and the ``us-west1-gcp`` environment.
    """
    pinecone.init(environment="us-west1-gcp", api_key=PINECONE_KEY)
    history_index = pinecone.Index("history-qa")
    return history_index
# Module-level handles used by the search flow below. Both initialisers are
# decorated with st.experimental_singleton.
retriever, generator = init_models()
index = init_pinecone()
def display_answer(answer):
    """Render the generated answer as grey text inside a Bootstrap container.

    The answer is model output, and the markup is rendered with
    ``unsafe_allow_html=True``, so the text is HTML-escaped first to keep
    characters such as ``<`` or ``&`` from being interpreted as markup.

    Args:
        answer: Text to display; converted with ``str`` before escaping.

    Returns:
        Whatever ``st.markdown`` returns.
    """
    safe_answer = html.escape(str(answer))
    return st.markdown(f"""
<div class="container-fluid">
<div class="row align-items-start">
<div class="col-md-12 col-sm-12">
<span style="color: #808080;">
{safe_answer}
</span>
</div>
</div>
</div>
""", unsafe_allow_html=True)
def display_context(title, context, url):
    """Render one retrieved passage as a titled link with grey body text.

    All three values are interpolated into markup rendered with
    ``unsafe_allow_html=True``, so each is HTML-escaped, and the ``href``
    value is wrapped in double quotes so that spaces or special characters
    in the URL cannot break out of the attribute.

    Args:
        title: Link text.
        context: Passage text shown under the link.
        url: Link target.

    Returns:
        Whatever ``st.markdown`` returns.
    """
    safe_title = html.escape(str(title))
    safe_context = html.escape(str(context))
    # quote=True also escapes quote characters, which matters inside href="...".
    safe_url = html.escape(str(url), quote=True)
    return st.markdown(f"""
<div class="container-fluid">
<div class="row align-items-start">
<div class="col-md-12 col-sm-12">
<a href="{safe_url}">{safe_title}</a>
<br>
<span style="color: #808080;">
<small>{safe_context}</small>
</span>
</div>
</div>
</div>
""", unsafe_allow_html=True)
# CSS that sets visibility: hidden on the elements matched by the
# "#MainMenu" and "footer" selectors, injected as raw HTML.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Page title and introductory copy, written as Markdown.
# Spelling corrected in the user-facing text: "e-resource", "museums", "useful".
st.write("""
# Jua Historia Yetu
### An AI Powered Search Engine for East African History and Tourism!
This is an AI powered system designed to help learn about our history, heroes, cultures and tourist destinations.
The system generates a Human-like response to questions asked and points users to where they
can get more information on what they would like to know.
It is intended to act as a one-stop search engine for all things East Africa including the people, history, culture, wildlife and tourist destinations.
It can be of use to locals, tourists, students or anyone who would like to learn about The East African Community.
The data is to be sourced from the EAC e-resource database, member nations' museums, archives and relevant tourism bodies.
Once queried, the system generates a short answer that the user can quickly read through and also points the user to
some resources they might find useful. The user can click on the links to learn more.
""")
# Load the Bootstrap stylesheet that supplies the container/row/col classes
# used by display_answer and display_context.
# The package specifier had been mangled into "[email protected]", which is not a
# valid CDN path; it is restored to the 4.0.0 release that the integrity
# hash corresponds to.
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)
def format_query(query, context):
    """Build the generator prompt from a question and retrieved matches.

    Args:
        query: The user's question.
        context: Iterable of matches, each exposing
            ``match["metadata"]["passage_text"]``.

    Returns:
        ``"question: <query> context: <passages>"`` where every passage is
        prefixed with ``"<P> "`` and passages are separated by one space.
    """
    passages = " ".join(
        f"<P> {match['metadata']['passage_text']}" for match in context
    )
    return f"question: {query} context: {passages}"
# set parameters
# top_k is the number of matches requested from the index query below.
top_k = 5
# min_length / max_length are forwarded to generator.generate() below.
min_length = 1
max_length = 150
# NOTE(review): temperature is not passed at the generate() call site below;
# confirm how (or whether) it is meant to be consumed.
temperature = 3.5
# Sidebar: sample questions users can copy into the search box.
# The string is not raw, so each trailing "\n" is a newline escape.
st.sidebar.write("""
## Here are some questions you can try out:
### Copy and paste to test
who was the first person on the moon?\n
Which was the first radio station at Auburn University\n
where is Damastown located\n
What is the Lohanipur Torso \n
when was The Coliseum Theatre opened\n
Who invented the tatoo machine\n
whats th erecipe for Corn chowder\n
when was the Tamil Methodist Church built\n
when was the first electric power system built?\n
How was the first wireless message sent?\n
what was the war of currents?\n
what was NASAs most expensive project?\n
What brands of smokoing paper are manufactured by Miguel y Costas\n
what influenced the naming Holy Forty Martyrs Church\n
When was the world first power system built\n
which is the largest island within the Halifax Harbour\n
Who was Joseph Monier\n
who were the Karadjordjevic dynasty\n
how many royal tombs were excavated at Tillia Tepe\n
What did the HEICO company manufacture\n
tell me about The Battle of Antietam\n
Which was the smallest microbrewery in the United States\n
when did queen marie recieve the bran castle\n
Whe was York Township founded\n
When did the United Nations Security Council reform the security sector\n
When was Magandang Umaga Po first aired\n
when was Mae Lan District formed\n
what is Voice over Internet Protocol\n
When was InfluxDB developed\n
When was the Semanário Económico newspaper started\n
who owned Kasteln Castle\n
when was The Steinbach Haus built\n
when was the Guerrero ship in Africa\n
tell me about the Guerrero ship\n
When was the Companhia Paulista de Trens Metropolitanos rilway built\n
When was the lincoln mall demolished\n
where is Damastown located\n
when was solo diving first practiced\n
when was Consumers Credit Union History Consumers Credit Union was founded\n
Who built the castle of Daroynk\n
What is the prime meridian\n
Which was the first radio station at Auburn University\n
What are the origins of feminist music\n
What were the earliest insecticides to be used\n
who were the Drevlians\n
Who were the founders of A.F.C. Euro Kickers\n
when was the camera-on-a-chip developed\n
""")
# Main search flow: read the question, retrieve matching passages, generate
# an answer, then list the passages as links.
st.write("If you encounter an error, search again.")
query = st.text_input("Search!", "")
# Skip the pipeline for empty or whitespace-only input instead of sending a
# blank query to the retriever and the index.
if query.strip():
    with st.spinner(text="Wait a sec 🚀🚀🚀"):
        xq = retriever.encode([query]).tolist()
        xc = index.query(xq, top_k=int(top_k), include_metadata=True)
        matches = xc["matches"]
        # Keep the user's text in `query`; the generator prompt gets its own name.
        prompt = format_query(query, matches)
    with st.spinner(text="Just a minute ✍️✍️✍️"):
        answer = generator.generate(prompt, min_length=min_length, max_length=max_length)
    st.write("#### System generated response:")
    display_answer(answer)
    st.write("#### Here are some resources you might find relevant:")
    for m in matches:
        metadata = m["metadata"]
        title = metadata["article_title"]
        # Percent-encode the title so characters such as "?", "#" or "&" stay
        # part of the page path instead of being read as URL syntax.
        url = "https://en.wikipedia.org/wiki/" + quote(title.replace(" ", "_"))
        context = metadata["passage_text"]
        display_context(title, context, url)