Spaces:

Arylwen
/

mlk8s

Sleeping

App Files Files Community

mlk8s / app.py

Arylwen

0.1.0 suggested topics and ethics tab

bf1eda4 almost 2 years ago

raw

history blame

22.2 kB

	import streamlit as st

	import os
	import re
	import sys
	import time
	import base64
	import random
	import logging
	logging.basicConfig(stream=sys.stdout, level=logging.INFO)
	logger = logging.getLogger(__name__)

	from dotenv import load_dotenv
	load_dotenv()

	# debug aid: write every session-state entry to stdout
	for key, value in st.session_state.items():
	    print(f'session state entry: {key} {value}')

	# truthy only when the __SPACES__ environment variable is set to a non-empty value
	__spaces__ = os.environ.get('__SPACES__')

	if __spaces__:
	    # the request log is imported lazily, so its module is only loaded when the flag is set
	    from kron.persistence.dynamodb_request_log import get_request_log
	    st.session_state.request_log = get_request_log()

	# third party service access (Hugging Face inference API, Cohere, Baseten);
	# a missing variable raises KeyError while the script is loading
	hf_api_key, ch_api_key, bs_api_key = (
	    os.environ[name] for name in ('HF_TOKEN', 'COHERE_TOKEN', 'BASETEN_TOKEN')
	)

	# model name from which the index name is derived
	index_model = "Writer/camel-5b-hf"
	# the '/' of the model id is replaced with '-' before the fixed suffix is appended
	INDEX_NAME = "-".join(index_model.split('/')) + "-default-no-coref"
	# directory the persisted index is loaded from
	persist_path = "storage/" + INDEX_NAME
	# generation length limit handed to the LLM wrappers
	MAX_LENGTH = 1024

	import baseten
	@st.cache_resource
	def set_baseten_key(bs_api_key):
	    """Log in to Baseten with *bs_api_key*.

	    Wrapped in ``st.cache_resource`` so the login call is cached per key value.
	    """
	    baseten.login(bs_api_key)


	# log in during script start-up with the key read from the environment
	set_baseten_key(bs_api_key)

	def autoplay_video(video_path):
	    """Render the MP4 at *video_path* as a looping, autoplaying ``<video>`` element.

	    The whole file is read and inlined in the page as a base64 ``data:`` URI.
	    """
	    with open(video_path, "rb") as video_file:
	        encoded = base64.b64encode(video_file.read()).decode()

	    video_str = f"data:video/mp4;base64,{encoded}"
	    # raw HTML is required for the <video> tag, hence unsafe_allow_html
	    st.markdown(f"""
	<video style="display: block; margin: auto; width: 140px;" controls loop autoplay width="140" height="180">
	<source src="{video_str}" type="video/mp4">
	</video>
	""", unsafe_allow_html=True)

	# sidebar: title, KG construction video next to an explanatory blurb, then attribution captions
	with st.sidebar:
	    st.header('KG Questions')
	    video, text = st.columns([2, 2])
	    with video:
	        autoplay_video('docs/images/kg_construction.mp4')
	    with text:
	        st.write(
	            '''
	###### The construction of a Knowledge Graph is mesmerizing.
	###### Concepts in the middle are what most are doing. Are we considering anything different? Why? Why not?
	###### Concepts on the edge are what few are doing. Are we considering that? Why? Why not?
	'''
	        )
	    # link text restored from the target URL: the scraped text had an e-mail-obfuscation placeholder
	    st.caption('''###### corpus by [@ArxivHealthcareNLP@sigmoid.social](https://sigmoid.social/@ArxivHealthcareNLP)''')
	    st.caption('''###### KG Questions by [arylwen](https://github.com/arylwen/mlk8s)''')
	    # disabled prompt template, kept for reference:
	    # st.write(
	    #f'''
	    ##### How can <what most are doing> help with <what few are doing>?
	    #''')

	from llama_index import StorageContext
	from llama_index import ServiceContext
	from llama_index import load_index_from_storage
	from llama_index.langchain_helpers.text_splitter import SentenceSplitter
	from llama_index.node_parser import SimpleNodeParser
	from llama_index import LLMPredictor

	from langchain import HuggingFaceHub
	from langchain.llms.cohere import Cohere
	from langchain.llms import Baseten

	import tiktoken

	import openai
	#extensions to llama_index to support openai compatible endpoints, e.g. llama-api
	from kron.llm_predictor.KronOpenAILLM import KronOpenAI
	#baseten deployment expects a specific request format
	from kron.llm_predictor.KronBasetenCamelLLM import KronBasetenCamelLLM
	from kron.llm_predictor.KronLLMPredictor import KronLLMPredictor

	#writer/camel uses endoftext
	from llama_index.utils import globals_helper
	enc = tiktoken.get_encoding("gpt2")


	def tokenizer(text):
	    """Encode *text* with the gpt2 encoding, allowing the ``<|endoftext|>`` special token.

	    The allowed token is spelled without backslashes so it matches the real
	    special token; the escaped spelling would never match it.
	    """
	    return enc.encode(text, allowed_special={"<|endoftext|>"})


	# replace llama_index's tokenizer with the one defined above
	globals_helper._tokenizer = tokenizer


	def set_openai_local():
	    """Point the ``openai`` module and the OPENAI_* variables at the local endpoint.

	    Reads LOCAL_OPENAI_API_KEY and LOCAL_OPENAI_API_BASE from the environment;
	    a missing variable raises KeyError.
	    """
	    env = os.environ
	    openai.api_key = env['LOCAL_OPENAI_API_KEY']
	    openai.api_base = env['LOCAL_OPENAI_API_BASE']
	    # mirror the same values into the generic variable names
	    env['OPENAI_API_KEY'] = env['LOCAL_OPENAI_API_KEY']
	    env['OPENAI_API_BASE'] = env['LOCAL_OPENAI_API_BASE']

	def set_openai():
	    """Point the ``openai`` module and the OPENAI_* variables at the davinci endpoint.

	    Reads DAVINCI_OPENAI_API_KEY and DAVINCI_OPENAI_API_BASE from the environment;
	    a missing variable raises KeyError.
	    """
	    env = os.environ
	    openai.api_key = env['DAVINCI_OPENAI_API_KEY']
	    openai.api_base = env['DAVINCI_OPENAI_API_BASE']
	    # mirror the same values into the generic variable names
	    env['OPENAI_API_KEY'] = env['DAVINCI_OPENAI_API_KEY']
	    env['OPENAI_API_BASE'] = env['DAVINCI_OPENAI_API_BASE']

	def get_hf_predictor(query_model):
	    """Return an LLMPredictor backed by the Hugging Face Hub repo *query_model*."""
	    # no embeddings for now
	    set_openai_local()
	    generation_kwargs = {"temperature": 0.01, "max_new_tokens": MAX_LENGTH, 'frequency_penalty':1.17}
	    hub_llm = HuggingFaceHub(
	        repo_id=query_model,
	        task="text-generation",
	        model_kwargs=generation_kwargs,
	        huggingfacehub_api_token=hf_api_key,
	    )
	    return LLMPredictor(hub_llm)

	def get_cohere_predictor(query_model):
	    """Return an LLMPredictor backed by Cohere's 'command' model.

	    *query_model* is accepted for signature parity with the other factories
	    but is not used: the model name is fixed.
	    """
	    # no embeddings for now
	    set_openai_local()
	    # model_kwargs={"temperature": 0.01, "max_length": MAX_LENGTH} is currently disabled
	    cohere_llm = Cohere(
	        model='command',
	        temperature=0.01,
	        cohere_api_key=ch_api_key,
	    )
	    return LLMPredictor(cohere_llm)

	def get_baseten_predictor(query_model):
	    """Return an LLMPredictor backed by the Baseten deployment '3yd1ke3'.

	    *query_model* is not used: the deployment id is fixed.
	    """
	    # no embeddings for now
	    set_openai_local()
	    # alternative kept for reference: 'repetition_penalty':1.07 instead of frequency_penalty
	    generation_kwargs = {"temperature": 0.01, "max_length": MAX_LENGTH, 'frequency_penalty':1}
	    # NOTE(review): the Cohere key is passed to the Baseten wrapper; looks copied from
	    # the Cohere factory -- confirm KronBasetenCamelLLM actually needs it
	    baseten_llm = KronBasetenCamelLLM(
	        model='3yd1ke3',
	        temperature=0.01,
	        model_kwargs=generation_kwargs,
	        cohere_api_key=ch_api_key,
	    )
	    return LLMPredictor(baseten_llm)

	def get_kron_openai_predictor(query_model):
	    """Return a KronLLMPredictor wrapping a KronOpenAI LLM for *query_model*."""
	    kron_llm = KronOpenAI(temperature=0.01, model=query_model)
	    # the generation limit is assigned after construction
	    kron_llm.max_tokens = MAX_LENGTH
	    return KronLLMPredictor(kron_llm)

	def get_servce_context(llm_predictor):
	    """Build the ServiceContext for *llm_predictor*.

	    Text goes through a SentenceSplitter (chunk size 192, overlap 48,
	    newline as paragraph separator) wrapped in a SimpleNodeParser.
	    """
	    splitter = SentenceSplitter(chunk_size=192, chunk_overlap=48, paragraph_separator='\n')
	    parser = SimpleNodeParser(text_splitter=splitter)
	    return ServiceContext.from_defaults(llm_predictor=llm_predictor, node_parser=parser)

	# hack - once the result is cached, callers may pass any placeholder as the index
	@st.cache_data
	def get_networkx_graph_nodes(_index, persist_path):
	    """Return the entries of the index graph's ``degree`` view, largest degree first.

	    The underscore on ``_index`` keeps Streamlit from hashing it, so the cache
	    is keyed on *persist_path* alone.
	    """
	    graph = _index.get_networkx_graph(100000)
	    by_degree = list(graph.degree)
	    # element [1] of each entry is the sort key (presumably the node's degree)
	    by_degree.sort(key=lambda entry: entry[1], reverse=True)
	    return by_degree

	@st.cache_data
	def get_networkx_low_connected_components(_index, persist_path):
	    """Return every graph node, grouped by connected component, smallest component first.

	    As with the sibling helper, ``_index`` is excluded from the cache key and
	    only *persist_path* identifies the cached result.
	    """
	    import networkx as nx

	    graph = _index.get_networkx_graph(100000)
	    components_small_first = sorted(nx.connected_components(graph), key=len, reverse=False)
	    # flatten the components into one list of terms, keeping the component order
	    return [term for component in components_small_first for term in component]

	def get_index(service_context, persist_path):
	    """Load the persisted index from *persist_path* and warm the graph caches."""
	    print(f'Loading index from {persist_path}')
	    # rebuild the storage context from the persisted directory
	    storage = StorageContext.from_defaults(persist_dir=persist_path)
	    loaded_index = load_index_from_storage(
	        storage_context=storage,
	        service_context=service_context,
	        max_triplets_per_chunk=2,
	        show_progress=False,
	    )
	    # populate the cached node lists now, while a real index object is available
	    get_networkx_graph_nodes(loaded_index, persist_path)
	    get_networkx_low_connected_components(loaded_index, persist_path)
	    return loaded_index

	def get_query_engine(index):
	    """Return a query engine for *index* that uses the 'accumulate' response mode."""
	    # writer/camel does not understand the refine prompt, so 'accumulate' is used
	    return index.as_query_engine(response_mode='accumulate')

	def load_query_engine(llm_predictor, persist_path):
	    """Load the index at *persist_path* with *llm_predictor* and return its query engine."""
	    loaded_index = get_index(get_servce_context(llm_predictor), persist_path)
	    print(f'No query engine for {persist_path}; creating')
	    return get_query_engine(loaded_index)

	@st.cache_resource
	def build_kron_query_engine(query_model, persist_path):
	    """Return the cached query engine that answers through the Kron OpenAI predictor."""
	    return load_query_engine(get_kron_openai_predictor(query_model), persist_path)

	@st.cache_resource
	def build_hf_query_engine(query_model, persist_path):
	    """Return the cached query engine that answers through the Hugging Face Hub predictor."""
	    return load_query_engine(get_hf_predictor(query_model), persist_path)

	@st.cache_resource
	def build_cohere_query_engine(query_model, persist_path):
	    """Return the cached query engine that answers through the Cohere predictor."""
	    return load_query_engine(get_cohere_predictor(query_model), persist_path)

	@st.cache_resource
	def build_baseten_query_engine(query_model, persist_path):
	    """Return the cached query engine that answers through the Baseten predictor."""
	    return load_query_engine(get_baseten_predictor(query_model), persist_path)

	def format_response(answer):
	    """Strip dash runs from ``answer.response`` and return the cleaned text.

	    The cleaned text is also written back to ``answer.response``. Returns the
	    string "None" when the response is missing (``None``) or empty after
	    cleaning; a ``None`` response used to raise TypeError inside ``re.sub``.
	    """
	    raw = answer.response
	    if raw is None:
	        return "None"
	    # Replace any eventual -- (runs of 2 to 50 dashes)
	    dashes = r'(\-{2,50})'
	    answer.response = re.sub(dashes, '', raw)
	    return answer.response or "None"

	def clear_question(query_model):
	    """Reset the question/answer session state when the selected model changes.

	    Does nothing when ``prev_model`` is already *query_model*. Otherwise any
	    text pending in the input box becomes the current question, the stored
	    answer, rating and timing are reset, and *query_model* is remembered.
	    """
	    state = st.session_state
	    had_previous = 'prev_model' in state
	    if had_previous and state.prev_model == query_model:
	        return

	    if had_previous:
	        print(f'clearing question {state.prev_model} {query_model}')
	    else:
	        print(f'clearing question None {query_model}')

	    if 'question_input' in state:
	        # carry over whatever is in the input box, then empty the box
	        state.question = state.question_input
	        state.question_input = ''
	    state.question_answered = False
	    state.answer = ''
	    state.answer_rating = 3
	    state.elapsed = 0
	    state.prev_model = query_model

	query, measurable, explainable, ethical = st.tabs(["Query", "Measurable", "Explainable", "Ethical"])

	initial_query = ''

	# make sure the session always has a question entry
	if 'question' not in st.session_state:
	    st.session_state.question = ''

	# the offered models depend on whether the __SPACES__ flag is set
	if __spaces__:
	    model_choices = ('hf/tiiuae/falcon-7b-instruct', 'cohere/command', 'baseten/Camel-5b', 'openai/text-davinci-003') #TODO start hf inference container on demand
	else:
	    model_choices = ('Local-Camel', 'HF-TKI', 'hf/tiiuae/falcon-7b-instruct', 'openai/text-davinci-003')

	with query:
	    answer_model = st.radio(
	        "Choose the model used for inference:",
	        model_choices
	    )

	# radio label -> (query model id, optional OpenAI endpoint switch, query-engine builder)
	_MODEL_DISPATCH = {
	    'openai/text-davinci-003': ('text-davinci-003', set_openai, build_kron_query_engine),
	    'hf/tiiuae/falcon-7b-instruct': ('tiiuae/falcon-7b-instruct', None, build_hf_query_engine),
	    'cohere/command': ('cohere/command', None, build_cohere_query_engine),
	    'baseten/Camel-5b': ('baseten/Camel-5b', None, build_baseten_query_engine),
	    'Local-Camel': ('Writer/camel-5b-hf', set_openai_local, build_kron_query_engine),
	    'HF-TKI': ('allenai/tk-instruct-3b-def-pos-neg-expl', None, build_hf_query_engine),
	}

	if answer_model in _MODEL_DISPATCH:
	    query_model, configure_openai, build_engine = _MODEL_DISPATCH[answer_model]
	    print(answer_model)
	    clear_question(query_model)
	    if configure_openai is not None:
	        # only the OpenAI-compatible back ends switch endpoint before the engine is built
	        configure_openai()
	    query_engine = build_engine(query_model, persist_path)
	    # the graph helpers are cached on persist_path, so a placeholder index is enough here
	    graph_nodes = get_networkx_graph_nodes("", persist_path)
	    top_nodes = graph_nodes[:100]
	    # sample at most 5: random.sample raises ValueError when asked for more items than exist
	    most_connected = random.sample(top_nodes, min(5, len(top_nodes)))
	    low_connected = get_networkx_low_connected_components("", persist_path)
	    least_connected = random.sample(low_connected, min(5, len(low_connected)))
	else:
	    # unknown label: halt instead of failing later with a NameError on query_engine
	    print('This is a bug.')
	    st.error(f'Unknown model selection: {answer_model}')
	    st.stop()

	# to clear input box
	def submit():
	    """on_change callback: move the typed text into ``question`` and empty the input box."""
	    state = st.session_state
	    state.question, state.question_input = state.question_input, ''
	    # mark the new question as not yet answered
	    state.question_answered = False

	# read-only (disabled) selectboxes listing the sampled graph concepts
	with st.sidebar:
	    option_1 = st.selectbox(
	        "What most are studying:",
	        most_connected,
	        disabled=True,
	    )
	    option_2 = st.selectbox(
	        "What few are studying:",
	        least_connected,
	        disabled=True,
	    )

	with query:
	    st.caption('''###### Only intended for educational and research purposes. Please do not enter any private or confidential information. Model, question, answer and rating are logged to improve KG Questions.''')
	    #st.caption(f'''Model, question, answer and rating are logged to improve KG Questions.''')
	    # submit() copies the text into session_state.question and empties the box
	    question = st.text_input(
	        "Enter a question, e.g. What benchmarks can we use for QA?",
	        key='question_input',
	        on_change=submit,
	    )

	# Answer the current question and render the four tabs.
	# Changes from the previous version:
	#   * the generated graph HTML is read through a context manager, so the handle is closed
	#   * DataFrames get their column names in the constructor, so an empty sources
	#     table no longer raises a length-mismatch error
	#   * the reference texts are joined in one pass instead of concatenated in a loop
	if st.session_state.question:
	    try:
	        with query:
	            col1, col2 = st.columns([2, 2])
	            if not st.session_state.question_answered:
	                # first pass for this question: run the query and keep the result in session state
	                with st.spinner(f'Answering: {st.session_state.question} with {query_model}.'):
	                    start = time.time()
	                    answer = query_engine.query(st.session_state.question)
	                    st.session_state.answer = answer
	                    st.session_state.question_answered = True
	                    end = time.time()
	                    st.session_state.elapsed = (end-start)
	            else:
	                # already answered: reuse the stored answer instead of querying again
	                answer = st.session_state.answer
	            answer_str = format_response(answer)
	            with col1:
	                if answer_str:
	                    elapsed = '{:.2f}'.format(st.session_state.elapsed)
	                    st.write(f'Answered: {st.session_state.question} with {query_model} in {elapsed}s. Please rate this answer.')
	            with col2:
	                from streamlit_star_rating import st_star_rating
	                stars = st_star_rating("", maxValue=5, defaultValue=3, key="answer_rating")
	            st.write(answer_str)

	        with measurable:
	            from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
	            import matplotlib.pyplot as plt
	            from PIL import Image
	            from llama_index.schema import NodeRelationship
	            wc_all, wc_question, wc_reference = st.columns([3, 3, 3])
	            wordcloud = WordCloud(max_font_size=50, max_words=1000, background_color="white")
	            with wc_all:
	                # pre-rendered image for the whole corpus
	                image = Image.open('docs/images/all_papers_wordcloud.png')
	                st.image(image)
	                st.caption('''###### Corpus term frequecy.''')
	            with wc_question:
	                wordcloud_q = wordcloud.generate(answer_str)
	                st.image(wordcloud_q.to_array())
	                st.caption('''###### Answer term frequecy.''')
	            with wc_reference:
	                # every source node contributes its text, each preceded by a newline
	                #if NodeRelationship.SOURCE in node.relationships:
	                all_reference_texts = ''.join(
	                    '\n' + nodewithscore.node.text for nodewithscore in answer.source_nodes
	                )
	                wordcloud_r = wordcloud.generate(all_reference_texts)
	                st.image(wordcloud_r.to_array())
	                st.caption('''###### Reference plus graph term frequecy.''')

	        with explainable:
	            #st.write(answer.source_nodes)
	            from pyvis.network import Network
	            from llama_index.schema import NodeRelationship
	            graph = Network(height="450px", width="100%")
	            sources_table = []
	            for nodewithscore in answer.source_nodes:
	                node = nodewithscore.node
	                if NodeRelationship.SOURCE in node.relationships:
	                    # NOTE(review): assumes source ids end in '<arxiv id>.<title_with_underscores>...'
	                    # -- confirm against the ingestion pipeline
	                    node_id = node.relationships[NodeRelationship.SOURCE].node_id
	                    node_id = node_id.split('/')[-1]
	                    title = node_id.split('.')[2].replace('_', ' ')
	                    link = '.'.join(node_id.split('.')[:2])[:10]
	                    link = f'https://arxiv.org/abs/{link}'
	                    href = f'<a target="_blank" href="{link}">{title}</a>'
	                    sources_table.append([href, node.text])
	                else:
	                    #st.write(node.text) TODO second level relationships
	                    # nodes without a SOURCE relationship carry the relation map drawn as a graph
	                    rel_map = node.metadata['kg_rel_map']
	                    for concept, rels in rel_map.items():
	                        graph.add_node(concept, concept, title=concept)
	                        for rel in rels:
	                            graph.add_node(rel[1], rel[1], title=rel[1])
	                            graph.add_edge(concept, rel[1], title=rel[0])
	            # --- display the query terms graph
	            st.session_state.graph_name = 'graph.html'
	            graph.save_graph(st.session_state.graph_name)
	            import streamlit.components.v1 as components
	            with open(st.session_state.graph_name, 'r', encoding='utf-8') as graph_html:
	                source_code = graph_html.read()
	            components.html(source_code, height = 500)
	            # --- display the reference texts table
	            import pandas as pd
	            # column names go to the constructor so an empty table still gets both columns
	            df = pd.DataFrame(sources_table, columns=['paper', 'relevant text'])
	            st.markdown(""" <style>
	table[class*="dataframe"] {
	font-size: 10px;
	}
	</style> """, unsafe_allow_html=True)
	            # escape=False keeps the <a> links clickable
	            st.write(df.to_html(escape=False), unsafe_allow_html=True)

	        with ethical:
	            import pandas as pd
	            st.write('##### Bias, risks, limitations and terms of use for the models.')
	            falcon = ['hf/tiiuae/falcon-7b-instruct', '<a target="_blank" href="https://huggingface.co/tiiuae/falcon-7b">Bias, Risks, and Limitations</a>']
	            cohere = ['cohere/command', '<a target="_blank" href="https://cohere.com/terms-of-use">Terms of use</a>']
	            camel = ['baseten/Camel-5b', '<a target="_blank" href="https://huggingface.co/Writer/camel-5b-hf">Bias, Risks, and Limitations</a>']
	            davinci = ['openai/text-davinci-003', '<a target="_blank" href="https://openai.com/policies/terms-of-use">Terms of Use</a>']

	            ethics_statement = [falcon, cohere, camel, davinci]
	            df = pd.DataFrame(ethics_statement, columns=['model', 'model link'])
	            st.markdown(""" <style>
	table[class*="dataframe"] {
	font-size: 14px;
	}
	</style> """, unsafe_allow_html=True)
	            st.write(df.to_html(escape=False), unsafe_allow_html=True)
	            # license
	            st.write('')
	            st.write('##### How papers were included in the index based on license.')
	            st.caption('The paper id and title has been included in the index for a full attribution to the authors')
	            ccby = ['<a target="_blank" href="https://creativecommons.org/licenses/by/4.0/">CC BY</a>',
	                    '<a target="_blank" href="https://github.com/arylwen/mlk8s/tree/main/apps/papers-kg">full content KG questions pipeline</a>']
	            ccbysa = ['<a target="_blank" href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA</a>',
	                      '<a target="_blank" href="https://github.com/arylwen/mlk8s/tree/main/apps/papers-kg">full content KG questions pipeline</a>']
	            ccbyncsa = ['<a target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC NC-BY-NC-SA</a>',
	                        '<a target="_blank" href="https://github.com/arylwen/mlk8s/tree/main/apps/papers-kg">full content KG questions pipeline</a>']
	            ccbyncnd = ['<a target="_blank" href="https://creativecommons.org/licenses/by-nc-nd/4.0/">CC NC-BY-NC-ND</a>',
	                        '<a target="_blank" href="https://github.com/arylwen/mlk8s/tree/main/apps/papers-kg">arxiv metadata abstract KG questions pipeline</a>']
	            license_statement = [ccby, ccbysa, ccbyncsa, ccbyncnd]
	            df = pd.DataFrame(license_statement, columns=['license', 'how papers are used'])
	            st.markdown(""" <style>
	table[class*="dataframe"] {
	font-size: 14px;
	}
	</style> """, unsafe_allow_html=True)
	            st.write(df.to_html(escape=False), unsafe_allow_html=True)

	    except Exception as e:
	        # top-level boundary: show the error to the user and record it as the answer, rated -1
	        #print(f'{type(e)}, {e}')
	        answer_str = f'{type(e)}, {e}'
	        st.session_state.answer_rating = -1
	        st.write(f'An error occured, please try again. \n{answer_str}')
	    finally:
	        # log model, question, answer (or error text) and rating when running with __SPACES__ set
	        if 'question' in st.session_state:
	            req = st.session_state.question
	            if __spaces__:
	                st.session_state.request_log.add_request_log_entry(query_model, req, answer_str, st.session_state.answer_rating)