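"""Streamlit semantic-search app.

Upload a plain-text file, embed each line with intfloat/multilingual-e5-large,
and search it in free-form language; optionally rerank the top hits with an
OpenAI chat model. Run locally with `streamlit run app.py` (the filename is an
assumption; use whatever name this file is saved under).
"""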
import streamlit as st
from streamlit.logger import get_logger
import pandas as pd
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage
from sentence_transformers import util
from torch import tensor
from io import StringIO

LOGGER = get_logger(__name__)

@st.cache_data
def get_df(uploaded_file) -> object:
    """Read the uploaded text file into a one-line-per-row DataFrame."""
    if uploaded_file is None:
        return None
    stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
    string_data = stringio.read()
    # Drop empty lines so they are not embedded and returned as results.
    lines = [line for line in string_data.split('\n') if line.strip()]
    df = pd.DataFrame(lines, columns=['text'])
    return df

@st.cache_data
def get_embeddings(df, _embeddings_model) -> object:
    # E5 models expect indexed documents to carry a 'passage: ' prefix (queries
    # use 'query: '); the leading underscore on _embeddings_model tells
    # st.cache_data not to try to hash the model object.
    df['embeddings'] = df['text'].apply(lambda x: _embeddings_model.embed_query('passage: ' + x))
    return df

@st.cache_resource
def get_model() -> object:
    model_name = "intfloat/multilingual-e5-large"
    model_kwargs = {'device': 'cuda'}  # 'cpu' or 'cuda'
    encode_kwargs = {'normalize_embeddings': True}
    embeddings_model = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )
    return embeddings_model
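
# A minimal sanity-check sketch (hypothetical, not part of the app):
# embed_query returns a plain Python list of floats; multilingual-e5-large
# vectors are 1024-dimensional, and normalize_embeddings=True makes them
# unit-length, so cosine similarity reduces to a dot product.
#
#   vec = get_model().embed_query('query: hello')
#   assert len(vec) == 1024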

@st.cache_resource
def get_chat_api(api_key: str):
    chat = ChatOpenAI(model="gpt-3.5-turbo-16k", api_key=api_key)
    return chat

def get_results(embeddings_model, query, df, num_of_results) -> pd.DataFrame:
    # E5 queries must be prefixed with 'query: ' to match the indexed passages.
    embeddings = embeddings_model.embed_query('query: ' + query)
    hits = util.semantic_search(tensor(embeddings), tensor(df['embeddings'].tolist()), top_k=num_of_results)
    # semantic_search returns one hit list per query; only one query is sent here.
    hit_list = [hit['corpus_id'] for hit in hits[0]]
    return df.iloc[hit_list]
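
# For reference, util.semantic_search returns a list of hit lists, one per
# query, each hit being a dict with 'corpus_id' and 'score' keys
# (illustrative values):
#
#   [[{'corpus_id': 3, 'score': 0.91}, {'corpus_id': 0, 'score': 0.87}, ...]]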

def get_llm_results(query, chat, results):
    prompt_template = PromptTemplate.from_template(
        """
        Your mission is to rank the given answers based on their relevance to the given question.
        Provide a relevancy score between 0 (not relevant) and 1 (highly relevant) for each possible answer.
        Return the results in the following JSON format: {{"answer": score, "answer": score}},
        where answer is the possible answer's text and score is its relevancy score.
        The question is: {query}
        The possible answers are:
        {answers}
        """)
    messages = [
        SystemMessage(content="""
        You're a helpful assistant.
        Return a JSON formatted string.
        """),
        HumanMessage(content=prompt_template.format(query=query, answers='\n'.join(results['text'].head(10).tolist()))),
    ]
    response = chat.invoke(messages)
    # Wrap in StringIO: pandas deprecates passing literal JSON strings to read_json.
    llm_results_df = pd.read_json(StringIO(response.content), orient='index')
    llm_results_df.rename(columns={0: 'score'}, inplace=True)
    llm_results_df.sort_values(by='score', ascending=False, inplace=True)
    return llm_results_df
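
# Illustrative example of the parse above (hypothetical model output): given
#
#   '{"Paris is the capital of France": 0.9, "London is big": 0.2}'
#
# pd.read_json(..., orient='index') yields one row per answer with the value
# in column 0, which is then renamed to 'score' and sorted in descending order.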

def run():
    st.set_page_config(
        page_title="Semantic Search",
        page_icon="",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    st.write("# Smart Search")
    st.write('You can upload any text file, wait for the index to be built, and then search it in free-form language.')
    st.write('Building the index may take a few minutes, depending on the file size.')
    # No on_change callback here: Streamlit reruns the script on upload anyway,
    # and re-entering run() would call st.set_page_config a second time and fail.
    uploaded_file = st.file_uploader('Upload a file', type=['txt'])
    embeddings_model = get_model()
    df = get_df(uploaded_file)
    if df is None:
        st.write("No file has been uploaded")
    else:
        df = get_embeddings(df, embeddings_model)
    user_input = st.text_input('Type your question here', placeholder='')
    num_of_results = st.sidebar.slider('Number of results to display:', 1, 25, 5)
    use_llm = st.sidebar.checkbox("Use a language model to improve results", False)
    openAikey = st.sidebar.text_input("OpenAI API key", type="password")

    if (st.button('Search') or user_input) and user_input != "" and df is not None:
        results = get_results(embeddings_model, user_input, df, num_of_results)
        if use_llm:
            if not openAikey:
                st.write("No OpenAI key was entered")
            else:
                chat = get_chat_api(openAikey)
                llm_results = get_llm_results(user_input, chat, results)
                st.write(llm_results)
        else:
            st.write(results.head(10))


if __name__ == "__main__":
    run()