import faiss
import streamlit as st
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer
import torch
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Download the quantized Llama-2-7B chat model once from the Hub and load it on CPU.
llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/Llama-2-7b-Chat-GGUF",
        filename="llama-2-7b-chat.Q4_K_M.gguf",
    ),
    n_ctx=2048,
)
# llm = Llama(model_path=hf_hub_download(repo_id="DFofanov78/ruGPT-3.5-13B-GGUF", filename="ruGPT-3.5-13B-Q4_0.gguf"), n_ctx=2048)
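
# The FAQ export is expected to be a JSON-Lines file, one record per line.
# Hypothetical example record (field names inferred from the code below; the
# 'Embeddings' list is required when createEmbeddings=False):
# {"Question": "...", "Answer": "...", "Embeddings": [0.12, -0.03, ...]}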
def list_to_numpy(obj):
    # Convert embeddings stored as plain lists in the JSONL back into numpy arrays.
    if isinstance(obj, list):
        return np.array(obj)
    return obj
def load_documents_from_jsonl(embeddings_model, jsonl_path, createEmbeddings=False):
    df = pd.read_json(jsonl_path, lines=True)
    # Normalize column names so downstream code can rely on 'Question' and 'Answer'.
    df.columns = ['Question' if 'Question' in col else 'Answer' if 'Answer' in col else col for col in df.columns]
    if createEmbeddings:
        tqdm.pandas(desc="Generating Embeddings")
        df['Embeddings'] = df['Question'].progress_apply(lambda text: embeddings_model.encode(str(text).lower()))
    else:
        # Embeddings are already stored in the JSONL as lists; convert them to numpy arrays.
        df['Embeddings'] = df['Embeddings'].apply(list_to_numpy)
    return df
def generate_embeddings(model, text):
    # Encode text with the sentence-transformer model; inference only, so no gradients.
    with torch.no_grad():
        embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings.cpu().numpy()
def save_to_faiss(df):
    # Build a flat L2 index over the precomputed embeddings and persist it to disk.
    dimension = len(df['Embeddings'].iloc[0])
    db = faiss.IndexFlatL2(dimension)
    embeddings = np.array(df['Embeddings'].tolist()).astype('float32')
    db.add(embeddings)
    faiss.write_index(db, "faiss_index")
def search_in_faiss(query_vector, df, k=5):
    db = faiss.read_index("faiss_index")
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    distances, indices = db.search(query_vector, k)
    results = []
    for idx, dist in zip(indices[0], distances[0]):
        answer_text = df.iloc[idx]['Answer']
        # IndexFlatL2 returns squared L2 distances, so take the square root.
        dist = np.sqrt(dist)
        results.append({"Answer": answer_text, "Distance": dist})
    return results
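
# A minimal offline sketch of the retrieval pipeline above (hypothetical usage,
# assuming ExportForAI2.jsonl already contains an 'Embeddings' column):
#   model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
#   df = load_documents_from_jsonl(model, 'ExportForAI2.jsonl', createEmbeddings=False)
#   save_to_faiss(df)
#   print(search_in_faiss(model.encode("example question"), df, k=3))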
def main():
    # Application title
    st.title("Demo for LLAMA-2 RAG with CPU only")
    model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
    df_qa = load_documents_from_jsonl(model, 'ExportForAI2.jsonl', False)
    save_to_faiss(df_qa)
    # Text input for the user's question
    input_text = st.text_input("Input", "")
    # Keep the retrieved answers in session state so they survive Streamlit reruns
    # (each button click re-executes the whole script).
    if "dataList" not in st.session_state:
        st.session_state.dataList = [{"Answer": "", "Distance": 0} for _ in range(3)]
    # "Answer" button: retrieve the nearest answers from the FAISS index
    if st.button("Answer"):
        query_vector = model.encode(input_text.lower())
        st.session_state.dataList = search_in_faiss(query_vector, df_qa, k=3)
    # Table with the retrieval results
    st.write("Most relevant answers")
    st.table(st.session_state.dataList)
    # Placeholder for the streamed LLM answer
    llm_output_text = st.empty()
    # "LLM Answer" button: feed the best retrieved answer to Llama-2 as context
    if st.button("LLM Answer"):
        question = input_text.lower()
        context = st.session_state.dataList[0]["Answer"]
        # Llama-2 chat prompt format; the system prompt (in Russian) tells the model
        # to answer the question from the given context fragments, to say so when it
        # does not know the answer, and to stay within three sentences.
        text_input = f'''
[INST]<<SYS>> Вы помощник в вопросах-ответах. Используйте следующие фрагменты полученного контекста, чтобы ответить на вопрос. Если вы не знаете ответа, просто скажите, что не знаете. Используйте максимум три предложения и будьте краткими.<</SYS>>
Вопрос: {question}
Контекст: {context}
Ответ: [/INST]
'''
        output = llm(text_input, max_tokens=512, stream=True)
        text_output = ""
        # Refresh the placeholder on every chunk so the answer appears token by token.
        for out in output:
            text_output += out["choices"][0]["text"]
            llm_output_text.text("LLAMA generated answer:\n" + text_output)
# Application entry point
if __name__ == "__main__":
    main()
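
# Assuming this script is saved as app.py (the usual Hugging Face Spaces convention),
# it can be run locally with:
#   streamlit run app.py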