Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +59 -9
- requirements.txt +6 -1
app.py
CHANGED
@@ -1,31 +1,81 @@
|
|
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
# Пример данных для таблицы
|
4 |
dataList = [
|
5 |
-
{"Answer": "
|
6 |
-
{"Answer": "
|
7 |
-
{"Answer": "
|
8 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
# Основная часть приложения
|
11 |
def main():
|
12 |
# Заголовок приложения
|
13 |
-
st.title("
|
14 |
|
|
|
|
|
|
|
15 |
# Текстовое поле для ввода вопроса
|
16 |
input_text = st.text_input("Input", "")
|
17 |
|
18 |
# Кнопка "Answer"
|
19 |
if st.button("Answer"):
|
20 |
-
|
|
|
21 |
pass
|
22 |
|
23 |
# Таблица с данными
|
24 |
-
st.write("
|
25 |
st.table(dataList)
|
26 |
|
27 |
# Текстовое поле для вывода текста
|
28 |
-
st.write("
|
29 |
text_output = st.text_area("", "")
|
30 |
|
31 |
# Запуск основной части приложения
|
|
|
1 |
+
import json
|
2 |
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from tqdm.auto import tqdm
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
|
8 |
+
# Sentence-embedding model shared by the functions below; it is constructed
# once, when the module is imported.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

# Placeholder rows for the results table: three independent blank entries.
dataList = [{"Answer": "", "Distance": 0} for _ in range(3)]
|
15 |
+
def list_to_numpy(obj):
    """Return ``obj`` as a NumPy array if it is a ``list``; otherwise return it unchanged."""
    return np.array(obj) if isinstance(obj, list) else obj
|
19 |
+
|
20 |
+
def load_documents_from_jsonl(embeddings_model, jsonl_path, createEmbeddings=False):
    """Read a JSON Lines file into a DataFrame with normalised Q/A column names.

    Args:
        embeddings_model: Object whose ``encode(text)`` result supports
            ``.tolist()``. Only used when ``createEmbeddings`` is true.
        jsonl_path: Source of the JSON Lines data, handed to ``pd.read_json``.
        createEmbeddings: When true, add an ``Embeddings`` column computed
            from each row's ``Question`` and ``Answer`` values.

    Returns:
        The loaded DataFrame. Any column whose name contains ``Question`` is
        renamed to ``Question``, and any other column whose name contains
        ``Answer`` is renamed to ``Answer``.
    """
    def _canonical(column):
        # "Question" is checked first, so it wins if a name contains both.
        if 'Question' in column:
            return 'Question'
        if 'Answer' in column:
            return 'Answer'
        return column

    def _embed(row):
        prompt = f"Question: {row['Question']} \n Answer: {row['Answer']}"
        return embeddings_model.encode(prompt).tolist()

    tqdm.pandas(desc="Loading Data")
    raw = pd.read_json(jsonl_path, lines=True)
    # Identity apply: presumably only here to display the "Loading Data" bar.
    df = raw.progress_apply(lambda x: x)
    df.columns = [_canonical(col) for col in df.columns]

    if not createEmbeddings:
        return df

    tqdm.pandas(desc="Creating Embeddings")
    df['Embeddings'] = df.progress_apply(_embed, axis=1)
    return df
|
31 |
+
|
32 |
+
def generate_embeddings(model, text):
    """Encode ``text`` with ``model`` and return the result as a NumPy array.

    Args:
        model: Object exposing ``encode(text, convert_to_tensor=True)`` that
            returns a torch tensor.
        text: Input passed unchanged to ``model.encode``.

    Returns:
        The encoded tensor moved to the CPU and converted with ``.numpy()``.
    """
    # Imported locally because the visible module-level imports do not bring
    # in torch, so the original body raised NameError on its first call.
    import torch

    # Inference only: skip autograd bookkeeping while encoding.
    with torch.no_grad():
        embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings.cpu().numpy()
|
36 |
+
|
37 |
+
def save_to_faiss(df):
    """Build a flat L2 FAISS index from ``df['Embeddings']`` and write it to disk.

    Args:
        df: DataFrame with an ``Embeddings`` column holding one numeric
            vector per row; all vectors are expected to have the length of
            the first one.

    Side effects:
        Writes the index to the relative path ``faiss_index``.
    """
    # Imported locally because the visible module-level imports do not bring
    # in faiss, so the original body raised NameError on its first call.
    import faiss

    # The index dimensionality is taken from the first stored vector.
    dimension = len(df['Embeddings'].iloc[0])
    embeddings = np.array(df['Embeddings'].tolist()).astype('float32')

    db = faiss.IndexFlatL2(dimension)
    db.add(embeddings)
    faiss.write_index(db, "faiss_index")
|
43 |
+
|
44 |
+
def search_in_faiss(query_vector, df, k=5):
    """Return the answers whose stored vectors are nearest to ``query_vector``.

    Args:
        query_vector: One embedding vector; it is cast to ``float32`` and
            reshaped to a single row before searching.
        df: DataFrame whose row positions match the order in which vectors
            were added to the index, and which has an ``Answer`` column.
        k: Maximum number of neighbours to request.

    Returns:
        A list of ``{"Answer": ..., "Distance": ...}`` dicts in the order
        returned by the index search. It holds fewer than ``k`` entries when
        the index contains fewer than ``k`` vectors.
    """
    # Imported locally because the visible module-level imports do not bring
    # in faiss, so the original body raised NameError on its first call.
    import faiss

    db = faiss.read_index("faiss_index")
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    distances, indices = db.search(query_vector, k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        if idx < 0:
            # FAISS pads missing neighbours with index -1; df.iloc[-1] would
            # silently report the last row as a match.
            continue
        results.append({
            "Answer": df.iloc[idx]['Answer'],
            # Per the FAISS documentation IndexFlatL2 reports squared L2
            # distances; the square root gives the Euclidean distance.
            "Distance": np.sqrt(dist),
        })
    return results
|
56 |
|
|
|
57 |
def main():
    """Render the Streamlit page: load the Q/A data, index it, and show matches.

    Loads ``ExportForAI1.jsonl``, writes a FAISS index from it, and draws the
    input field, the "Answer" button, the results table and the output text
    area. The table shows the module-level placeholder rows until the button
    is pressed, then the three nearest answers for the entered text.
    """
    # Application title
    st.title("Demo for LLAMA-2 RAG with CPU only")

    # load_documents_from_jsonl takes (embeddings_model, jsonl_path, ...);
    # the original call passed the path and the model in swapped positions.
    # NOTE(review): with createEmbeddings=False the JSONL data must already
    # contain an 'Embeddings' column for save_to_faiss to work - confirm.
    df_qa = load_documents_from_jsonl(model, 'ExportForAI1.jsonl', False)
    save_to_faiss(df_qa)

    # Text field for entering the question
    input_text = st.text_input("Input", "")

    # Start from the module-level placeholder rows. Assigning to `dataList`
    # here would make it local to main() and raise UnboundLocalError at
    # st.table() whenever the button had not been pressed.
    results = dataList

    # "Answer" button
    if st.button("Answer"):
        query_vector = model.encode(input_text.lower())
        # The original passed the undefined name `df_embed`; df_qa is the
        # frame the index was built from.
        results = search_in_faiss(query_vector, df_qa, k=3)

    # Table with the data
    st.write("Most relevants answers")
    st.table(results)

    # Text field for the output text
    st.write("LLAMA generated answer:")
    st.text_area("", "")
|
80 |
|
81 |
# Запуск основной части приложения
|
requirements.txt
CHANGED
@@ -1 +1,6 @@
|
|
1 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
torch
|
3 |
+
faiss-cpu
|
4 |
+
sentence_transformers
|
5 |
+
json
|
6 |
+
tqdm
|