DzmitryXXL committed on
Commit
3c4a7fb
·
verified ·
1 Parent(s): f728c7a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +59 -9
  2. requirements.txt +6 -1
app.py CHANGED
@@ -1,31 +1,81 @@
 
1
  import streamlit as st
 
 
 
 
 
 
2
 
3
- # Пример данных для таблицы
4
  dataList = [
5
- {"Answer": "Ответ 1", "Distance": 0.5},
6
- {"Answer": "Ответ 2", "Distance": 0.8},
7
- {"Answer": "Ответ 3", "Distance": 0.3}
8
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Основная часть приложения
11
  def main():
12
  # Заголовок приложения
13
- st.title("Пример приложения с Streamlit")
14
 
 
 
 
15
  # Текстовое поле для ввода вопроса
16
  input_text = st.text_input("Input", "")
17
 
18
  # Кнопка "Answer"
19
  if st.button("Answer"):
20
- # Здесь может быть код для обработки вопроса
 
21
  pass
22
 
23
  # Таблица с данными
24
- st.write("Таблица с данными:")
25
  st.table(dataList)
26
 
27
  # Текстовое поле для вывода текста
28
- st.write("Текстовое поле для редактирования:")
29
  text_output = st.text_area("", "")
30
 
31
  # Запуск основной части приложения
 
1
+ import json
2
  import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from tqdm.auto import tqdm
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
9
 
 
10
  dataList = [
11
+ {"Answer": "", "Distance": 0},
12
+ {"Answer": "", "Distance": 0},
13
+ {"Answer": "", "Distance": 0}
14
  ]
15
def list_to_numpy(obj):
    """Convert a list to a NumPy array; return anything else unchanged.

    Args:
        obj: Any value. Only ``list`` instances (including subclasses) are
            converted; tuples, strings, ``None`` and so on pass through.

    Returns:
        ``np.array(obj)`` when ``obj`` is a list, otherwise ``obj`` itself.
    """
    if isinstance(obj, list):
        return np.array(obj)
    return obj
19
+
20
def load_documents_from_jsonl(embeddings_model, jsonl_path, createEmbeddings=False):
    """Load a JSON Lines file into a DataFrame with Question/Answer columns.

    Any column whose name contains ``'Question'`` is renamed to exactly
    ``'Question'``; otherwise, any column whose name contains ``'Answer'`` is
    renamed to exactly ``'Answer'``. All other columns keep their names.

    Args:
        embeddings_model: Object exposing ``encode(text)`` whose result has a
            ``tolist()`` method. Only used when ``createEmbeddings`` is true.
        jsonl_path: Path (or other source accepted by ``pd.read_json``) of
            the JSON Lines file.
        createEmbeddings: When true, add an ``'Embeddings'`` column holding
            one embedding (as a plain list) per row.

    Returns:
        The loaded DataFrame, with the ``'Embeddings'`` column added only
        when ``createEmbeddings`` is true.
    """

    def _canonical_name(col):
        # 'Question' takes precedence when a name contains both markers.
        if 'Question' in col:
            return 'Question'
        if 'Answer' in col:
            return 'Answer'
        return col

    def _embed_row(row):
        # Question and answer are embedded together as a single text.
        text = f"Question: {row['Question']} \n Answer: {row['Answer']}"
        return embeddings_model.encode(text).tolist()

    tqdm.pandas(desc="Loading Data")
    # The identity apply leaves the data untouched; it only drives the
    # "Loading Data" progress bar.
    df = pd.read_json(jsonl_path, lines=True).progress_apply(lambda x: x)

    df.columns = [_canonical_name(col) for col in df.columns]

    if createEmbeddings:
        tqdm.pandas(desc="Creating Embeddings")
        df['Embeddings'] = df.progress_apply(_embed_row, axis=1)

    return df
31
+
32
def generate_embeddings(model, text):
    """Encode ``text`` with ``model`` and return the embedding as a NumPy array.

    Args:
        model: Object exposing ``encode(text, convert_to_tensor=True)`` that
            returns a torch tensor.
        text: Input passed straight through to ``model.encode``.

    Returns:
        The encoded tensor moved to the CPU and converted with ``.numpy()``.
    """
    # The module header never imports torch, so the original body raised
    # NameError on ``torch.no_grad()``; bind the name locally instead.
    import torch

    # Inference only: no gradients need to be tracked while encoding.
    with torch.no_grad():
        embeddings = model.encode(text, convert_to_tensor=True)
    return embeddings.cpu().numpy()
36
+
37
def save_to_faiss(df):
    """Build a flat L2 FAISS index from ``df['Embeddings']`` and write it to disk.

    Args:
        df: DataFrame with an ``'Embeddings'`` column in which every cell is
            a sequence of numbers of the same length. The first row fixes
            the index dimension, so ``df`` must not be empty.

    Side effects:
        Writes the index to the file ``"faiss_index"`` (relative path),
        replacing whatever that path held before.
    """
    # The module header never imports faiss, so the original body raised
    # NameError here; bind the name locally (faiss-cpu is already listed in
    # requirements.txt).
    import faiss

    dimension = len(df['Embeddings'].iloc[0])
    db = faiss.IndexFlatL2(dimension)
    # Embeddings are stacked into one 2-D float32 matrix before being added.
    embeddings = np.array(df['Embeddings'].tolist()).astype('float32')
    db.add(embeddings)
    faiss.write_index(db, "faiss_index")
43
+
44
def search_in_faiss(query_vector, df, k=5):
    """Look up the nearest stored answers for ``query_vector``.

    Reads the index from the file ``"faiss_index"``, searches it for the
    ``k`` nearest vectors and maps every hit back to a row of ``df`` by
    position.

    Args:
        query_vector: One embedding (any array-like); it is converted to a
            float32 array of shape ``(1, dim)`` before searching.
        df: DataFrame with an ``'Answer'`` column whose row order matches
            the order in which vectors were added to the index.
        k: Maximum number of neighbours to return.

    Returns:
        A list of ``{"Answer": ..., "Distance": ...}`` dicts in the order the
        index returned them. It holds fewer than ``k`` entries when the
        index cannot supply ``k`` neighbours.
    """
    # The module header never imports faiss, so the original body raised
    # NameError here; bind the name locally (faiss-cpu is already listed in
    # requirements.txt).
    import faiss

    db = faiss.read_index("faiss_index")
    query_vector = np.array(query_vector).astype('float32').reshape(1, -1)
    distances, indices = db.search(query_vector, k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS pads missing neighbours with index -1. ``df.iloc[-1]`` would
        # silently report the LAST row as a hit, so skip those slots.
        if idx < 0:
            continue
        answer_text = df.iloc[idx]['Answer']
        # Assuming the file on disk is the IndexFlatL2 written by
        # save_to_faiss, the reported distances are squared L2 values, so the
        # square root is the Euclidean distance. Clamp at zero because
        # rounding can leave a tiny negative value, which would become NaN.
        dist = np.sqrt(max(dist, 0.0))
        results.append({"Answer": answer_text, "Distance": dist})

    return results
56
 
 
57
  def main():
58
  # Заголовок приложения
59
+ st.title("Demo for LLAMA-2 RAG with CPU only")
60
 
61
+ df_qa = load_documents_from_jsonl('ExportForAI1.jsonl', model, False)
62
+ save_to_faiss(df_qa)
63
+
64
  # Текстовое поле для ввода вопроса
65
  input_text = st.text_input("Input", "")
66
 
67
  # Кнопка "Answer"
68
  if st.button("Answer"):
69
+ query_vector = model.encode(input_text.lower())
70
+ dataList = search_in_faiss(query_vector, df_embed, k=3)
71
  pass
72
 
73
  # Таблица с данными
74
+ st.write("Most relevants answers")
75
  st.table(dataList)
76
 
77
  # Текстовое поле для вывода текста
78
+ st.write("LLAMA generated answer:")
79
  text_output = st.text_area("", "")
80
 
81
  # Запуск основной части приложения
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- streamlit
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ faiss-cpu
4
+ sentence_transformers
5
+ # json is part of the Python standard library; it is not a pip-installable requirement
6
+ tqdm