a main script
Browse files- smart-search-by-notes.py +107 -0
smart-search-by-notes.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# source SMARTSEARCH/bin/activate
|
2 |
+
# pip install sentence-transformers faiss-gpu
|
3 |
+
# pip install --upgrade numpy==1.26
|
4 |
+
|
5 |
+
# Умный поиск по заметкам
|
6 |
+
# https://telegra.ph/Umnyj-poisk-po-zametkam-07-10
|
7 |
+
|
8 |
+
import os
|
9 |
+
from sentence_transformers import SentenceTransformer
|
10 |
+
import faiss
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
|
14 |
+
# 📁 Путь к папке с заметками
|
15 |
+
NOTES_FOLDER = "notebooks"

# Sentence-embedding model used for both the notes and the queries.
# SECURITY: the access token used to be hard-coded on this line. A secret
# committed to source control must be treated as leaked: revoke it in the
# Hugging Face account settings. The token is now taken from the HF_TOKEN
# environment variable; when the variable is unset, None is passed and the
# model is requested without credentials.
model = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L6-v2',
    token=os.environ.get("HF_TOKEN"),
)
|
19 |
+
|
20 |
+
# 📥 Загрузка и индексация заметок
|
21 |
+
def load_notes(folder):
    """Read every ``.txt`` / ``.md`` file located directly inside *folder*.

    Args:
        folder: Path of the directory that holds the notes. Sub-directories
            are not descended into.

    Returns:
        A ``(notes, filenames)`` tuple of two parallel lists: ``notes[i]`` is
        the full text of the file whose name is ``filenames[i]``. Entries are
        ordered by file name. Both lists are empty when nothing matches.

    Raises:
        OSError: If *folder* cannot be listed or a note cannot be opened.
        UnicodeDecodeError: If a note is not valid UTF-8.
    """
    notes = []
    filenames = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the note
    # order (and therefore the position of each note) stable between runs.
    for file in sorted(os.listdir(folder)):
        if not file.endswith((".txt", ".md")):
            continue
        path = os.path.join(folder, file)
        # A directory whose name happens to end in .txt/.md is not a note and
        # would make open() fail.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            notes.append(f.read())
        filenames.append(file)
    return notes, filenames
|
32 |
+
|
33 |
+
# 🔍 Поиск похожих заметок
|
34 |
+
def semantic_search(query, notes, vectors, filenames, top_k=6):
    """Return the notes whose embeddings are nearest to the embedding of *query*.

    The lookup goes through the module-level ``index`` and ``model`` objects.

    Args:
        query: Free-text search string.
        notes: Note texts, in the same order in which they were added to the
            index.
        vectors: Embeddings of *notes*. Not used by this function (the search
            runs against the global ``index``); kept so existing callers keep
            working.
        filenames: File names parallel to *notes*.
        top_k: Maximum number of hits to return.

    Returns:
        A list of ``(filename, score, snippet)`` tuples in the order produced
        by ``index.search``. ``score`` is the value reported by the index; the
        script builds a ``faiss.IndexFlatL2``, so it is a distance (smaller
        means closer), not a similarity. ``snippet`` is the first 300
        characters of the note with newlines replaced by spaces.
    """
    # Never ask for more neighbours than there are notes: FAISS pads missing
    # results with the label -1, and notes[-1] would silently return the last
    # note as a bogus hit.
    k = min(top_k, len(notes))
    if k <= 0:
        return []

    query_vec = model.encode([query])
    D, I = index.search(query_vec, k)

    results = []
    for score, idx in zip(D[0], I[0]):
        if idx < 0:
            # Padding label: no further neighbours are available.
            continue
        snippet = notes[idx][:300].replace("\n", " ").strip()
        results.append((filenames[idx], score, snippet))
    return results
|
42 |
+
|
43 |
+
# ▶️ Основной блок
|
44 |
+
# Load the notes and embed them once at start-up.
notes, filenames = load_notes(NOTES_FOLDER)
if not notes:
    # Nothing to embed or index: stop with a clear message instead of failing
    # later on vectors.shape[1].
    raise SystemExit(f"В папке '{NOTES_FOLDER}' нет заметок (.txt или .md)")
vectors = model.encode(notes)

# Build the FAISS index. `index` stays a module-level name because
# semantic_search() reads it as a global.
dim = vectors.shape[1]
index = faiss.IndexFlatL2(dim)
# FAISS works on float32 data; make the dtype explicit.
index.add(np.asarray(vectors, dtype="float32"))

# Interactive query loop: runs until the user types exit/quit or closes stdin.
while True:
    try:
        q = input("\n🔎 Запрос (или 'exit'): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session without a traceback.
        print()
        break
    if q.lower() in ['exit', 'quit']:
        break
    if not q:
        # Blank input: nothing to search for, prompt again.
        continue

    results = semantic_search(q, notes, vectors, filenames)
    print("\n📚 Результаты:")
    # NOTE: the printed score is the distance returned by the L2 index, so
    # smaller values are the closer matches.
    for fname, score, snippet in results:
        print(f"\n📝 {fname} (схожесть: {score:.2f})")
        print(f" → {snippet[:150]}...")
|
63 |
+
|
64 |
+
#🔎 Запрос (или 'exit'): When was Amsterdam voted the best city to live in?
|
65 |
+
|
66 |
+
# 📚 Результаты:
|
67 |
+
|
68 |
+
# 📝 3.txt (схожесть: 0.65)
|
69 |
+
# → In 2022, Amsterdam was ranked the ninth-best city to live in by the Economist Intelligence Unit[28] and 12th on quality of living for environment and ...
|
70 |
+
|
71 |
+
# 📝 12.txt (схожесть: 0.89)
|
72 |
+
# → In 1300, Amsterdam's population was around 1,000 people.[89] While many towns in Holland experienced population decline during the 15th and 16th centu...
|
73 |
+
|
74 |
+
# 📝 10.txt (схожесть: 0.90)
|
75 |
+
# → Amsterdam is located in the Western Netherlands, in the province of North Holland, the capital of which is not Amsterdam, but rather Haarlem. The rive...
|
76 |
+
|
77 |
+
# 📝 1.txt (схожесть: 0.91)
|
78 |
+
# → Amsterdam (/ˈæmstərdæm/ AM-stər-dam, UK also /ˌæmstərˈdæm/ AM-stər-DAM;[12][13] Dutch: [ˌɑmstərˈdɑm] ⓘ; lit. 'Dam in the Amstel')[14] is the capital[a...
|
79 |
+
|
80 |
+
# 📝 7.txt (схожесть: 0.94)
|
81 |
+
# → The end of the 19th century is sometimes called Amsterdam's second Golden Age.[57] New museums, a railway station, and the Concertgebouw were built; A...
|
82 |
+
|
83 |
+
# 📝 11.txt (схожесть: 0.96)
|
84 |
+
# → Amsterdam has an oceanic climate (Köppen: Cfb)[81] strongly influenced by its proximity to the North Sea to the west, with prevailing westerly winds. ...
|
85 |
+
|
86 |
+
|
87 |
+
# 🔎 Запрос (или 'exit'): How many people live in Amsterdam?
|
88 |
+
|
89 |
+
# 📚 Результаты:
|
90 |
+
|
91 |
+
# 📝 3.txt (схожесть: 0.69)
|
92 |
+
# → In 2022, Amsterdam was ranked the ninth-best city to live in by the Economist Intelligence Unit[28] and 12th on quality of living for environment and ...
|
93 |
+
|
94 |
+
# 📝 12.txt (схожесть: 0.70)
|
95 |
+
# → In 1300, Amsterdam's population was around 1,000 people.[89] While many towns in Holland experienced population decline during the 15th and 16th centu...
|
96 |
+
|
97 |
+
# 📝 10.txt (схожесть: 0.82)
|
98 |
+
# → Amsterdam is located in the Western Netherlands, in the province of North Holland, the capital of which is not Amsterdam, but rather Haarlem. The rive...
|
99 |
+
|
100 |
+
# 📝 1.txt (схожесть: 0.85)
|
101 |
+
# → Amsterdam (/ˈæmstərdæm/ AM-stər-dam, UK also /ˌæmstərˈdæm/ AM-stər-DAM;[12][13] Dutch: [ˌɑmstərˈdɑm] ⓘ; lit. 'Dam in the Amstel')[14] is the capital[a...
|
102 |
+
|
103 |
+
# 📝 11.txt (схожесть: 1.01)
|
104 |
+
# → Amsterdam has an oceanic climate (Köppen: Cfb)[81] strongly influenced by its proximity to the North Sea to the west, with prevailing westerly winds. ...
|
105 |
+
|
106 |
+
# 📝 2.txt (схожесть: 1.01)
|
107 |
+
# → Amsterdam's main attractions include its historic canals; the Rijksmuseum, the state museum with Dutch Golden Age art; the Van Gogh Museum; the Dam Sq...
|