Spaces:
Sleeping
Sleeping
File size: 5,030 Bytes
307a8be |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores.faiss import FAISS\n",
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def load_index(index_path=\"../storage\", model_name=\"intfloat/multilingual-e5-large\"):\n",
"    \"\"\"Load the persisted FAISS index together with its embedding model.\n",
"\n",
"    Args:\n",
"        index_path: Folder handed to ``FAISS.load_local`` as ``folder_path``.\n",
"        model_name: Model name handed to ``HuggingFaceEmbeddings``.\n",
"\n",
"    Returns:\n",
"        The object returned by ``FAISS.load_local``.\n",
"    \"\"\"\n",
"    embedding_model = HuggingFaceEmbeddings(model_name=model_name)\n",
"    # NOTE(review): this flag presumably permits pickle-based loading; only\n",
"    # point index_path at index files you built or otherwise trust.\n",
"    return FAISS.load_local(\n",
"        folder_path=index_path,\n",
"        embeddings=embedding_model,\n",
"        allow_dangerous_deserialization=True,\n",
"    )\n",
"\n",
"\n",
"index = load_index()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"def search_documents(input_text, courseOfStudy, index, k=4, fetch_k=20, score_threshold=0.5):\n",
"    \"\"\"Search the index for passages similar to ``input_text``.\n",
"\n",
"    Args:\n",
"        input_text: Query text. A falsy value skips the search.\n",
"        courseOfStudy: Optional value used as the \"学校種別\" entry of the\n",
"            metadata filter; a falsy value leaves the filter empty.\n",
"        index: Object exposing ``similarity_search_with_score``.\n",
"        k: Forwarded to the search call as ``k``.\n",
"        fetch_k: Forwarded to the search call as ``fetch_k``.\n",
"        score_threshold: Forwarded to the search call as ``score_threshold``.\n",
"\n",
"    Returns:\n",
"        A two-element list ``[rows, json_data]``. ``rows`` holds one list per\n",
"        hit (HTML link, 学校種別, 教科等, rounded score, text); ``json_data``\n",
"        holds the same hits as dicts. Both are empty when there is no query\n",
"        or the search raises, so ``result[1]`` can always be indexed.\n",
"    \"\"\"\n",
"    if not input_text:\n",
"        return [[], []]\n",
"\n",
"    metadata = {}\n",
"    if courseOfStudy:\n",
"        metadata[\"学校種別\"] = courseOfStudy\n",
"\n",
"    try:\n",
"        docs_and_scores = index.similarity_search_with_score(\n",
"            input_text,\n",
"            filter=metadata,\n",
"            fetch_k=fetch_k,\n",
"            k=k,\n",
"            score_threshold=score_threshold,\n",
"        )\n",
"    except Exception as e:\n",
"        # Best effort: report the failure and fall back to an empty result\n",
"        # with the same shape as a successful one.\n",
"        print(f\"Error during search: {e}\")\n",
"        return [[], []]\n",
"\n",
"    rows = []\n",
"    json_data = []\n",
"    # Single pass so both views stay in sync even for a one-shot iterable.\n",
"    for doc, score in docs_and_scores:\n",
"        doc_id = doc.metadata[\"id\"]\n",
"        course = doc.metadata[\"学校種別\"]\n",
"        subject = doc.metadata[\"教科等\"]\n",
"        rounded_score = round(float(score), 3)\n",
"\n",
"        rows.append(\n",
"            [\n",
"                f\"<a href='https://w3id.org/jp-cos/{doc_id}' target='_blank'>{doc_id}</a>\",\n",
"                course,\n",
"                subject,\n",
"                rounded_score,\n",
"                doc.page_content,\n",
"            ]\n",
"        )\n",
"        json_data.append(\n",
"            {\n",
"                \"dcterms:identifier\": doc_id,\n",
"                \"jp-cos:courseOfStudy\": course,\n",
"                \"jp-cos:subjectArea\": subject,\n",
"                \"score\": rounded_score,\n",
"                \"jp-cos:sectionText\": doc.page_content,\n",
"            }\n",
"        )\n",
"\n",
"    return [rows, json_data]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'dcterms:identifier': '8362225540000000',\n",
" 'jp-cos:courseOfStudy': '中学校',\n",
" 'jp-cos:sectionText': 'アの(ウ)の㋑については,各器官の働きを中心に扱うこと。',\n",
" 'jp-cos:subjectArea': '理科',\n",
" 'score': 0.416},\n",
" {'dcterms:identifier': '8361235820000000',\n",
" 'jp-cos:courseOfStudy': '中学校',\n",
" 'jp-cos:sectionText': 'アの(ア)の㋑については,pHにも触れること。',\n",
" 'jp-cos:subjectArea': '理科',\n",
" 'score': 0.417},\n",
" {'dcterms:identifier': '8362235720000000',\n",
" 'jp-cos:courseOfStudy': '中学校',\n",
" 'jp-cos:sectionText': 'アの(ア)の㋑については,有性生殖の仕組みを減数分裂と関連付けて扱うこと。「無性生殖」については,単細胞生物の分裂や栄養生殖にも触れること。',\n",
" 'jp-cos:subjectArea': '理科',\n",
" 'score': 0.419},\n",
" {'dcterms:identifier': '8361225630000000',\n",
" 'jp-cos:courseOfStudy': '中学校',\n",
" 'jp-cos:sectionText': 'アの(イ)の㋑の「酸化や還元」については,簡単なものを扱うこと。',\n",
" 'jp-cos:subjectArea': '理科',\n",
" 'score': 0.422}]\n"
]
}
],
"source": [
"# Sample query and search settings for a manual check of search_documents.\n",
"input_text = \"小さい体でジャンプするトビムシは、菌糸類を食べて糞にする。\"\n",
"grade = \"中学校\"\n",
"k, fetch_k, threshold = 4, 500, 0.5\n",
"\n",
"result = search_documents(\n",
"    input_text,\n",
"    courseOfStudy=grade,\n",
"    index=index,\n",
"    k=k,\n",
"    fetch_k=fetch_k,\n",
"    score_threshold=threshold,\n",
")\n",
"\n",
"# result[1] is the list of per-hit dicts built by search_documents.\n",
"pprint(result[1])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|