{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8495bede-ab8f-416b-b5f2-6a76b1e63935", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\Projects\\LLMs\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from tqdm import tqdm\n", "from sentence_transformers import SentenceTransformer, util" ] }, { "cell_type": "code", "execution_count": 2, "id": "2b8cae6d-547b-4018-9f68-b0a45284b4b4", "metadata": { "tags": [] }, "outputs": [], "source": [ "# model = SentenceTransformer('DMetaSoul/sbert-chinese-general-v2')\n", "model = SentenceTransformer('TintinMeimei/menglang_yongtulv_aimatch_v1')" ] }, { "cell_type": "code", "execution_count": 3, "id": "d3907a6f-f8ab-40fe-8702-c8cb81e189c6", "metadata": { "tags": [] }, "outputs": [], "source": [ "def sim(text1, text2):\n", " emb1 = model.encode(text1, convert_to_tensor=True)\n", " emb2 = model.encode(text2, convert_to_tensor=True)\n", " score = util.cos_sim(emb1, emb2)\n", " return score" ] }, { "cell_type": "code", "execution_count": 24, "id": "3cec9f05-4ea9-46f8-a393-950c67a0150a", "metadata": { "tags": [] }, "outputs": [], "source": [ "text1 = '挂机空调'\n", "# text2 = '1.1.11 高效节能家用电器制造\\n包括节能型房间空调器、空调机组、电冰箱、电动洗衣机、平板电视机、电风扇等家用电器制造。房间空气调节器能效优于《房间空气调节器能效限定值及能效等级》(GB 12021.3)标准1级能效水平;转速可控型房间空气调节器能效优于《转速可控型房间空气调节器能效限定值及能效等级》(GB 21455)标准1级能效水平;多联式空调(热泵)机组能效比优于《多联式空调(热泵)机组能效限定值及能源效率等级》(GB 21454)标准1级能效水平;家用电冰箱能效优于《家用电冰箱耗电量限定值及能效等级》(GB 12021.2)标准1级能效水平;电动洗衣机能效优于《电动洗衣机能效水效限定值及等级》(GB 12021.4)标准1级能效水平;电饭煲能效优于《电饭锅能效限定值及能效等级》(GB 12021.6)标准1级能效水平;平板电视机能效优于《平板电视能效限定值及能效等级》(GB 24850)标准1级能效水平;交流电风扇能效优于《交流电风扇能效限定值及能效等级》(GB 12021.9)标准1级能效水平。其他高效节能家用电器能效均优于相应国家强制性标准1级能效水平。'\n", "# text2 = '包括节能泵、节能型真空干燥设备、节能型真空炉等设备制造。清水离心泵能效指标优于《清水离心泵能效限定值及节能评价值》(GB 19762)标准中节能评价值水平;石油化工离心泵能效优于《石油化工离心泵能效限定值及能效等级》(GB 32284)标准中1级能效水平;潜水电泵能效优于《井用潜水电泵能效限定值及能效等级》(GB 32030)、《小型潜水电泵能效限定值及能效等级》(GB 32029)、《污水污物潜水电泵能效限定值及能效等级》(GB 32031)标准中1级能效水平。'\n", "text2 = '退耕还林'" ] }, { "cell_type": "code", "execution_count": 25, "id": "d570bf57-2518-4306-a7ae-712e81199460", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[-0.5000]], device='cuda:0')" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sim(text1, text2)" ] }, { "cell_type": "markdown", "id": "040cc794-9bb0-4c22-986c-933ca55ee637", "metadata": {}, "source": [ "### Process Data" ] }, { "cell_type": "code", "execution_count": 6, "id": "d46e4e74-f7c2-4339-b009-4ba77f1b2f9a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | X1 | \n", "X2 | \n", "Y | \n", "Split | \n", "
---|---|---|---|---|
0 | \n", "中新制药厂空调末端送回风系统改造-询价公示 | \n", "1.1.11 高效节能家用电器制造\\n包括节能型房间空调器、空调机组、电冰箱、电动洗衣机、平... | \n", "1 | \n", "train | \n", "
1 | \n", "中新制药厂空调末端送回风系统改造-询价公示 | \n", "1.5.1 锅炉(窑炉)节能改造和能效提升\\n包括燃煤锅炉“以大代小”,采用先进燃煤锅炉、节... | \n", "0 | \n", "train | \n", "