Spaces:
Runtime error
Runtime error
远兮
committed on
Commit
·
062f76d
1
Parent(s):
ebe91e9
add index, bilibili and csv_loader.
Browse files- .gitignore +2 -1
- index_bilibili.ipynb +72 -0
- index_csv_loader.ipynb +63 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
.chroma/
|
|
|
|
1 |
+
.chroma/
|
2 |
+
.inner/
|
index_bilibili.ipynb
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 4,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from langchain.document_loaders.bilibili import BiliBiliLoader"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 5,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"loader = BiliBiliLoader(\n",
|
19 |
+
" [\"https://www.bilibili.com/video/BV1xt411o7Xu/\"]\n",
|
20 |
+
")"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 6,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [
|
28 |
+
{
|
29 |
+
"ename": "RuntimeError",
|
30 |
+
"evalue": "This event loop is already running",
|
31 |
+
"output_type": "error",
|
32 |
+
"traceback": [
|
33 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
34 |
+
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
35 |
+
"Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m loader\u001b[39m.\u001b[39;49mload()\n",
|
36 |
+
"File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/langchain/document_loaders/bilibili.py:23\u001b[0m, in \u001b[0;36mBiliBiliLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 21\u001b[0m results \u001b[39m=\u001b[39m []\n\u001b[1;32m 22\u001b[0m \u001b[39mfor\u001b[39;00m url \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvideo_urls:\n\u001b[0;32m---> 23\u001b[0m transcript, video_info \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_bilibili_subs_and_info(url)\n\u001b[1;32m 24\u001b[0m doc \u001b[39m=\u001b[39m Document(page_content\u001b[39m=\u001b[39mtranscript, metadata\u001b[39m=\u001b[39mvideo_info)\n\u001b[1;32m 25\u001b[0m results\u001b[39m.\u001b[39mappend(doc)\n",
|
37 |
+
"File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/langchain/document_loaders/bilibili.py:51\u001b[0m, in \u001b[0;36mBiliBiliLoader._get_bilibili_subs_and_info\u001b[0;34m(self, url)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 49\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00murl\u001b[39m}\u001b[39;00m\u001b[39m is not bilibili url.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m---> 51\u001b[0m video_info \u001b[39m=\u001b[39m sync(v\u001b[39m.\u001b[39;49mget_info())\n\u001b[1;32m 52\u001b[0m video_info\u001b[39m.\u001b[39mupdate({\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: url})\n\u001b[1;32m 54\u001b[0m \u001b[39m# Get subtitle url\u001b[39;00m\n",
|
38 |
+
"File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/bilibili_api/utils/sync.py:24\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coroutine)\u001b[0m\n\u001b[1;32m 22\u001b[0m __ensure_event_loop()\n\u001b[1;32m 23\u001b[0m loop \u001b[39m=\u001b[39m asyncio\u001b[39m.\u001b[39mget_event_loop()\n\u001b[0;32m---> 24\u001b[0m \u001b[39mreturn\u001b[39;00m loop\u001b[39m.\u001b[39;49mrun_until_complete(coroutine)\n",
|
39 |
+
"File \u001b[0;32m~/anaconda3/lib/python3.10/asyncio/base_events.py:625\u001b[0m, in \u001b[0;36mBaseEventLoop.run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 614\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Run until the Future is done.\u001b[39;00m\n\u001b[1;32m 615\u001b[0m \n\u001b[1;32m 616\u001b[0m \u001b[39mIf the argument is a coroutine, it is wrapped in a Task.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[39mReturn the Future's result, or raise its exception.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 624\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_closed()\n\u001b[0;32m--> 625\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_check_running()\n\u001b[1;32m 627\u001b[0m new_task \u001b[39m=\u001b[39m \u001b[39mnot\u001b[39;00m futures\u001b[39m.\u001b[39misfuture(future)\n\u001b[1;32m 628\u001b[0m future \u001b[39m=\u001b[39m tasks\u001b[39m.\u001b[39mensure_future(future, loop\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m)\n",
|
40 |
+
"File \u001b[0;32m~/anaconda3/lib/python3.10/asyncio/base_events.py:584\u001b[0m, in \u001b[0;36mBaseEventLoop._check_running\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_check_running\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 583\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_running():\n\u001b[0;32m--> 584\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mThis event loop is already running\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 585\u001b[0m \u001b[39mif\u001b[39;00m events\u001b[39m.\u001b[39m_get_running_loop() \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 586\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m 587\u001b[0m \u001b[39m'\u001b[39m\u001b[39mCannot run the event loop while another loop is running\u001b[39m\u001b[39m'\u001b[39m)\n",
|
41 |
+
"\u001b[0;31mRuntimeError\u001b[0m: This event loop is already running"
|
42 |
+
]
|
43 |
+
}
|
44 |
+
],
|
45 |
+
"source": [
|
46 |
+
"loader.load()"
|
47 |
+
]
|
48 |
+
}
|
49 |
+
],
|
50 |
+
"metadata": {
|
51 |
+
"kernelspec": {
|
52 |
+
"display_name": "base",
|
53 |
+
"language": "python",
|
54 |
+
"name": "python3"
|
55 |
+
},
|
56 |
+
"language_info": {
|
57 |
+
"codemirror_mode": {
|
58 |
+
"name": "ipython",
|
59 |
+
"version": 3
|
60 |
+
},
|
61 |
+
"file_extension": ".py",
|
62 |
+
"mimetype": "text/x-python",
|
63 |
+
"name": "python",
|
64 |
+
"nbconvert_exporter": "python",
|
65 |
+
"pygments_lexer": "ipython3",
|
66 |
+
"version": "3.10.10"
|
67 |
+
},
|
68 |
+
"orig_nbformat": 4
|
69 |
+
},
|
70 |
+
"nbformat": 4,
|
71 |
+
"nbformat_minor": 2
|
72 |
+
}
|
index_csv_loader.ipynb
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 12,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from langchain.document_loaders.csv_loader import CSVLoader"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 13,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"loader = CSVLoader(file_path='.inner/xbxb.csv')\n",
|
19 |
+
"\n",
|
20 |
+
"data = loader.load()"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 14,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [
|
28 |
+
{
|
29 |
+
"name": "stdout",
|
30 |
+
"output_type": "stream",
|
31 |
+
"text": [
|
32 |
+
"page_content='\\ufeff日期: 20230312\\nsn: 703052302050002380\\n是否内测用户: 0\\nquery_sentence: 帮我批改这些练习题\\ndomain: 1000726\\ndomain释义: null\\ndomain_support: 1\\nresult_support: 1\\nintent_support: 1\\nquery_intent: 1\\n: ' metadata={'source': '.inner/xbxb.csv', 'row': 0}\n"
|
33 |
+
]
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"source": [
|
37 |
+
"print(data[0])"
|
38 |
+
]
|
39 |
+
}
|
40 |
+
],
|
41 |
+
"metadata": {
|
42 |
+
"kernelspec": {
|
43 |
+
"display_name": "base",
|
44 |
+
"language": "python",
|
45 |
+
"name": "python3"
|
46 |
+
},
|
47 |
+
"language_info": {
|
48 |
+
"codemirror_mode": {
|
49 |
+
"name": "ipython",
|
50 |
+
"version": 3
|
51 |
+
},
|
52 |
+
"file_extension": ".py",
|
53 |
+
"mimetype": "text/x-python",
|
54 |
+
"name": "python",
|
55 |
+
"nbconvert_exporter": "python",
|
56 |
+
"pygments_lexer": "ipython3",
|
57 |
+
"version": "3.10.10"
|
58 |
+
},
|
59 |
+
"orig_nbformat": 4
|
60 |
+
},
|
61 |
+
"nbformat": 4,
|
62 |
+
"nbformat_minor": 2
|
63 |
+
}
|