远兮 committed on
Commit
062f76d
·
1 Parent(s): ebe91e9

Add index_bilibili and index_csv_loader notebooks.

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. index_bilibili.ipynb +72 -0
  3. index_csv_loader.ipynb +63 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- .chroma/
 
 
1
+ .chroma/
2
+ .inner/
index_bilibili.ipynb ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from langchain.document_loaders.bilibili import BiliBiliLoader"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 5,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "loader = BiliBiliLoader(\n",
19
+ " [\"https://www.bilibili.com/video/BV1xt411o7Xu/\"]\n",
20
+ ")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 6,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "ename": "RuntimeError",
30
+ "evalue": "This event loop is already running",
31
+ "output_type": "error",
32
+ "traceback": [
33
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
34
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
35
+ "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m loader\u001b[39m.\u001b[39;49mload()\n",
36
+ "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/langchain/document_loaders/bilibili.py:23\u001b[0m, in \u001b[0;36mBiliBiliLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 21\u001b[0m results \u001b[39m=\u001b[39m []\n\u001b[1;32m 22\u001b[0m \u001b[39mfor\u001b[39;00m url \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvideo_urls:\n\u001b[0;32m---> 23\u001b[0m transcript, video_info \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_bilibili_subs_and_info(url)\n\u001b[1;32m 24\u001b[0m doc \u001b[39m=\u001b[39m Document(page_content\u001b[39m=\u001b[39mtranscript, metadata\u001b[39m=\u001b[39mvideo_info)\n\u001b[1;32m 25\u001b[0m results\u001b[39m.\u001b[39mappend(doc)\n",
37
+ "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/langchain/document_loaders/bilibili.py:51\u001b[0m, in \u001b[0;36mBiliBiliLoader._get_bilibili_subs_and_info\u001b[0;34m(self, url)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 49\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00murl\u001b[39m}\u001b[39;00m\u001b[39m is not bilibili url.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m---> 51\u001b[0m video_info \u001b[39m=\u001b[39m sync(v\u001b[39m.\u001b[39;49mget_info())\n\u001b[1;32m 52\u001b[0m video_info\u001b[39m.\u001b[39mupdate({\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: url})\n\u001b[1;32m 54\u001b[0m \u001b[39m# Get subtitle url\u001b[39;00m\n",
38
+ "File \u001b[0;32m~/anaconda3/lib/python3.10/site-packages/bilibili_api/utils/sync.py:24\u001b[0m, in \u001b[0;36msync\u001b[0;34m(coroutine)\u001b[0m\n\u001b[1;32m 22\u001b[0m __ensure_event_loop()\n\u001b[1;32m 23\u001b[0m loop \u001b[39m=\u001b[39m asyncio\u001b[39m.\u001b[39mget_event_loop()\n\u001b[0;32m---> 24\u001b[0m \u001b[39mreturn\u001b[39;00m loop\u001b[39m.\u001b[39;49mrun_until_complete(coroutine)\n",
39
+ "File \u001b[0;32m~/anaconda3/lib/python3.10/asyncio/base_events.py:625\u001b[0m, in \u001b[0;36mBaseEventLoop.run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 614\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Run until the Future is done.\u001b[39;00m\n\u001b[1;32m 615\u001b[0m \n\u001b[1;32m 616\u001b[0m \u001b[39mIf the argument is a coroutine, it is wrapped in a Task.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[39mReturn the Future's result, or raise its exception.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 624\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_closed()\n\u001b[0;32m--> 625\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_check_running()\n\u001b[1;32m 627\u001b[0m new_task \u001b[39m=\u001b[39m \u001b[39mnot\u001b[39;00m futures\u001b[39m.\u001b[39misfuture(future)\n\u001b[1;32m 628\u001b[0m future \u001b[39m=\u001b[39m tasks\u001b[39m.\u001b[39mensure_future(future, loop\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m)\n",
40
+ "File \u001b[0;32m~/anaconda3/lib/python3.10/asyncio/base_events.py:584\u001b[0m, in \u001b[0;36mBaseEventLoop._check_running\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_check_running\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 583\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_running():\n\u001b[0;32m--> 584\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m'\u001b[39m\u001b[39mThis event loop is already running\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 585\u001b[0m \u001b[39mif\u001b[39;00m events\u001b[39m.\u001b[39m_get_running_loop() \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 586\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[1;32m 587\u001b[0m \u001b[39m'\u001b[39m\u001b[39mCannot run the event loop while another loop is running\u001b[39m\u001b[39m'\u001b[39m)\n",
41
+ "\u001b[0;31mRuntimeError\u001b[0m: This event loop is already running"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "loader.load()"
47
+ ]
48
+ }
49
+ ],
50
+ "metadata": {
51
+ "kernelspec": {
52
+ "display_name": "base",
53
+ "language": "python",
54
+ "name": "python3"
55
+ },
56
+ "language_info": {
57
+ "codemirror_mode": {
58
+ "name": "ipython",
59
+ "version": 3
60
+ },
61
+ "file_extension": ".py",
62
+ "mimetype": "text/x-python",
63
+ "name": "python",
64
+ "nbconvert_exporter": "python",
65
+ "pygments_lexer": "ipython3",
66
+ "version": "3.10.10"
67
+ },
68
+ "orig_nbformat": 4
69
+ },
70
+ "nbformat": 4,
71
+ "nbformat_minor": 2
72
+ }
index_csv_loader.ipynb ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 12,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from langchain.document_loaders.csv_loader import CSVLoader"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 13,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "loader = CSVLoader(file_path='.inner/xbxb.csv')\n",
19
+ "\n",
20
+ "data = loader.load()"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 14,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "page_content='\\ufeff日期: 20230312\\nsn: 703052302050002380\\n是否内测用户: 0\\nquery_sentence: 帮我批改这些练习题\\ndomain: 1000726\\ndomain释义: null\\ndomain_support: 1\\nresult_support: 1\\nintent_support: 1\\nquery_intent: 1\\n: ' metadata={'source': '.inner/xbxb.csv', 'row': 0}\n"
33
+ ]
34
+ }
35
+ ],
36
+ "source": [
37
+ "print(data[0])"
38
+ ]
39
+ }
40
+ ],
41
+ "metadata": {
42
+ "kernelspec": {
43
+ "display_name": "base",
44
+ "language": "python",
45
+ "name": "python3"
46
+ },
47
+ "language_info": {
48
+ "codemirror_mode": {
49
+ "name": "ipython",
50
+ "version": 3
51
+ },
52
+ "file_extension": ".py",
53
+ "mimetype": "text/x-python",
54
+ "name": "python",
55
+ "nbconvert_exporter": "python",
56
+ "pygments_lexer": "ipython3",
57
+ "version": "3.10.10"
58
+ },
59
+ "orig_nbformat": 4
60
+ },
61
+ "nbformat": 4,
62
+ "nbformat_minor": 2
63
+ }