KevinHuSh
commited on
Commit
·
249b27c
1
Parent(s):
d94c6df
go through upload, create kb, add doc to kb (#11)
Browse files* add field progress msg into docinfo; add file processing procedure
* go through upload, create kb, add doc to kb
- Cargo.toml +1 -0
- docker/docker-compose.yml +9 -9
- migration/src/m20220101_000001_create_table.rs +39 -5
- python/conf/sys.cnf +3 -4
- python/nlp/huchunk.py +41 -0
- python/svr/parse_user_docs.py +171 -0
- python/util/__init__.py +19 -0
- python/util/config.py +0 -1
- python/util/db_conn.py +21 -2
- python/util/es_conn.py +1 -1
- src/api/doc_info.rs +48 -30
- src/api/kb_info.rs +39 -4
- src/api/tag.rs +58 -0
- src/entity/dialog2_kb.rs +3 -2
- src/entity/doc2_doc.rs +3 -2
- src/entity/doc_info.rs +4 -3
- src/entity/kb2_doc.rs +6 -5
- src/entity/kb_info.rs +2 -2
- src/entity/mod.rs +3 -3
- src/entity/tag2_doc.rs +3 -2
- src/entity/tag_info.rs +2 -2
- src/main.rs +1 -0
- src/service/doc_info.rs +33 -7
- src/service/kb_info.rs +28 -2
Cargo.toml
CHANGED
@@ -24,6 +24,7 @@ listenfd = "1.0.1"
|
|
24 |
chrono = "0.4.31"
|
25 |
migration = { path = "./migration" }
|
26 |
futures-util = "0.3.29"
|
|
|
27 |
|
28 |
[[bin]]
|
29 |
name = "doc_gpt"
|
|
|
24 |
chrono = "0.4.31"
|
25 |
migration = { path = "./migration" }
|
26 |
futures-util = "0.3.29"
|
27 |
+
actix-multipart-extract = "0.1.5"
|
28 |
|
29 |
[[bin]]
|
30 |
name = "doc_gpt"
|
docker/docker-compose.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
version: '2.2'
|
2 |
services:
|
3 |
es01:
|
4 |
-
container_name:
|
5 |
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
|
6 |
volumes:
|
7 |
- esdata01:/usr/share/elasticsearch/data
|
@@ -20,14 +20,14 @@ services:
|
|
20 |
soft: -1
|
21 |
hard: -1
|
22 |
networks:
|
23 |
-
-
|
24 |
restart: always
|
25 |
|
26 |
kibana:
|
27 |
depends_on:
|
28 |
- es01
|
29 |
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
|
30 |
-
container_name:
|
31 |
volumes:
|
32 |
- kibanadata:/usr/share/kibana/data
|
33 |
ports:
|
@@ -37,21 +37,21 @@ services:
|
|
37 |
- ELASTICSEARCH_HOSTS=http://es01:9200
|
38 |
mem_limit: ${MEM_LIMIT}
|
39 |
networks:
|
40 |
-
-
|
41 |
|
42 |
postgres:
|
43 |
image: postgres
|
44 |
-
container_name:
|
45 |
environment:
|
46 |
- POSTGRES_USER=${POSTGRES_USER}
|
47 |
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
|
48 |
- POSTGRES_DB=${POSTGRES_DB}
|
49 |
ports:
|
50 |
-
- 5455:
|
51 |
volumes:
|
52 |
-
- pg_data:/
|
53 |
networks:
|
54 |
-
-
|
55 |
restart: always
|
56 |
|
57 |
|
@@ -64,5 +64,5 @@ volumes:
|
|
64 |
driver: local
|
65 |
|
66 |
networks:
|
67 |
-
|
68 |
driver: bridge
|
|
|
1 |
version: '2.2'
|
2 |
services:
|
3 |
es01:
|
4 |
+
container_name: docgpt-es-01
|
5 |
image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
|
6 |
volumes:
|
7 |
- esdata01:/usr/share/elasticsearch/data
|
|
|
20 |
soft: -1
|
21 |
hard: -1
|
22 |
networks:
|
23 |
+
- docgpt
|
24 |
restart: always
|
25 |
|
26 |
kibana:
|
27 |
depends_on:
|
28 |
- es01
|
29 |
image: docker.elastic.co/kibana/kibana:${STACK_VERSION}
|
30 |
+
container_name: docgpt-kibana
|
31 |
volumes:
|
32 |
- kibanadata:/usr/share/kibana/data
|
33 |
ports:
|
|
|
37 |
- ELASTICSEARCH_HOSTS=http://es01:9200
|
38 |
mem_limit: ${MEM_LIMIT}
|
39 |
networks:
|
40 |
+
- docgpt
|
41 |
|
42 |
postgres:
|
43 |
image: postgres
|
44 |
+
container_name: docgpt-postgres
|
45 |
environment:
|
46 |
- POSTGRES_USER=${POSTGRES_USER}
|
47 |
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
|
48 |
- POSTGRES_DB=${POSTGRES_DB}
|
49 |
ports:
|
50 |
+
- 5455:5432
|
51 |
volumes:
|
52 |
+
- pg_data:/var/lib/postgresql/data
|
53 |
networks:
|
54 |
+
- docgpt
|
55 |
restart: always
|
56 |
|
57 |
|
|
|
64 |
driver: local
|
65 |
|
66 |
networks:
|
67 |
+
docgpt:
|
68 |
driver: bridge
|
migration/src/m20220101_000001_create_table.rs
CHANGED
@@ -47,8 +47,8 @@ impl MigrationTrait for Migration {
|
|
47 |
.col(ColumnDef::new(TagInfo::Uid).big_integer().not_null())
|
48 |
.col(ColumnDef::new(TagInfo::TagName).string().not_null())
|
49 |
.col(ColumnDef::new(TagInfo::Regx).string())
|
50 |
-
.col(ColumnDef::new(TagInfo::Color).
|
51 |
-
.col(ColumnDef::new(TagInfo::Icon).
|
52 |
.col(ColumnDef::new(TagInfo::Dir).string())
|
53 |
.col(ColumnDef::new(TagInfo::CreatedAt).date().not_null())
|
54 |
.col(ColumnDef::new(TagInfo::UpdatedAt).date().not_null())
|
@@ -62,6 +62,13 @@ impl MigrationTrait for Migration {
|
|
62 |
Table::create()
|
63 |
.table(Tag2Doc::Table)
|
64 |
.if_not_exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
.col(ColumnDef::new(Tag2Doc::TagId).big_integer())
|
66 |
.col(ColumnDef::new(Tag2Doc::Did).big_integer())
|
67 |
.to_owned(),
|
@@ -73,6 +80,13 @@ impl MigrationTrait for Migration {
|
|
73 |
Table::create()
|
74 |
.table(Kb2Doc::Table)
|
75 |
.if_not_exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
.col(ColumnDef::new(Kb2Doc::KbId).big_integer())
|
77 |
.col(ColumnDef::new(Kb2Doc::Did).big_integer())
|
78 |
.to_owned(),
|
@@ -84,6 +98,13 @@ impl MigrationTrait for Migration {
|
|
84 |
Table::create()
|
85 |
.table(Dialog2Kb::Table)
|
86 |
.if_not_exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
.col(ColumnDef::new(Dialog2Kb::DialogId).big_integer())
|
88 |
.col(ColumnDef::new(Dialog2Kb::KbId).big_integer())
|
89 |
.to_owned(),
|
@@ -95,6 +116,13 @@ impl MigrationTrait for Migration {
|
|
95 |
Table::create()
|
96 |
.table(Doc2Doc::Table)
|
97 |
.if_not_exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
.col(ColumnDef::new(Doc2Doc::ParentId).big_integer())
|
99 |
.col(ColumnDef::new(Doc2Doc::Did).big_integer())
|
100 |
.to_owned(),
|
@@ -112,7 +140,7 @@ impl MigrationTrait for Migration {
|
|
112 |
.primary_key())
|
113 |
.col(ColumnDef::new(KbInfo::Uid).big_integer().not_null())
|
114 |
.col(ColumnDef::new(KbInfo::KbName).string().not_null())
|
115 |
-
.col(ColumnDef::new(KbInfo::Icon).
|
116 |
.col(ColumnDef::new(KbInfo::CreatedAt).date().not_null())
|
117 |
.col(ColumnDef::new(KbInfo::UpdatedAt).date().not_null())
|
118 |
.col(ColumnDef::new(KbInfo::IsDeleted).boolean().default(false))
|
@@ -135,6 +163,7 @@ impl MigrationTrait for Migration {
|
|
135 |
.col(ColumnDef::new(DocInfo::Size).big_integer().not_null())
|
136 |
.col(ColumnDef::new(DocInfo::Type).string().not_null()).comment("doc|folder")
|
137 |
.col(ColumnDef::new(DocInfo::KbProgress).float().default(0))
|
|
|
138 |
.col(ColumnDef::new(DocInfo::CreatedAt).date().not_null())
|
139 |
.col(ColumnDef::new(DocInfo::UpdatedAt).date().not_null())
|
140 |
.col(ColumnDef::new(DocInfo::IsDeleted).boolean().default(false))
|
@@ -148,7 +177,7 @@ impl MigrationTrait for Migration {
|
|
148 |
.table(DialogInfo::Table)
|
149 |
.if_not_exists()
|
150 |
.col(ColumnDef::new(DialogInfo::DialogId)
|
151 |
-
|
152 |
.not_null()
|
153 |
.auto_increment()
|
154 |
.primary_key())
|
@@ -240,6 +269,7 @@ enum TagInfo {
|
|
240 |
#[derive(DeriveIden)]
|
241 |
enum Tag2Doc {
|
242 |
Table,
|
|
|
243 |
TagId,
|
244 |
Did,
|
245 |
}
|
@@ -247,6 +277,7 @@ enum Tag2Doc {
|
|
247 |
#[derive(DeriveIden)]
|
248 |
enum Kb2Doc {
|
249 |
Table,
|
|
|
250 |
KbId,
|
251 |
Did,
|
252 |
}
|
@@ -254,6 +285,7 @@ enum Kb2Doc {
|
|
254 |
#[derive(DeriveIden)]
|
255 |
enum Dialog2Kb {
|
256 |
Table,
|
|
|
257 |
DialogId,
|
258 |
KbId,
|
259 |
}
|
@@ -261,6 +293,7 @@ enum Dialog2Kb {
|
|
261 |
#[derive(DeriveIden)]
|
262 |
enum Doc2Doc {
|
263 |
Table,
|
|
|
264 |
ParentId,
|
265 |
Did,
|
266 |
}
|
@@ -287,6 +320,7 @@ enum DocInfo {
|
|
287 |
Size,
|
288 |
Type,
|
289 |
KbProgress,
|
|
|
290 |
CreatedAt,
|
291 |
UpdatedAt,
|
292 |
IsDeleted,
|
@@ -302,4 +336,4 @@ enum DialogInfo {
|
|
302 |
CreatedAt,
|
303 |
UpdatedAt,
|
304 |
IsDeleted,
|
305 |
-
}
|
|
|
47 |
.col(ColumnDef::new(TagInfo::Uid).big_integer().not_null())
|
48 |
.col(ColumnDef::new(TagInfo::TagName).string().not_null())
|
49 |
.col(ColumnDef::new(TagInfo::Regx).string())
|
50 |
+
.col(ColumnDef::new(TagInfo::Color).tiny_unsigned().default(1))
|
51 |
+
.col(ColumnDef::new(TagInfo::Icon).tiny_unsigned().default(1))
|
52 |
.col(ColumnDef::new(TagInfo::Dir).string())
|
53 |
.col(ColumnDef::new(TagInfo::CreatedAt).date().not_null())
|
54 |
.col(ColumnDef::new(TagInfo::UpdatedAt).date().not_null())
|
|
|
62 |
Table::create()
|
63 |
.table(Tag2Doc::Table)
|
64 |
.if_not_exists()
|
65 |
+
.col(
|
66 |
+
ColumnDef::new(Tag2Doc::Id)
|
67 |
+
.big_integer()
|
68 |
+
.not_null()
|
69 |
+
.auto_increment()
|
70 |
+
.primary_key(),
|
71 |
+
)
|
72 |
.col(ColumnDef::new(Tag2Doc::TagId).big_integer())
|
73 |
.col(ColumnDef::new(Tag2Doc::Did).big_integer())
|
74 |
.to_owned(),
|
|
|
80 |
Table::create()
|
81 |
.table(Kb2Doc::Table)
|
82 |
.if_not_exists()
|
83 |
+
.col(
|
84 |
+
ColumnDef::new(Kb2Doc::Id)
|
85 |
+
.big_integer()
|
86 |
+
.not_null()
|
87 |
+
.auto_increment()
|
88 |
+
.primary_key(),
|
89 |
+
)
|
90 |
.col(ColumnDef::new(Kb2Doc::KbId).big_integer())
|
91 |
.col(ColumnDef::new(Kb2Doc::Did).big_integer())
|
92 |
.to_owned(),
|
|
|
98 |
Table::create()
|
99 |
.table(Dialog2Kb::Table)
|
100 |
.if_not_exists()
|
101 |
+
.col(
|
102 |
+
ColumnDef::new(Dialog2Kb::Id)
|
103 |
+
.big_integer()
|
104 |
+
.not_null()
|
105 |
+
.auto_increment()
|
106 |
+
.primary_key(),
|
107 |
+
)
|
108 |
.col(ColumnDef::new(Dialog2Kb::DialogId).big_integer())
|
109 |
.col(ColumnDef::new(Dialog2Kb::KbId).big_integer())
|
110 |
.to_owned(),
|
|
|
116 |
Table::create()
|
117 |
.table(Doc2Doc::Table)
|
118 |
.if_not_exists()
|
119 |
+
.col(
|
120 |
+
ColumnDef::new(Doc2Doc::Id)
|
121 |
+
.big_integer()
|
122 |
+
.not_null()
|
123 |
+
.auto_increment()
|
124 |
+
.primary_key(),
|
125 |
+
)
|
126 |
.col(ColumnDef::new(Doc2Doc::ParentId).big_integer())
|
127 |
.col(ColumnDef::new(Doc2Doc::Did).big_integer())
|
128 |
.to_owned(),
|
|
|
140 |
.primary_key())
|
141 |
.col(ColumnDef::new(KbInfo::Uid).big_integer().not_null())
|
142 |
.col(ColumnDef::new(KbInfo::KbName).string().not_null())
|
143 |
+
.col(ColumnDef::new(KbInfo::Icon).tiny_unsigned().default(1))
|
144 |
.col(ColumnDef::new(KbInfo::CreatedAt).date().not_null())
|
145 |
.col(ColumnDef::new(KbInfo::UpdatedAt).date().not_null())
|
146 |
.col(ColumnDef::new(KbInfo::IsDeleted).boolean().default(false))
|
|
|
163 |
.col(ColumnDef::new(DocInfo::Size).big_integer().not_null())
|
164 |
.col(ColumnDef::new(DocInfo::Type).string().not_null()).comment("doc|folder")
|
165 |
.col(ColumnDef::new(DocInfo::KbProgress).float().default(0))
|
166 |
+
.col(ColumnDef::new(DocInfo::KbProgressMsg).string().default(""))
|
167 |
.col(ColumnDef::new(DocInfo::CreatedAt).date().not_null())
|
168 |
.col(ColumnDef::new(DocInfo::UpdatedAt).date().not_null())
|
169 |
.col(ColumnDef::new(DocInfo::IsDeleted).boolean().default(false))
|
|
|
177 |
.table(DialogInfo::Table)
|
178 |
.if_not_exists()
|
179 |
.col(ColumnDef::new(DialogInfo::DialogId)
|
180 |
+
.big_integer()
|
181 |
.not_null()
|
182 |
.auto_increment()
|
183 |
.primary_key())
|
|
|
269 |
#[derive(DeriveIden)]
|
270 |
enum Tag2Doc {
|
271 |
Table,
|
272 |
+
Id,
|
273 |
TagId,
|
274 |
Did,
|
275 |
}
|
|
|
277 |
#[derive(DeriveIden)]
|
278 |
enum Kb2Doc {
|
279 |
Table,
|
280 |
+
Id,
|
281 |
KbId,
|
282 |
Did,
|
283 |
}
|
|
|
285 |
#[derive(DeriveIden)]
|
286 |
enum Dialog2Kb {
|
287 |
Table,
|
288 |
+
Id,
|
289 |
DialogId,
|
290 |
KbId,
|
291 |
}
|
|
|
293 |
#[derive(DeriveIden)]
|
294 |
enum Doc2Doc {
|
295 |
Table,
|
296 |
+
Id,
|
297 |
ParentId,
|
298 |
Did,
|
299 |
}
|
|
|
320 |
Size,
|
321 |
Type,
|
322 |
KbProgress,
|
323 |
+
KbProgressMsg,
|
324 |
CreatedAt,
|
325 |
UpdatedAt,
|
326 |
IsDeleted,
|
|
|
336 |
CreatedAt,
|
337 |
UpdatedAt,
|
338 |
IsDeleted,
|
339 |
+
}
|
python/conf/sys.cnf
CHANGED
@@ -1,8 +1,7 @@
|
|
1 |
-
[
|
2 |
es=127.0.0.1:9200
|
3 |
-
idx_nm=toxic
|
4 |
pgdb_usr=root
|
5 |
-
pgdb_pwd=
|
6 |
pgdb_host=127.0.0.1
|
7 |
-
pgdb_port=
|
8 |
|
|
|
1 |
+
[infiniflow]
|
2 |
es=127.0.0.1:9200
|
|
|
3 |
pgdb_usr=root
|
4 |
+
pgdb_pwd=infiniflow_docgpt
|
5 |
pgdb_host=127.0.0.1
|
6 |
+
pgdb_port=5455
|
7 |
|
python/nlp/huchunk.py
CHANGED
@@ -359,6 +359,47 @@ class ExcelChunker(HuChunker):
|
|
359 |
return flds
|
360 |
|
361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
if __name__ == "__main__":
|
363 |
import sys
|
364 |
sys.path.append(os.path.dirname(__file__) + "/../")
|
|
|
359 |
return flds
|
360 |
|
361 |
|
362 |
+
class PptChunker(HuChunker):
|
363 |
+
|
364 |
+
@dataclass
|
365 |
+
class Fields:
|
366 |
+
text_chunks: List = None
|
367 |
+
table_chunks: List = None
|
368 |
+
|
369 |
+
def __init__(self):
|
370 |
+
super().__init__()
|
371 |
+
|
372 |
+
def __call__(self, fnm):
|
373 |
+
from pptx import Presentation
|
374 |
+
ppt = Presentation(fnm)
|
375 |
+
flds = self.Fields()
|
376 |
+
for slide in ppt.slides:
|
377 |
+
for shape in slide.shapes:
|
378 |
+
if hasattr(shape, "text"):
|
379 |
+
flds.text_chunks.append((shape.text, None))
|
380 |
+
flds.table_chunks = []
|
381 |
+
return flds
|
382 |
+
|
383 |
+
|
384 |
+
class TextChunker(HuChunker):
|
385 |
+
|
386 |
+
@dataclass
|
387 |
+
class Fields:
|
388 |
+
text_chunks: List = None
|
389 |
+
table_chunks: List = None
|
390 |
+
|
391 |
+
def __init__(self):
|
392 |
+
super().__init__()
|
393 |
+
|
394 |
+
def __call__(self, fnm):
|
395 |
+
flds = self.Fields()
|
396 |
+
with open(fnm, "r") as f:
|
397 |
+
txt = f.read()
|
398 |
+
flds.text_chunks = self.naive_text_chunk(txt)
|
399 |
+
flds.table_chunks = []
|
400 |
+
return flds
|
401 |
+
|
402 |
+
|
403 |
if __name__ == "__main__":
|
404 |
import sys
|
405 |
sys.path.append(os.path.dirname(__file__) + "/../")
|
python/svr/parse_user_docs.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json, re, sys, os, hashlib, copy, glob, util, time, random
|
2 |
+
from util.es_conn import HuEs, Postgres
|
3 |
+
from util import rmSpace, findMaxDt
|
4 |
+
from FlagEmbedding import FlagModel
|
5 |
+
from nlp import huchunk, huqie
|
6 |
+
import base64, hashlib
|
7 |
+
from io import BytesIO
|
8 |
+
from elasticsearch_dsl import Q
|
9 |
+
from parser import (
|
10 |
+
PdfParser,
|
11 |
+
DocxParser,
|
12 |
+
ExcelParser
|
13 |
+
)
|
14 |
+
from nlp.huchunk import (
|
15 |
+
PdfChunker,
|
16 |
+
DocxChunker,
|
17 |
+
ExcelChunker,
|
18 |
+
PptChunker,
|
19 |
+
TextChunker
|
20 |
+
)
|
21 |
+
|
22 |
+
ES = HuEs("infiniflow")
|
23 |
+
BATCH_SIZE = 64
|
24 |
+
PG = Postgres("infiniflow", "docgpt")
|
25 |
+
|
26 |
+
PDF = PdfChunker(PdfParser())
|
27 |
+
DOC = DocxChunker(DocxParser())
|
28 |
+
EXC = ExcelChunker(ExcelParser())
|
29 |
+
PPT = PptChunker()
|
30 |
+
|
31 |
+
|
32 |
+
def chuck_doc(name):
|
33 |
+
name = os.path.split(name)[-1].lower().split(".")[-1]
|
34 |
+
if name.find("pdf") >= 0: return PDF(name)
|
35 |
+
if name.find("doc") >= 0: return DOC(name)
|
36 |
+
if name.find("xlsx") >= 0: return EXC(name)
|
37 |
+
if name.find("ppt") >= 0: return PDF(name)
|
38 |
+
if name.find("pdf") >= 0: return PPT(name)
|
39 |
+
|
40 |
+
if re.match(r"(txt|csv)", name): return TextChunker(name)
|
41 |
+
|
42 |
+
|
43 |
+
def collect(comm, mod, tm):
|
44 |
+
sql = f"""
|
45 |
+
select
|
46 |
+
did,
|
47 |
+
uid,
|
48 |
+
doc_name,
|
49 |
+
location,
|
50 |
+
updated_at
|
51 |
+
from docinfo
|
52 |
+
where
|
53 |
+
updated_at >= '{tm}'
|
54 |
+
and kb_progress = 0
|
55 |
+
and type = 'doc'
|
56 |
+
and MOD(uid, {comm}) = {mod}
|
57 |
+
order by updated_at asc
|
58 |
+
limit 1000
|
59 |
+
"""
|
60 |
+
df = PG.select(sql)
|
61 |
+
df = df.fillna("")
|
62 |
+
mtm = str(df["updated_at"].max())[:19]
|
63 |
+
print("TOTAL:", len(df), "To: ", mtm)
|
64 |
+
return df, mtm
|
65 |
+
|
66 |
+
|
67 |
+
def set_progress(did, prog, msg):
|
68 |
+
sql = f"""
|
69 |
+
update docinfo set kb_progress={prog}, kb_progress_msg='{msg}' where did={did}
|
70 |
+
"""
|
71 |
+
PG.update(sql)
|
72 |
+
|
73 |
+
|
74 |
+
def build(row):
|
75 |
+
if row["size"] > 256000000:
|
76 |
+
set_progress(row["did"], -1, "File size exceeds( <= 256Mb )")
|
77 |
+
return []
|
78 |
+
doc = {
|
79 |
+
"doc_id": row["did"],
|
80 |
+
"title_tks": huqie.qie(os.path.split(row["location"])[-1]),
|
81 |
+
"updated_at": row["updated_at"]
|
82 |
+
}
|
83 |
+
random.seed(time.time())
|
84 |
+
set_progress(row["did"], random.randint(0, 20)/100., "Finished preparing! Start to slice file!")
|
85 |
+
obj = chuck_doc(row["location"])
|
86 |
+
if not obj:
|
87 |
+
set_progress(row["did"], -1, "Unsuported file type.")
|
88 |
+
return []
|
89 |
+
|
90 |
+
set_progress(row["did"], random.randint(20, 60)/100.)
|
91 |
+
|
92 |
+
output_buffer = BytesIO()
|
93 |
+
docs = []
|
94 |
+
md5 = hashlib.md5()
|
95 |
+
for txt, img in obj.text_chunks:
|
96 |
+
d = copy.deepcopy(doc)
|
97 |
+
md5.update((txt + str(d["doc_id"])).encode("utf-8"))
|
98 |
+
d["_id"] = md5.hexdigest()
|
99 |
+
d["content_ltks"] = huqie.qie(txt)
|
100 |
+
d["docnm_kwd"] = rmSpace(d["docnm_tks"])
|
101 |
+
if not img:
|
102 |
+
docs.append(d)
|
103 |
+
continue
|
104 |
+
img.save(output_buffer, format='JPEG')
|
105 |
+
d["img_bin"] = base64.b64encode(output_buffer.getvalue())
|
106 |
+
docs.append(d)
|
107 |
+
|
108 |
+
for arr, img in obj.table_chunks:
|
109 |
+
for i, txt in enumerate(arr):
|
110 |
+
d = copy.deepcopy(doc)
|
111 |
+
d["content_ltks"] = huqie.qie(txt)
|
112 |
+
md5.update((txt + str(d["doc_id"])).encode("utf-8"))
|
113 |
+
d["_id"] = md5.hexdigest()
|
114 |
+
if not img:
|
115 |
+
docs.append(d)
|
116 |
+
continue
|
117 |
+
img.save(output_buffer, format='JPEG')
|
118 |
+
d["img_bin"] = base64.b64encode(output_buffer.getvalue())
|
119 |
+
docs.append(d)
|
120 |
+
set_progress(row["did"], random.randint(60, 70)/100., "Finished slicing. Start to embedding the content.")
|
121 |
+
|
122 |
+
return docs
|
123 |
+
|
124 |
+
|
125 |
+
def index_name(uid):return f"docgpt_{uid}"
|
126 |
+
|
127 |
+
def init_kb(row):
|
128 |
+
idxnm = index_name(row["uid"])
|
129 |
+
if ES.indexExist(idxnm): return
|
130 |
+
return ES.createIdx(idxnm, json.load(open("res/mapping.json", "r")))
|
131 |
+
|
132 |
+
|
133 |
+
model = None
|
134 |
+
def embedding(docs):
|
135 |
+
global model
|
136 |
+
tts = model.encode([rmSpace(d["title_tks"]) for d in docs])
|
137 |
+
cnts = model.encode([rmSpace(d["content_ltks"]) for d in docs])
|
138 |
+
vects = 0.1 * tts + 0.9 * cnts
|
139 |
+
assert len(vects) == len(docs)
|
140 |
+
for i,d in enumerate(docs):d["q_vec"] = vects[i].tolist()
|
141 |
+
for d in docs:
|
142 |
+
set_progress(d["doc_id"], random.randint(70, 95)/100.,
|
143 |
+
"Finished embedding! Start to build index!")
|
144 |
+
|
145 |
+
|
146 |
+
def main(comm, mod):
|
147 |
+
tm_fnm = f"res/{comm}-{mod}.tm"
|
148 |
+
tmf = open(tm_fnm, "a+")
|
149 |
+
tm = findMaxDt(tm_fnm)
|
150 |
+
rows, tm = collect(comm, mod, tm)
|
151 |
+
for r in rows:
|
152 |
+
if r["is_deleted"]:
|
153 |
+
ES.deleteByQuery(Q("term", dock_id=r["did"]), index_name(r["uid"]))
|
154 |
+
continue
|
155 |
+
|
156 |
+
cks = build(r)
|
157 |
+
## TODO: exception handler
|
158 |
+
## set_progress(r["did"], -1, "ERROR: ")
|
159 |
+
embedding(cks)
|
160 |
+
if cks: init_kb(r)
|
161 |
+
ES.bulk(cks, index_name(r["uid"]))
|
162 |
+
tmf.write(str(r["updated_at"]) + "\n")
|
163 |
+
tmf.close()
|
164 |
+
|
165 |
+
|
166 |
+
if __name__ == "__main__":
|
167 |
+
from mpi4py import MPI
|
168 |
+
comm = MPI.COMM_WORLD
|
169 |
+
rank = comm.Get_rank()
|
170 |
+
main(comm, rank)
|
171 |
+
|
python/util/__init__.py
CHANGED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def rmSpace(txt):
|
4 |
+
txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt)
|
5 |
+
return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt)
|
6 |
+
|
7 |
+
def findMaxDt(fnm):
|
8 |
+
m = "1970-01-01 00:00:00"
|
9 |
+
try:
|
10 |
+
with open(fnm, "r") as f:
|
11 |
+
while True:
|
12 |
+
l = f.readline()
|
13 |
+
if not l:break
|
14 |
+
l = l.strip("\n")
|
15 |
+
if l == 'nan':continue
|
16 |
+
if l > m:m = l
|
17 |
+
except Exception as e:
|
18 |
+
print("WARNING: can't find "+ fnm)
|
19 |
+
return m
|
python/util/config.py
CHANGED
@@ -9,7 +9,6 @@ if not os.path.exists(__fnm): __fnm = "./sys.cnf"
|
|
9 |
|
10 |
CF.read(__fnm)
|
11 |
|
12 |
-
|
13 |
class Config:
|
14 |
def __init__(self, env):
|
15 |
self.env = env
|
|
|
9 |
|
10 |
CF.read(__fnm)
|
11 |
|
|
|
12 |
class Config:
|
13 |
def __init__(self, env):
|
14 |
self.env = env
|
python/util/db_conn.py
CHANGED
@@ -3,7 +3,7 @@ import time
|
|
3 |
from util import config
|
4 |
import pandas as pd
|
5 |
|
6 |
-
class
|
7 |
def __init__(self, env, dbnm):
|
8 |
self.config = config.init(env)
|
9 |
self.conn = None
|
@@ -36,9 +36,28 @@ class Postgre(object):
|
|
36 |
try:
|
37 |
return pd.read_sql(sql, self.conn)
|
38 |
except Exception as e:
|
39 |
-
logging.error(f"Fail to exec {sql}
|
40 |
self.__open__()
|
41 |
time.sleep(1)
|
42 |
|
43 |
return pd.DataFrame()
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from util import config
|
4 |
import pandas as pd
|
5 |
|
6 |
+
class Postgres(object):
|
7 |
def __init__(self, env, dbnm):
|
8 |
self.config = config.init(env)
|
9 |
self.conn = None
|
|
|
36 |
try:
|
37 |
return pd.read_sql(sql, self.conn)
|
38 |
except Exception as e:
|
39 |
+
logging.error(f"Fail to exec {sql} "+str(e))
|
40 |
self.__open__()
|
41 |
time.sleep(1)
|
42 |
|
43 |
return pd.DataFrame()
|
44 |
|
45 |
+
|
46 |
+
def update(self, sql):
|
47 |
+
for _ in range(10):
|
48 |
+
try:
|
49 |
+
cur = self.conn.cursor()
|
50 |
+
cur.execute(sql)
|
51 |
+
updated_rows = cur.rowcount
|
52 |
+
conn.commit()
|
53 |
+
cur.close()
|
54 |
+
return updated_rows
|
55 |
+
except Exception as e:
|
56 |
+
logging.error(f"Fail to exec {sql} "+str(e))
|
57 |
+
self.__open__()
|
58 |
+
time.sleep(1)
|
59 |
+
return 0
|
60 |
+
|
61 |
+
if __name__ == "__main__":
|
62 |
+
Postgres("infiniflow", "docgpt")
|
63 |
+
|
python/util/es_conn.py
CHANGED
@@ -31,7 +31,7 @@ class HuEs:
|
|
31 |
self.info = {}
|
32 |
self.config = config.init(env)
|
33 |
self.conn()
|
34 |
-
self.idxnm = self.config.get("idx_nm")
|
35 |
if not self.es.ping():
|
36 |
raise Exception("Can't connect to ES cluster")
|
37 |
|
|
|
31 |
self.info = {}
|
32 |
self.config = config.init(env)
|
33 |
self.conn()
|
34 |
+
self.idxnm = self.config.get("idx_nm","")
|
35 |
if not self.es.ping():
|
36 |
raise Exception("Can't connect to ES cluster")
|
37 |
|
src/api/doc_info.rs
CHANGED
@@ -1,15 +1,20 @@
|
|
1 |
use std::collections::HashMap;
|
2 |
-
use
|
|
|
|
|
|
|
3 |
use actix_web::{get, HttpResponse, post, web};
|
|
|
4 |
use chrono::Local;
|
5 |
use futures_util::StreamExt;
|
6 |
-
use
|
7 |
-
use std::io::Write;
|
8 |
use crate::api::JsonResponse;
|
9 |
use crate::AppState;
|
10 |
use crate::entity::doc_info::Model;
|
11 |
use crate::errors::AppError;
|
12 |
use crate::service::doc_info::{Mutation, Query};
|
|
|
|
|
13 |
|
14 |
#[derive(Debug, Deserialize)]
|
15 |
pub struct Params {
|
@@ -53,41 +58,54 @@ async fn list(params: web::Json<Params>, data: web::Data<AppState>) -> Result<Ht
|
|
53 |
.body(serde_json::to_string(&json_response)?))
|
54 |
}
|
55 |
|
56 |
-
#[
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
let filepath = format!("./uploads/{}", filename.as_str());
|
64 |
-
|
65 |
-
let mut file = web::block(|| std::fs::File::create(filepath))
|
66 |
-
.await
|
67 |
-
.unwrap()?;
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
}
|
|
|
76 |
}
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
kb_infos: Vec::new(),
|
84 |
kb_progress: 0.0,
|
85 |
-
|
86 |
-
|
|
|
87 |
created_at: Local::now().date_naive(),
|
88 |
updated_at: Local::now().date_naive(),
|
89 |
}).await?;
|
90 |
|
|
|
|
|
91 |
Ok(HttpResponse::Ok().body("File uploaded successfully"))
|
92 |
}
|
93 |
|
@@ -121,4 +139,4 @@ async fn mv(params: web::Json<MvParams>, data: web::Data<AppState>) -> Result<Ht
|
|
121 |
Ok(HttpResponse::Ok()
|
122 |
.content_type("application/json")
|
123 |
.body(serde_json::to_string(&json_response)?))
|
124 |
-
}
|
|
|
1 |
use std::collections::HashMap;
|
2 |
+
use std::io::Write;
|
3 |
+
use std::slice::Chunks;
|
4 |
+
//use actix_multipart::{Multipart, MultipartError, Field};
|
5 |
+
use actix_multipart_extract::{File, Multipart, MultipartForm};
|
6 |
use actix_web::{get, HttpResponse, post, web};
|
7 |
+
use actix_web::web::Bytes;
|
8 |
use chrono::Local;
|
9 |
use futures_util::StreamExt;
|
10 |
+
use sea_orm::DbConn;
|
|
|
11 |
use crate::api::JsonResponse;
|
12 |
use crate::AppState;
|
13 |
use crate::entity::doc_info::Model;
|
14 |
use crate::errors::AppError;
|
15 |
use crate::service::doc_info::{Mutation, Query};
|
16 |
+
use serde::Deserialize;
|
17 |
+
|
18 |
|
19 |
#[derive(Debug, Deserialize)]
|
20 |
pub struct Params {
|
|
|
58 |
.body(serde_json::to_string(&json_response)?))
|
59 |
}
|
60 |
|
61 |
+
#[derive(Deserialize, MultipartForm, Debug)]
|
62 |
+
pub struct UploadForm {
|
63 |
+
#[multipart(max_size = 512MB)]
|
64 |
+
file_field: File,
|
65 |
+
uid: i64,
|
66 |
+
did: i64
|
67 |
+
}
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
+
#[post("/v1.0/upload")]
|
70 |
+
async fn upload(payload: Multipart<UploadForm>, data: web::Data<AppState>) -> Result<HttpResponse, AppError> {
|
71 |
+
let uid = payload.uid;
|
72 |
+
async fn add_number_to_filename(file_name: String, conn:&DbConn, uid:i64) -> String {
|
73 |
+
let mut i = 0;
|
74 |
+
let mut new_file_name = file_name.to_string();
|
75 |
+
let arr: Vec<&str> = file_name.split(".").collect();
|
76 |
+
let suffix = String::from(arr[arr.len()-1]);
|
77 |
+
let preffix = arr[..arr.len()-1].join(".");
|
78 |
+
let mut docs = Query::find_doc_infos_by_name(conn, uid, new_file_name.clone()).await.unwrap();
|
79 |
+
while docs.len()>0 {
|
80 |
+
i += 1;
|
81 |
+
new_file_name = format!("{}_{}.{}", preffix, i, suffix);
|
82 |
+
docs = Query::find_doc_infos_by_name(conn, uid, new_file_name.clone()).await.unwrap();
|
83 |
}
|
84 |
+
new_file_name
|
85 |
}
|
86 |
+
let fnm = add_number_to_filename(payload.file_field.name.clone(), &data.conn, uid).await;
|
87 |
+
|
88 |
+
std::fs::create_dir_all(format!("./upload/{}/", uid));
|
89 |
+
let filepath = format!("./upload/{}/{}-{}", payload.uid, payload.did, fnm.clone());
|
90 |
+
let mut f =std::fs::File::create(&filepath)?;
|
91 |
+
f.write(&payload.file_field.bytes)?;
|
92 |
+
|
93 |
+
let doc = Mutation::create_doc_info(&data.conn, Model {
|
94 |
+
did:Default::default(),
|
95 |
+
uid: uid,
|
96 |
+
doc_name: fnm,
|
97 |
+
size: payload.file_field.bytes.len() as i64,
|
98 |
kb_infos: Vec::new(),
|
99 |
kb_progress: 0.0,
|
100 |
+
kb_progress_msg: "".to_string(),
|
101 |
+
location: filepath,
|
102 |
+
r#type: "doc".to_string(),
|
103 |
created_at: Local::now().date_naive(),
|
104 |
updated_at: Local::now().date_naive(),
|
105 |
}).await?;
|
106 |
|
107 |
+
let _ = Mutation::place_doc(&data.conn, payload.did, doc.did.unwrap()).await?;
|
108 |
+
|
109 |
Ok(HttpResponse::Ok().body("File uploaded successfully"))
|
110 |
}
|
111 |
|
|
|
139 |
Ok(HttpResponse::Ok()
|
140 |
.content_type("application/json")
|
141 |
.body(serde_json::to_string(&json_response)?))
|
142 |
+
}
|
src/api/kb_info.rs
CHANGED
@@ -1,23 +1,58 @@
|
|
1 |
use std::collections::HashMap;
|
2 |
use actix_web::{get, HttpResponse, post, web};
|
|
|
3 |
use crate::api::JsonResponse;
|
4 |
use crate::AppState;
|
5 |
use crate::entity::kb_info;
|
6 |
use crate::errors::AppError;
|
7 |
use crate::service::kb_info::Mutation;
|
8 |
use crate::service::kb_info::Query;
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
#[post("/v1.0/create_kb")]
|
11 |
async fn create(model: web::Json<kb_info::Model>, data: web::Data<AppState>) -> Result<HttpResponse, AppError> {
|
12 |
-
let
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
let json_response = JsonResponse {
|
18 |
code: 200,
|
19 |
err: "".to_owned(),
|
20 |
-
data:
|
21 |
};
|
22 |
|
23 |
Ok(HttpResponse::Ok()
|
|
|
1 |
use std::collections::HashMap;
|
2 |
use actix_web::{get, HttpResponse, post, web};
|
3 |
+
use serde::Serialize;
|
4 |
use crate::api::JsonResponse;
|
5 |
use crate::AppState;
|
6 |
use crate::entity::kb_info;
|
7 |
use crate::errors::AppError;
|
8 |
use crate::service::kb_info::Mutation;
|
9 |
use crate::service::kb_info::Query;
|
10 |
+
use serde::Deserialize;
|
11 |
|
12 |
+
#[derive(Clone, Debug, Serialize, Deserialize)]
|
13 |
+
pub struct AddDocs2KbParams {
|
14 |
+
pub uid: i64,
|
15 |
+
pub dids: Vec<i64>,
|
16 |
+
pub kb_id: i64,
|
17 |
+
}
|
18 |
#[post("/v1.0/create_kb")]
|
19 |
async fn create(model: web::Json<kb_info::Model>, data: web::Data<AppState>) -> Result<HttpResponse, AppError> {
|
20 |
+
let mut docs = Query::find_kb_infos_by_name(&data.conn, model.kb_name.to_owned()).await.unwrap();
|
21 |
+
if docs.len() >0 {
|
22 |
+
let json_response = JsonResponse {
|
23 |
+
code: 201,
|
24 |
+
err: "Duplicated name.".to_owned(),
|
25 |
+
data: ()
|
26 |
+
};
|
27 |
+
Ok(HttpResponse::Ok()
|
28 |
+
.content_type("application/json")
|
29 |
+
.body(serde_json::to_string(&json_response)?))
|
30 |
+
}else{
|
31 |
+
let model = Mutation::create_kb_info(&data.conn, model.into_inner()).await?;
|
32 |
|
33 |
+
let mut result = HashMap::new();
|
34 |
+
result.insert("kb_id", model.kb_id.unwrap());
|
35 |
+
|
36 |
+
let json_response = JsonResponse {
|
37 |
+
code: 200,
|
38 |
+
err: "".to_owned(),
|
39 |
+
data: result,
|
40 |
+
};
|
41 |
+
|
42 |
+
Ok(HttpResponse::Ok()
|
43 |
+
.content_type("application/json")
|
44 |
+
.body(serde_json::to_string(&json_response)?))
|
45 |
+
}
|
46 |
+
}
|
47 |
+
|
48 |
+
#[post("/v1.0/add_docs_to_kb")]
|
49 |
+
async fn add_docs_to_kb(param: web::Json<AddDocs2KbParams>, data: web::Data<AppState>) -> Result<HttpResponse, AppError> {
|
50 |
+
let _ = Mutation::add_docs(&data.conn, param.kb_id, param.dids.to_owned()).await?;
|
51 |
|
52 |
let json_response = JsonResponse {
|
53 |
code: 200,
|
54 |
err: "".to_owned(),
|
55 |
+
data: (),
|
56 |
};
|
57 |
|
58 |
Ok(HttpResponse::Ok()
|
src/api/tag.rs
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
use std::collections::HashMap;
|
2 |
+
use actix_web::{get, HttpResponse, post, web};
|
3 |
+
use actix_web::http::Error;
|
4 |
+
use crate::api::JsonResponse;
|
5 |
+
use crate::AppState;
|
6 |
+
use crate::entity::tag_info;
|
7 |
+
use crate::service::tag_info::{Mutation, Query};
|
8 |
+
|
9 |
+
#[post("/v1.0/create_tag")]
|
10 |
+
async fn create(model: web::Json<tag_info::Model>, data: web::Data<AppState>) -> Result<HttpResponse, Error> {
|
11 |
+
let model = Mutation::create_tag(&data.conn, model.into_inner()).await.unwrap();
|
12 |
+
|
13 |
+
let mut result = HashMap::new();
|
14 |
+
result.insert("tid", model.tid.unwrap());
|
15 |
+
|
16 |
+
let json_response = JsonResponse {
|
17 |
+
code: 200,
|
18 |
+
err: "".to_owned(),
|
19 |
+
data: result,
|
20 |
+
};
|
21 |
+
|
22 |
+
Ok(HttpResponse::Ok()
|
23 |
+
.content_type("application/json")
|
24 |
+
.body(serde_json::to_string(&json_response).unwrap()))
|
25 |
+
}
|
26 |
+
|
27 |
+
#[post("/v1.0/delete_tag")]
|
28 |
+
async fn delete(model: web::Json<tag_info::Model>, data: web::Data<AppState>) -> Result<HttpResponse, Error> {
|
29 |
+
let _ = Mutation::delete_tag(&data.conn, model.tid).await.unwrap();
|
30 |
+
|
31 |
+
let json_response = JsonResponse {
|
32 |
+
code: 200,
|
33 |
+
err: "".to_owned(),
|
34 |
+
data: (),
|
35 |
+
};
|
36 |
+
|
37 |
+
Ok(HttpResponse::Ok()
|
38 |
+
.content_type("application/json")
|
39 |
+
.body(serde_json::to_string(&json_response).unwrap()))
|
40 |
+
}
|
41 |
+
|
42 |
+
#[get("/v1.0/tags")]
|
43 |
+
async fn list(data: web::Data<AppState>) -> Result<HttpResponse, Error> {
|
44 |
+
let tags = Query::find_tag_infos(&data.conn).await.unwrap();
|
45 |
+
|
46 |
+
let mut result = HashMap::new();
|
47 |
+
result.insert("tags", tags);
|
48 |
+
|
49 |
+
let json_response = JsonResponse {
|
50 |
+
code: 200,
|
51 |
+
err: "".to_owned(),
|
52 |
+
data: result,
|
53 |
+
};
|
54 |
+
|
55 |
+
Ok(HttpResponse::Ok()
|
56 |
+
.content_type("application/json")
|
57 |
+
.body(serde_json::to_string(&json_response).unwrap()))
|
58 |
+
}
|
src/entity/dialog2_kb.rs
CHANGED
@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
|
|
4 |
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
|
5 |
#[sea_orm(table_name = "dialog2_kb")]
|
6 |
pub struct Model {
|
7 |
-
#[sea_orm(primary_key, auto_increment =
|
|
|
8 |
#[sea_orm(index)]
|
9 |
pub dialog_id: i64,
|
10 |
-
#[sea_orm(
|
11 |
pub kb_id: i64,
|
12 |
}
|
13 |
|
|
|
4 |
/// Join-table row associating a dialog with a knowledge base it may use.
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
#[sea_orm(table_name = "dialog2_kb")]
pub struct Model {
    // Surrogate auto-increment primary key for the association row.
    #[sea_orm(primary_key, auto_increment = true)]
    pub id: i64,
    // Dialog side of the link; indexed for lookups by dialog.
    #[sea_orm(index)]
    pub dialog_id: i64,
    // Knowledge-base side of the link; indexed for lookups by kb.
    #[sea_orm(index)]
    pub kb_id: i64,
}
|
14 |
|
src/entity/doc2_doc.rs
CHANGED
@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
|
|
4 |
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
|
5 |
#[sea_orm(table_name = "doc2_doc")]
|
6 |
pub struct Model {
|
7 |
-
#[sea_orm(primary_key, auto_increment =
|
|
|
8 |
#[sea_orm(index)]
|
9 |
pub parent_id: i64,
|
10 |
-
#[sea_orm(
|
11 |
pub did: i64,
|
12 |
}
|
13 |
|
|
|
4 |
/// Join-table row expressing the parent/child (folder-like) relation
/// between two documents.
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
#[sea_orm(table_name = "doc2_doc")]
pub struct Model {
    // Surrogate auto-increment primary key for the association row.
    #[sea_orm(primary_key, auto_increment = true)]
    pub id: i64,
    // Document id of the containing parent; indexed for child listings.
    #[sea_orm(index)]
    pub parent_id: i64,
    // Document id of the child; indexed for reverse lookups.
    #[sea_orm(index)]
    pub did: i64,
}
|
14 |
|
src/entity/doc_info.rs
CHANGED
@@ -10,10 +10,11 @@ pub struct Model {
|
|
10 |
#[sea_orm(index)]
|
11 |
pub uid: i64,
|
12 |
pub doc_name: String,
|
13 |
-
pub size:
|
14 |
#[sea_orm(column_name = "type")]
|
15 |
pub r#type: String,
|
16 |
-
pub kb_progress:
|
|
|
17 |
pub location: String,
|
18 |
#[sea_orm(ignore)]
|
19 |
pub kb_infos: Vec<kb_info::Model>,
|
@@ -57,4 +58,4 @@ impl Related<Entity> for Entity {
|
|
57 |
}
|
58 |
}
|
59 |
|
60 |
-
impl ActiveModelBehavior for ActiveModel {}
|
|
|
10 |
#[sea_orm(index)]
|
11 |
pub uid: i64,
|
12 |
pub doc_name: String,
|
13 |
+
pub size: i64,
|
14 |
#[sea_orm(column_name = "type")]
|
15 |
pub r#type: String,
|
16 |
+
pub kb_progress: f32,
|
17 |
+
pub kb_progress_msg: String,
|
18 |
pub location: String,
|
19 |
#[sea_orm(ignore)]
|
20 |
pub kb_infos: Vec<kb_info::Model>,
|
|
|
58 |
}
|
59 |
}
|
60 |
|
61 |
+
impl ActiveModelBehavior for ActiveModel {}
|
src/entity/kb2_doc.rs
CHANGED
@@ -4,11 +4,12 @@ use serde::{Deserialize, Serialize};
|
|
4 |
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
|
5 |
#[sea_orm(table_name = "kb2_doc")]
|
6 |
pub struct Model {
|
7 |
-
#[sea_orm(primary_key, auto_increment =
|
|
|
8 |
#[sea_orm(index)]
|
9 |
pub kb_id: i64,
|
10 |
-
#[sea_orm(
|
11 |
-
pub
|
12 |
}
|
13 |
|
14 |
#[derive(Debug, Clone, Copy, EnumIter)]
|
@@ -21,8 +22,8 @@ impl RelationTrait for Relation {
|
|
21 |
fn def(&self) -> RelationDef {
|
22 |
match self {
|
23 |
Self::DocInfo => Entity::belongs_to(super::doc_info::Entity)
|
24 |
-
.from(Column::
|
25 |
-
.to(super::doc_info::Column::
|
26 |
.into(),
|
27 |
Self::KbInfo => Entity::belongs_to(super::kb_info::Entity)
|
28 |
.from(Column::KbId)
|
|
|
4 |
/// Join-table row linking a knowledge base to a document it contains.
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
#[sea_orm(table_name = "kb2_doc")]
pub struct Model {
    // Surrogate auto-increment primary key for the association row.
    #[sea_orm(primary_key, auto_increment = true)]
    pub id: i64,
    // Knowledge-base side of the link; indexed for lookups by kb.
    #[sea_orm(index)]
    pub kb_id: i64,
    // Document side of the link; indexed for lookups by document.
    #[sea_orm(index)]
    pub did: i64,
}
|
14 |
|
15 |
#[derive(Debug, Clone, Copy, EnumIter)]
|
|
|
22 |
fn def(&self) -> RelationDef {
|
23 |
match self {
|
24 |
Self::DocInfo => Entity::belongs_to(super::doc_info::Entity)
|
25 |
+
.from(Column::Did)
|
26 |
+
.to(super::doc_info::Column::Did)
|
27 |
.into(),
|
28 |
Self::KbInfo => Entity::belongs_to(super::kb_info::Entity)
|
29 |
.from(Column::KbId)
|
src/entity/kb_info.rs
CHANGED
@@ -8,8 +8,8 @@ pub struct Model {
|
|
8 |
pub kb_id: i64,
|
9 |
#[sea_orm(index)]
|
10 |
pub uid: i64,
|
11 |
-
pub
|
12 |
-
pub icon:
|
13 |
|
14 |
#[serde(skip_deserializing)]
|
15 |
pub created_at: Date,
|
|
|
8 |
pub kb_id: i64,
|
9 |
#[sea_orm(index)]
|
10 |
pub uid: i64,
|
11 |
+
pub kb_name: String,
|
12 |
+
pub icon: i16,
|
13 |
|
14 |
#[serde(skip_deserializing)]
|
15 |
pub created_at: Date,
|
src/entity/mod.rs
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
pub(crate) mod user_info;
|
2 |
pub(crate) mod tag_info;
|
3 |
-
mod tag2_doc;
|
4 |
-
mod kb2_doc;
|
5 |
-
mod dialog2_kb;
|
6 |
pub(crate) mod doc2_doc;
|
7 |
pub(crate) mod kb_info;
|
8 |
pub(crate) mod doc_info;
|
|
|
1 |
pub(crate) mod user_info;
|
2 |
pub(crate) mod tag_info;
|
3 |
+
pub(crate) mod tag2_doc;
|
4 |
+
pub(crate) mod kb2_doc;
|
5 |
+
pub(crate) mod dialog2_kb;
|
6 |
pub(crate) mod doc2_doc;
|
7 |
pub(crate) mod kb_info;
|
8 |
pub(crate) mod doc_info;
|
src/entity/tag2_doc.rs
CHANGED
@@ -4,10 +4,11 @@ use serde::{Deserialize, Serialize};
|
|
4 |
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
|
5 |
#[sea_orm(table_name = "tag2_doc")]
|
6 |
pub struct Model {
|
7 |
-
#[sea_orm(primary_key, auto_increment =
|
|
|
8 |
#[sea_orm(index)]
|
9 |
pub tag_id: i64,
|
10 |
-
#[sea_orm(
|
11 |
pub uid: i64,
|
12 |
}
|
13 |
|
|
|
4 |
/// Join-table row associating a tag with a user.
/// NOTE(review): the second column is `uid`, not a document id, despite the
/// table name `tag2_doc` — confirm the intended semantics against the schema.
#[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel, Deserialize, Serialize)]
#[sea_orm(table_name = "tag2_doc")]
pub struct Model {
    // Surrogate auto-increment primary key for the association row.
    #[sea_orm(primary_key, auto_increment = true)]
    pub id: i64,
    // Tag side of the link; indexed for lookups by tag.
    #[sea_orm(index)]
    pub tag_id: i64,
    // Owner/user side of the link; indexed for lookups by user.
    #[sea_orm(index)]
    pub uid: i64,
}
|
14 |
|
src/entity/tag_info.rs
CHANGED
@@ -10,8 +10,8 @@ pub struct Model {
|
|
10 |
pub uid: i64,
|
11 |
pub tag_name: String,
|
12 |
pub regx: Option<String>,
|
13 |
-
pub color:
|
14 |
-
pub icon:
|
15 |
pub dir: Option<String>,
|
16 |
|
17 |
#[serde(skip_deserializing)]
|
|
|
10 |
pub uid: i64,
|
11 |
pub tag_name: String,
|
12 |
pub regx: Option<String>,
|
13 |
+
pub color: u16,
|
14 |
+
pub icon: u16,
|
15 |
pub dir: Option<String>,
|
16 |
|
17 |
#[serde(skip_deserializing)]
|
src/main.rs
CHANGED
@@ -97,6 +97,7 @@ fn init(cfg: &mut web::ServiceConfig) {
|
|
97 |
cfg.service(api::kb_info::create);
|
98 |
cfg.service(api::kb_info::delete);
|
99 |
cfg.service(api::kb_info::list);
|
|
|
100 |
|
101 |
cfg.service(api::doc_info::list);
|
102 |
cfg.service(api::doc_info::delete);
|
|
|
97 |
cfg.service(api::kb_info::create);
|
98 |
cfg.service(api::kb_info::delete);
|
99 |
cfg.service(api::kb_info::list);
|
100 |
+
cfg.service(api::kb_info::add_docs_to_kb);
|
101 |
|
102 |
cfg.service(api::doc_info::list);
|
103 |
cfg.service(api::doc_info::delete);
|
src/service/doc_info.rs
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
use chrono::Local;
|
2 |
-
use sea_orm::{ActiveModelTrait, ColumnTrait, DbConn, DbErr, DeleteResult, EntityTrait, PaginatorTrait, QueryOrder};
|
3 |
use sea_orm::ActiveValue::Set;
|
4 |
use sea_orm::QueryFilter;
|
5 |
use crate::api::doc_info::Params;
|
@@ -24,6 +24,14 @@ impl Query {
|
|
24 |
.await
|
25 |
}
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
pub async fn find_doc_infos_by_params(db: &DbConn, params: Params) -> Result<Vec<doc_info::Model>, DbErr> {
|
28 |
// Setup paginator
|
29 |
let paginator = Entity::find();
|
@@ -80,18 +88,34 @@ impl Mutation {
|
|
80 |
dids: &[i64]
|
81 |
) -> Result<(), DbErr> {
|
82 |
for did in dids {
|
|
|
|
|
83 |
let _ = doc2_doc::ActiveModel {
|
84 |
-
|
85 |
-
did: Set(
|
|
|
86 |
}
|
87 |
-
.
|
88 |
-
.await
|
89 |
-
.unwrap();
|
90 |
}
|
91 |
|
92 |
Ok(())
|
93 |
}
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
pub async fn create_doc_info(
|
96 |
db: &DbConn,
|
97 |
form_data: doc_info::Model,
|
@@ -103,6 +127,7 @@ impl Mutation {
|
|
103 |
size: Set(form_data.size.to_owned()),
|
104 |
r#type: Set(form_data.r#type.to_owned()),
|
105 |
kb_progress: Set(form_data.kb_progress.to_owned()),
|
|
|
106 |
location: Set(form_data.location.to_owned()),
|
107 |
created_at: Set(Local::now().date_naive()),
|
108 |
updated_at: Set(Local::now().date_naive()),
|
@@ -129,6 +154,7 @@ impl Mutation {
|
|
129 |
size: Set(form_data.size.to_owned()),
|
130 |
r#type: Set(form_data.r#type.to_owned()),
|
131 |
kb_progress: Set(form_data.kb_progress.to_owned()),
|
|
|
132 |
location: Set(form_data.location.to_owned()),
|
133 |
created_at: Default::default(),
|
134 |
updated_at: Set(Local::now().date_naive()),
|
@@ -150,4 +176,4 @@ impl Mutation {
|
|
150 |
pub async fn delete_all_doc_infos(db: &DbConn) -> Result<DeleteResult, DbErr> {
|
151 |
Entity::delete_many().exec(db).await
|
152 |
}
|
153 |
-
}
|
|
|
1 |
use chrono::Local;
|
2 |
+
use sea_orm::{ActiveModelTrait, ColumnTrait, DbConn, DbErr, DeleteResult, EntityTrait, PaginatorTrait, QueryOrder, Unset, Unchanged, ConditionalStatement};
|
3 |
use sea_orm::ActiveValue::Set;
|
4 |
use sea_orm::QueryFilter;
|
5 |
use crate::api::doc_info::Params;
|
|
|
24 |
.await
|
25 |
}
|
26 |
|
27 |
+
pub async fn find_doc_infos_by_name(db: &DbConn, uid: i64, name: String) -> Result<Vec<doc_info::Model>, DbErr> {
|
28 |
+
Entity::find()
|
29 |
+
.filter(doc_info::Column::DocName.eq(name))
|
30 |
+
.filter(doc_info::Column::Uid.eq(uid))
|
31 |
+
.all(db)
|
32 |
+
.await
|
33 |
+
}
|
34 |
+
|
35 |
pub async fn find_doc_infos_by_params(db: &DbConn, params: Params) -> Result<Vec<doc_info::Model>, DbErr> {
|
36 |
// Setup paginator
|
37 |
let paginator = Entity::find();
|
|
|
88 |
dids: &[i64]
|
89 |
) -> Result<(), DbErr> {
|
90 |
for did in dids {
|
91 |
+
let d = doc2_doc::Entity::find().filter(doc2_doc::Column::Did.eq(did.to_owned())).all(db).await?;
|
92 |
+
|
93 |
let _ = doc2_doc::ActiveModel {
|
94 |
+
id: Set(d[0].id),
|
95 |
+
did: Set(did.to_owned()),
|
96 |
+
parent_id: Set(dest_did)
|
97 |
}
|
98 |
+
.update(db)
|
99 |
+
.await?;
|
|
|
100 |
}
|
101 |
|
102 |
Ok(())
|
103 |
}
|
104 |
|
105 |
+
pub async fn place_doc(
|
106 |
+
db: &DbConn,
|
107 |
+
dest_did: i64,
|
108 |
+
did: i64
|
109 |
+
) -> Result<doc2_doc::ActiveModel, DbErr> {
|
110 |
+
doc2_doc::ActiveModel {
|
111 |
+
id: Default::default(),
|
112 |
+
parent_id: Set(dest_did),
|
113 |
+
did: Set(did),
|
114 |
+
}
|
115 |
+
.save(db)
|
116 |
+
.await
|
117 |
+
}
|
118 |
+
|
119 |
pub async fn create_doc_info(
|
120 |
db: &DbConn,
|
121 |
form_data: doc_info::Model,
|
|
|
127 |
size: Set(form_data.size.to_owned()),
|
128 |
r#type: Set(form_data.r#type.to_owned()),
|
129 |
kb_progress: Set(form_data.kb_progress.to_owned()),
|
130 |
+
kb_progress_msg: Set(form_data.kb_progress_msg.to_owned()),
|
131 |
location: Set(form_data.location.to_owned()),
|
132 |
created_at: Set(Local::now().date_naive()),
|
133 |
updated_at: Set(Local::now().date_naive()),
|
|
|
154 |
size: Set(form_data.size.to_owned()),
|
155 |
r#type: Set(form_data.r#type.to_owned()),
|
156 |
kb_progress: Set(form_data.kb_progress.to_owned()),
|
157 |
+
kb_progress_msg: Set(form_data.kb_progress_msg.to_owned()),
|
158 |
location: Set(form_data.location.to_owned()),
|
159 |
created_at: Default::default(),
|
160 |
updated_at: Set(Local::now().date_naive()),
|
|
|
176 |
pub async fn delete_all_doc_infos(db: &DbConn) -> Result<DeleteResult, DbErr> {
|
177 |
Entity::delete_many().exec(db).await
|
178 |
}
|
179 |
+
}
|
src/service/kb_info.rs
CHANGED
@@ -2,6 +2,7 @@ use chrono::Local;
|
|
2 |
use sea_orm::{ActiveModelTrait, ColumnTrait, DbConn, DbErr, DeleteResult, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder};
|
3 |
use sea_orm::ActiveValue::Set;
|
4 |
use crate::entity::kb_info;
|
|
|
5 |
use crate::entity::kb_info::Entity;
|
6 |
|
7 |
pub struct Query;
|
@@ -21,6 +22,13 @@ impl Query {
|
|
21 |
.all(db)
|
22 |
.await
|
23 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
pub async fn find_kb_infos_in_page(
|
26 |
db: &DbConn,
|
@@ -48,7 +56,7 @@ impl Mutation {
|
|
48 |
kb_info::ActiveModel {
|
49 |
kb_id: Default::default(),
|
50 |
uid: Set(form_data.uid.to_owned()),
|
51 |
-
|
52 |
icon: Set(form_data.icon.to_owned()),
|
53 |
created_at: Set(Local::now().date_naive()),
|
54 |
updated_at: Set(Local::now().date_naive()),
|
@@ -57,6 +65,24 @@ impl Mutation {
|
|
57 |
.await
|
58 |
}
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
pub async fn update_kb_info_by_id(
|
61 |
db: &DbConn,
|
62 |
id: i64,
|
@@ -71,7 +97,7 @@ impl Mutation {
|
|
71 |
kb_info::ActiveModel {
|
72 |
kb_id: kb_info.kb_id,
|
73 |
uid: kb_info.uid,
|
74 |
-
|
75 |
icon: Set(form_data.icon.to_owned()),
|
76 |
created_at: Default::default(),
|
77 |
updated_at: Set(Local::now().date_naive()),
|
|
|
2 |
use sea_orm::{ActiveModelTrait, ColumnTrait, DbConn, DbErr, DeleteResult, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder};
|
3 |
use sea_orm::ActiveValue::Set;
|
4 |
use crate::entity::kb_info;
|
5 |
+
use crate::entity::kb2_doc;
|
6 |
use crate::entity::kb_info::Entity;
|
7 |
|
8 |
pub struct Query;
|
|
|
22 |
.all(db)
|
23 |
.await
|
24 |
}
|
25 |
+
|
26 |
+
pub async fn find_kb_infos_by_name(db: &DbConn, name: String) -> Result<Vec<kb_info::Model>, DbErr> {
|
27 |
+
Entity::find()
|
28 |
+
.filter(kb_info::Column::KbName.eq(name))
|
29 |
+
.all(db)
|
30 |
+
.await
|
31 |
+
}
|
32 |
|
33 |
pub async fn find_kb_infos_in_page(
|
34 |
db: &DbConn,
|
|
|
56 |
kb_info::ActiveModel {
|
57 |
kb_id: Default::default(),
|
58 |
uid: Set(form_data.uid.to_owned()),
|
59 |
+
kb_name: Set(form_data.kb_name.to_owned()),
|
60 |
icon: Set(form_data.icon.to_owned()),
|
61 |
created_at: Set(Local::now().date_naive()),
|
62 |
updated_at: Set(Local::now().date_naive()),
|
|
|
65 |
.await
|
66 |
}
|
67 |
|
68 |
+
pub async fn add_docs(
|
69 |
+
db: &DbConn,
|
70 |
+
kb_id: i64,
|
71 |
+
doc_ids: Vec<i64>
|
72 |
+
)-> Result<(), DbErr> {
|
73 |
+
for did in doc_ids{
|
74 |
+
let _ = kb2_doc::ActiveModel {
|
75 |
+
id: Default::default(),
|
76 |
+
kb_id: Set(kb_id),
|
77 |
+
did: Set(did),
|
78 |
+
}
|
79 |
+
.save(db)
|
80 |
+
.await?;
|
81 |
+
}
|
82 |
+
|
83 |
+
Ok(())
|
84 |
+
}
|
85 |
+
|
86 |
pub async fn update_kb_info_by_id(
|
87 |
db: &DbConn,
|
88 |
id: i64,
|
|
|
97 |
kb_info::ActiveModel {
|
98 |
kb_id: kb_info.kb_id,
|
99 |
uid: kb_info.uid,
|
100 |
+
kb_name: Set(form_data.kb_name.to_owned()),
|
101 |
icon: Set(form_data.icon.to_owned()),
|
102 |
created_at: Default::default(),
|
103 |
updated_at: Set(Local::now().date_naive()),
|