KevinHuSh
committed on
Commit
·
3fc700a
1
Parent(s):
3245107
build dialog server; add thumbnail to docinfo; (#17)
Browse files
- Cargo.toml +1 -0
- migration/src/m20220101_000001_create_table.rs +8 -7
- python/conf/sys.cnf +4 -4
- python/llm/chat_model.py +1 -0
- python/nlp/search.py +2 -0
- python/svr/dialog_svr.py +12 -12
- python/svr/parse_user_docs.py +5 -7
- src/api/doc_info.rs +47 -3
- src/api/user_info.rs +2 -1
- src/entity/doc_info.rs +2 -0
- src/entity/tag2_doc.rs +7 -7
- src/service/doc_info.rs +3 -1
Cargo.toml
CHANGED
|
@@ -26,6 +26,7 @@ migration = { path = "./migration" }
|
|
| 26 |
minio = "0.1.0"
|
| 27 |
futures-util = "0.3.29"
|
| 28 |
actix-multipart-extract = "0.1.5"
|
|
|
|
| 29 |
|
| 30 |
[[bin]]
|
| 31 |
name = "doc_gpt"
|
|
|
|
| 26 |
minio = "0.1.0"
|
| 27 |
futures-util = "0.3.29"
|
| 28 |
actix-multipart-extract = "0.1.5"
|
| 29 |
+
regex = "1.10.2"
|
| 30 |
|
| 31 |
[[bin]]
|
| 32 |
name = "doc_gpt"
|
migration/src/m20220101_000001_create_table.rs
CHANGED
|
@@ -201,7 +201,8 @@ impl MigrationTrait for Migration {
|
|
| 201 |
.col(ColumnDef::new(DocInfo::Location).string().not_null())
|
| 202 |
.col(ColumnDef::new(DocInfo::Size).big_integer().not_null())
|
| 203 |
.col(ColumnDef::new(DocInfo::Type).string().not_null())
|
| 204 |
-
.
|
|
|
|
| 205 |
.col(
|
| 206 |
ColumnDef::new(DocInfo::CreatedAt)
|
| 207 |
.timestamp_with_time_zone()
|
|
@@ -249,7 +250,6 @@ impl MigrationTrait for Migration {
|
|
| 249 |
.to_owned()
|
| 250 |
).await?;
|
| 251 |
|
| 252 |
-
let tm = now();
|
| 253 |
let root_insert = Query::insert()
|
| 254 |
.into_table(UserInfo::Table)
|
| 255 |
.columns([UserInfo::Email, UserInfo::Nickname, UserInfo::Password])
|
|
@@ -273,28 +273,28 @@ impl MigrationTrait for Migration {
|
|
| 273 |
.columns([TagInfo::Uid, TagInfo::TagName, TagInfo::Regx, TagInfo::Color, TagInfo::Icon])
|
| 274 |
.values_panic([
|
| 275 |
(1).into(),
|
| 276 |
-
"
|
| 277 |
".*\\.(mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa)".into(),
|
| 278 |
(1).into(),
|
| 279 |
(1).into(),
|
| 280 |
])
|
| 281 |
.values_panic([
|
| 282 |
(1).into(),
|
| 283 |
-
"
|
| 284 |
-
".*\\.(png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng)".into(),
|
| 285 |
(2).into(),
|
| 286 |
(2).into(),
|
| 287 |
])
|
| 288 |
.values_panic([
|
| 289 |
(1).into(),
|
| 290 |
-
"
|
| 291 |
".*\\.(WAV|FLAC|APE|ALAC|WavPack|WV|MP3|AAC|Ogg|Vorbis|Opus)".into(),
|
| 292 |
(3).into(),
|
| 293 |
(3).into(),
|
| 294 |
])
|
| 295 |
.values_panic([
|
| 296 |
(1).into(),
|
| 297 |
-
"
|
| 298 |
".*\\.(pdf|doc|ppt|yml|xml|htm|json|csv|txt|ini|xsl|wps|rtf|hlp)".into(),
|
| 299 |
(3).into(),
|
| 300 |
(3).into(),
|
|
@@ -419,6 +419,7 @@ enum DocInfo {
|
|
| 419 |
Location,
|
| 420 |
Size,
|
| 421 |
Type,
|
|
|
|
| 422 |
CreatedAt,
|
| 423 |
UpdatedAt,
|
| 424 |
IsDeleted,
|
|
|
|
| 201 |
.col(ColumnDef::new(DocInfo::Location).string().not_null())
|
| 202 |
.col(ColumnDef::new(DocInfo::Size).big_integer().not_null())
|
| 203 |
.col(ColumnDef::new(DocInfo::Type).string().not_null())
|
| 204 |
+
.col(ColumnDef::new(DocInfo::ThumbnailBase64).string().not_null())
|
| 205 |
+
.comment("doc type|folder")
|
| 206 |
.col(
|
| 207 |
ColumnDef::new(DocInfo::CreatedAt)
|
| 208 |
.timestamp_with_time_zone()
|
|
|
|
| 250 |
.to_owned()
|
| 251 |
).await?;
|
| 252 |
|
|
|
|
| 253 |
let root_insert = Query::insert()
|
| 254 |
.into_table(UserInfo::Table)
|
| 255 |
.columns([UserInfo::Email, UserInfo::Nickname, UserInfo::Password])
|
|
|
|
| 273 |
.columns([TagInfo::Uid, TagInfo::TagName, TagInfo::Regx, TagInfo::Color, TagInfo::Icon])
|
| 274 |
.values_panic([
|
| 275 |
(1).into(),
|
| 276 |
+
"Video".into(),
|
| 277 |
".*\\.(mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa)".into(),
|
| 278 |
(1).into(),
|
| 279 |
(1).into(),
|
| 280 |
])
|
| 281 |
.values_panic([
|
| 282 |
(1).into(),
|
| 283 |
+
"Picture".into(),
|
| 284 |
+
".*\\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng)".into(),
|
| 285 |
(2).into(),
|
| 286 |
(2).into(),
|
| 287 |
])
|
| 288 |
.values_panic([
|
| 289 |
(1).into(),
|
| 290 |
+
"Music".into(),
|
| 291 |
".*\\.(WAV|FLAC|APE|ALAC|WavPack|WV|MP3|AAC|Ogg|Vorbis|Opus)".into(),
|
| 292 |
(3).into(),
|
| 293 |
(3).into(),
|
| 294 |
])
|
| 295 |
.values_panic([
|
| 296 |
(1).into(),
|
| 297 |
+
"Document".into(),
|
| 298 |
".*\\.(pdf|doc|ppt|yml|xml|htm|json|csv|txt|ini|xsl|wps|rtf|hlp)".into(),
|
| 299 |
(3).into(),
|
| 300 |
(3).into(),
|
|
|
|
| 419 |
Location,
|
| 420 |
Size,
|
| 421 |
Type,
|
| 422 |
+
ThumbnailBase64,
|
| 423 |
CreatedAt,
|
| 424 |
UpdatedAt,
|
| 425 |
IsDeleted,
|
python/conf/sys.cnf
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
[infiniflow]
|
| 2 |
-
es=http://
|
| 3 |
pgdb_usr=root
|
| 4 |
pgdb_pwd=infiniflow_docgpt
|
| 5 |
-
pgdb_host=
|
| 6 |
-
pgdb_port=
|
| 7 |
-
minio_host=
|
| 8 |
minio_usr=infiniflow
|
| 9 |
minio_pwd=infiniflow_docgpt
|
| 10 |
|
|
|
|
| 1 |
[infiniflow]
|
| 2 |
+
es=http://es01:9200
|
| 3 |
pgdb_usr=root
|
| 4 |
pgdb_pwd=infiniflow_docgpt
|
| 5 |
+
pgdb_host=postgres
|
| 6 |
+
pgdb_port=5432
|
| 7 |
+
minio_host=minio:9000
|
| 8 |
minio_usr=infiniflow
|
| 9 |
minio_pwd=infiniflow_docgpt
|
| 10 |
|
python/llm/chat_model.py
CHANGED
|
@@ -24,6 +24,7 @@ class QWen(Base):
|
|
| 24 |
from http import HTTPStatus
|
| 25 |
from dashscope import Generation
|
| 26 |
from dashscope.api_entities.dashscope_response import Role
|
|
|
|
| 27 |
response = Generation.call(
|
| 28 |
Generation.Models.qwen_turbo,
|
| 29 |
messages=messages,
|
|
|
|
| 24 |
from http import HTTPStatus
|
| 25 |
from dashscope import Generation
|
| 26 |
from dashscope.api_entities.dashscope_response import Role
|
| 27 |
+
# export DASHSCOPE_API_KEY=YOUR_DASHSCOPE_API_KEY
|
| 28 |
response = Generation.call(
|
| 29 |
Generation.Models.qwen_turbo,
|
| 30 |
messages=messages,
|
python/nlp/search.py
CHANGED
|
@@ -9,6 +9,8 @@ from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity
|
|
| 9 |
import numpy as np
|
| 10 |
from copy import deepcopy
|
| 11 |
|
|
|
|
|
|
|
| 12 |
class Dealer:
|
| 13 |
def __init__(self, es, emb_mdl):
|
| 14 |
self.qryr = query.EsQueryer(es)
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
from copy import deepcopy
|
| 11 |
|
| 12 |
+
def index_name(uid):return f"docgpt_{uid}"
|
| 13 |
+
|
| 14 |
class Dealer:
|
| 15 |
def __init__(self, es, emb_mdl):
|
| 16 |
self.qryr = query.EsQueryer(es)
|
python/svr/dialog_svr.py
CHANGED
|
@@ -6,11 +6,10 @@ from tornado.ioloop import IOLoop
|
|
| 6 |
from tornado.httpserver import HTTPServer
|
| 7 |
from tornado.options import define,options
|
| 8 |
from util import es_conn, setup_logging
|
| 9 |
-
from svr import sec_search as search
|
| 10 |
-
from svr.rpc_proxy import RPCProxy
|
| 11 |
from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity
|
| 12 |
from nlp import huqie
|
| 13 |
from nlp import query as Query
|
|
|
|
| 14 |
from llm import HuEmbedding, GptTurbo
|
| 15 |
import numpy as np
|
| 16 |
from io import BytesIO
|
|
@@ -38,7 +37,7 @@ def get_QA_pairs(hists):
|
|
| 38 |
|
| 39 |
|
| 40 |
|
| 41 |
-
def get_instruction(sres, top_i, max_len=8096 fld="content_ltks"):
|
| 42 |
max_len //= len(top_i)
|
| 43 |
# add instruction to prompt
|
| 44 |
instructions = [re.sub(r"[\r\n]+", " ", sres.field[sres.ids[i]][fld]) for i in top_i]
|
|
@@ -96,10 +95,11 @@ class Handler(RequestHandler):
|
|
| 96 |
try:
|
| 97 |
question = param.get("history",[{"user": "Hi!"}])[-1]["user"]
|
| 98 |
res = SE.search({
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
sim = SE.rerank(res, question)
|
| 105 |
rk_idx = np.argsort(sim*-1)
|
|
@@ -112,12 +112,12 @@ class Handler(RequestHandler):
|
|
| 112 |
refer = OrderedDict()
|
| 113 |
docnms = {}
|
| 114 |
for i in rk_idx:
|
| 115 |
-
did = res.field[res.ids[i]]["doc_id"]
|
| 116 |
-
if did not in docnms: docnms[did] = res.field[res.ids[i]]["docnm_kwd"]
|
| 117 |
if did not in refer: refer[did] = []
|
| 118 |
refer[did].append({
|
| 119 |
"chunk_id": res.ids[i],
|
| 120 |
-
"content": res.field[res.ids[i]]["content_ltks"]
|
| 121 |
"image": ""
|
| 122 |
})
|
| 123 |
|
|
@@ -128,7 +128,7 @@ class Handler(RequestHandler):
|
|
| 128 |
"data":{
|
| 129 |
"uid": param["uid"],
|
| 130 |
"dialog_id": param["dialog_id"],
|
| 131 |
-
"assistant": ans
|
| 132 |
"refer": [{
|
| 133 |
"did": did,
|
| 134 |
"doc_name": docnms[did],
|
|
@@ -153,7 +153,7 @@ if __name__ == '__main__':
|
|
| 153 |
parser.add_argument("--port", default=4455, type=int, help="Port used for service")
|
| 154 |
ARGS = parser.parse_args()
|
| 155 |
|
| 156 |
-
SE = search.
|
| 157 |
|
| 158 |
app = Application([(r'/v1/chat/completions', Handler)],debug=False)
|
| 159 |
http_server = HTTPServer(app)
|
|
|
|
| 6 |
from tornado.httpserver import HTTPServer
|
| 7 |
from tornado.options import define,options
|
| 8 |
from util import es_conn, setup_logging
|
|
|
|
|
|
|
| 9 |
from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity
|
| 10 |
from nlp import huqie
|
| 11 |
from nlp import query as Query
|
| 12 |
+
from nlp import search
|
| 13 |
from llm import HuEmbedding, GptTurbo
|
| 14 |
import numpy as np
|
| 15 |
from io import BytesIO
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
+
def get_instruction(sres, top_i, max_len=8096, fld="content_ltks"):
|
| 41 |
max_len //= len(top_i)
|
| 42 |
# add instruction to prompt
|
| 43 |
instructions = [re.sub(r"[\r\n]+", " ", sres.field[sres.ids[i]][fld]) for i in top_i]
|
|
|
|
| 95 |
try:
|
| 96 |
question = param.get("history",[{"user": "Hi!"}])[-1]["user"]
|
| 97 |
res = SE.search({
|
| 98 |
+
"question": question,
|
| 99 |
+
"kb_ids": param.get("kb_ids", []),
|
| 100 |
+
"size": param.get("topn", 15)},
|
| 101 |
+
search.index_name(param["uid"])
|
| 102 |
+
)
|
| 103 |
|
| 104 |
sim = SE.rerank(res, question)
|
| 105 |
rk_idx = np.argsort(sim*-1)
|
|
|
|
| 112 |
refer = OrderedDict()
|
| 113 |
docnms = {}
|
| 114 |
for i in rk_idx:
|
| 115 |
+
did = res.field[res.ids[i]]["doc_id"]
|
| 116 |
+
if did not in docnms: docnms[did] = res.field[res.ids[i]]["docnm_kwd"]
|
| 117 |
if did not in refer: refer[did] = []
|
| 118 |
refer[did].append({
|
| 119 |
"chunk_id": res.ids[i],
|
| 120 |
+
"content": res.field[res.ids[i]]["content_ltks"],
|
| 121 |
"image": ""
|
| 122 |
})
|
| 123 |
|
|
|
|
| 128 |
"data":{
|
| 129 |
"uid": param["uid"],
|
| 130 |
"dialog_id": param["dialog_id"],
|
| 131 |
+
"assistant": ans,
|
| 132 |
"refer": [{
|
| 133 |
"did": did,
|
| 134 |
"doc_name": docnms[did],
|
|
|
|
| 153 |
parser.add_argument("--port", default=4455, type=int, help="Port used for service")
|
| 154 |
ARGS = parser.parse_args()
|
| 155 |
|
| 156 |
+
SE = search.Dealer(es_conn.HuEs("infiniflow"), EMBEDDING)
|
| 157 |
|
| 158 |
app = Application([(r'/v1/chat/completions', Handler)],debug=False)
|
| 159 |
http_server = HTTPServer(app)
|
python/svr/parse_user_docs.py
CHANGED
|
@@ -6,7 +6,7 @@ from util.db_conn import Postgres
|
|
| 6 |
from util.minio_conn import HuMinio
|
| 7 |
from util import rmSpace, findMaxDt
|
| 8 |
from FlagEmbedding import FlagModel
|
| 9 |
-
from nlp import huchunk, huqie
|
| 10 |
import base64, hashlib
|
| 11 |
from io import BytesIO
|
| 12 |
import pandas as pd
|
|
@@ -103,7 +103,7 @@ def build(row):
|
|
| 103 |
if(!ctx._source.kb_id.contains('%s'))
|
| 104 |
ctx._source.kb_id.add('%s');
|
| 105 |
"""%(str(row["kb_id"]), str(row["kb_id"])),
|
| 106 |
-
idxnm = index_name(row["uid"])
|
| 107 |
)
|
| 108 |
set_progress(row["kb2doc_id"], 1, "Done")
|
| 109 |
return []
|
|
@@ -171,10 +171,8 @@ def build(row):
|
|
| 171 |
return docs
|
| 172 |
|
| 173 |
|
| 174 |
-
def index_name(uid):return f"docgpt_{uid}"
|
| 175 |
-
|
| 176 |
def init_kb(row):
|
| 177 |
-
idxnm = index_name(row["uid"])
|
| 178 |
if ES.indexExist(idxnm): return
|
| 179 |
return ES.createIdx(idxnm, json.load(open("conf/mapping.json", "r")))
|
| 180 |
|
|
@@ -199,7 +197,7 @@ def rm_doc_from_kb(df):
|
|
| 199 |
ctx._source.kb_id.indexOf('%s')
|
| 200 |
);
|
| 201 |
"""%(str(r["kb_id"]),str(r["kb_id"])),
|
| 202 |
-
idxnm = index_name(r["uid"])
|
| 203 |
)
|
| 204 |
if len(df) == 0:return
|
| 205 |
sql = """
|
|
@@ -233,7 +231,7 @@ def main(comm, mod):
|
|
| 233 |
set_progress(r["kb2doc_id"], random.randint(70, 95)/100.,
|
| 234 |
"Finished embedding! Start to build index!")
|
| 235 |
init_kb(r)
|
| 236 |
-
es_r = ES.bulk(cks, index_name(r["uid"]))
|
| 237 |
if es_r:
|
| 238 |
set_progress(r["kb2doc_id"], -1, "Index failure!")
|
| 239 |
print(es_r)
|
|
|
|
| 6 |
from util.minio_conn import HuMinio
|
| 7 |
from util import rmSpace, findMaxDt
|
| 8 |
from FlagEmbedding import FlagModel
|
| 9 |
+
from nlp import huchunk, huqie, search
|
| 10 |
import base64, hashlib
|
| 11 |
from io import BytesIO
|
| 12 |
import pandas as pd
|
|
|
|
| 103 |
if(!ctx._source.kb_id.contains('%s'))
|
| 104 |
ctx._source.kb_id.add('%s');
|
| 105 |
"""%(str(row["kb_id"]), str(row["kb_id"])),
|
| 106 |
+
idxnm = search.index_name(row["uid"])
|
| 107 |
)
|
| 108 |
set_progress(row["kb2doc_id"], 1, "Done")
|
| 109 |
return []
|
|
|
|
| 171 |
return docs
|
| 172 |
|
| 173 |
|
|
|
|
|
|
|
| 174 |
def init_kb(row):
|
| 175 |
+
idxnm = search.index_name(row["uid"])
|
| 176 |
if ES.indexExist(idxnm): return
|
| 177 |
return ES.createIdx(idxnm, json.load(open("conf/mapping.json", "r")))
|
| 178 |
|
|
|
|
| 197 |
ctx._source.kb_id.indexOf('%s')
|
| 198 |
);
|
| 199 |
"""%(str(r["kb_id"]),str(r["kb_id"])),
|
| 200 |
+
idxnm = search.index_name(r["uid"])
|
| 201 |
)
|
| 202 |
if len(df) == 0:return
|
| 203 |
sql = """
|
|
|
|
| 231 |
set_progress(r["kb2doc_id"], random.randint(70, 95)/100.,
|
| 232 |
"Finished embedding! Start to build index!")
|
| 233 |
init_kb(r)
|
| 234 |
+
es_r = ES.bulk(cks, search.index_name(r["uid"]))
|
| 235 |
if es_r:
|
| 236 |
set_progress(r["kb2doc_id"], -1, "Index failure!")
|
| 237 |
print(es_r)
|
src/api/doc_info.rs
CHANGED
|
@@ -11,6 +11,7 @@ use crate::entity::doc_info::Model;
|
|
| 11 |
use crate::errors::AppError;
|
| 12 |
use crate::service::doc_info::{ Mutation, Query };
|
| 13 |
use serde::Deserialize;
|
|
|
|
| 14 |
|
| 15 |
fn now() -> chrono::DateTime<FixedOffset> {
|
| 16 |
Utc::now().with_timezone(&FixedOffset::east_opt(3600 * 8).unwrap())
|
|
@@ -64,6 +65,41 @@ pub struct UploadForm {
|
|
| 64 |
did: i64,
|
| 65 |
}
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
#[post("/v1.0/upload")]
|
| 68 |
async fn upload(
|
| 69 |
payload: Multipart<UploadForm>,
|
|
@@ -114,7 +150,13 @@ async fn upload(
|
|
| 114 |
print!("Existing bucket: {}", bucket_name.clone());
|
| 115 |
}
|
| 116 |
|
| 117 |
-
let location = format!("/{}/{}", payload.did, fnm)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
print!("===>{}", location.clone());
|
| 119 |
s3_client.put_object(
|
| 120 |
&mut PutObjectArgs::new(
|
|
@@ -129,10 +171,11 @@ async fn upload(
|
|
| 129 |
let doc = Mutation::create_doc_info(&data.conn, Model {
|
| 130 |
did: Default::default(),
|
| 131 |
uid: uid,
|
| 132 |
-
doc_name: fnm,
|
| 133 |
size: payload.file_field.bytes.len() as i64,
|
| 134 |
location,
|
| 135 |
-
r#type:
|
|
|
|
| 136 |
created_at: now(),
|
| 137 |
updated_at: now(),
|
| 138 |
is_deleted: Default::default(),
|
|
@@ -214,6 +257,7 @@ async fn new_folder(
|
|
| 214 |
size: 0,
|
| 215 |
r#type: "folder".to_string(),
|
| 216 |
location: "".to_owned(),
|
|
|
|
| 217 |
created_at: now(),
|
| 218 |
updated_at: now(),
|
| 219 |
is_deleted: Default::default(),
|
|
|
|
| 11 |
use crate::errors::AppError;
|
| 12 |
use crate::service::doc_info::{ Mutation, Query };
|
| 13 |
use serde::Deserialize;
|
| 14 |
+
use regex::Regex;
|
| 15 |
|
| 16 |
fn now() -> chrono::DateTime<FixedOffset> {
|
| 17 |
Utc::now().with_timezone(&FixedOffset::east_opt(3600 * 8).unwrap())
|
|
|
|
| 65 |
did: i64,
|
| 66 |
}
|
| 67 |
|
| 68 |
+
fn file_type(filename: &String) -> String {
|
| 69 |
+
let fnm = filename.to_lowercase();
|
| 70 |
+
if
|
| 71 |
+
let Some(_) = Regex::new(r"\.(mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa)$")
|
| 72 |
+
.unwrap()
|
| 73 |
+
.captures(&fnm)
|
| 74 |
+
{
|
| 75 |
+
return "Video".to_owned();
|
| 76 |
+
}
|
| 77 |
+
if
|
| 78 |
+
let Some(_) = Regex::new(
|
| 79 |
+
r"\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng)$"
|
| 80 |
+
)
|
| 81 |
+
.unwrap()
|
| 82 |
+
.captures(&fnm)
|
| 83 |
+
{
|
| 84 |
+
return "Picture".to_owned();
|
| 85 |
+
}
|
| 86 |
+
if
|
| 87 |
+
let Some(_) = Regex::new(r"\.(WAV|FLAC|APE|ALAC|WavPack|WV|MP3|AAC|Ogg|Vorbis|Opus)$")
|
| 88 |
+
.unwrap()
|
| 89 |
+
.captures(&fnm)
|
| 90 |
+
{
|
| 91 |
+
return "Music".to_owned();
|
| 92 |
+
}
|
| 93 |
+
if
|
| 94 |
+
let Some(_) = Regex::new(r"\.(pdf|doc|ppt|yml|xml|htm|json|csv|txt|ini|xsl|wps|rtf|hlp)$")
|
| 95 |
+
.unwrap()
|
| 96 |
+
.captures(&fnm)
|
| 97 |
+
{
|
| 98 |
+
return "Document".to_owned();
|
| 99 |
+
}
|
| 100 |
+
"Other".to_owned()
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
#[post("/v1.0/upload")]
|
| 104 |
async fn upload(
|
| 105 |
payload: Multipart<UploadForm>,
|
|
|
|
| 150 |
print!("Existing bucket: {}", bucket_name.clone());
|
| 151 |
}
|
| 152 |
|
| 153 |
+
let location = format!("/{}/{}", payload.did, fnm)
|
| 154 |
+
.as_bytes()
|
| 155 |
+
.to_vec()
|
| 156 |
+
.iter()
|
| 157 |
+
.map(|b| format!("{:02x}", b).to_string())
|
| 158 |
+
.collect::<Vec<String>>()
|
| 159 |
+
.join("");
|
| 160 |
print!("===>{}", location.clone());
|
| 161 |
s3_client.put_object(
|
| 162 |
&mut PutObjectArgs::new(
|
|
|
|
| 171 |
let doc = Mutation::create_doc_info(&data.conn, Model {
|
| 172 |
did: Default::default(),
|
| 173 |
uid: uid,
|
| 174 |
+
doc_name: fnm.clone(),
|
| 175 |
size: payload.file_field.bytes.len() as i64,
|
| 176 |
location,
|
| 177 |
+
r#type: file_type(&fnm),
|
| 178 |
+
thumbnail_base64: Default::default(),
|
| 179 |
created_at: now(),
|
| 180 |
updated_at: now(),
|
| 181 |
is_deleted: Default::default(),
|
|
|
|
| 257 |
size: 0,
|
| 258 |
r#type: "folder".to_string(),
|
| 259 |
location: "".to_owned(),
|
| 260 |
+
thumbnail_base64: Default::default(),
|
| 261 |
created_at: now(),
|
| 262 |
updated_at: now(),
|
| 263 |
is_deleted: Default::default(),
|
src/api/user_info.rs
CHANGED
|
@@ -90,12 +90,13 @@ async fn register(
|
|
| 90 |
doc_name: "/".into(),
|
| 91 |
size: 0,
|
| 92 |
location: "".into(),
|
|
|
|
| 93 |
r#type: "folder".to_string(),
|
| 94 |
created_at: now(),
|
| 95 |
updated_at: now(),
|
| 96 |
is_deleted: Default::default(),
|
| 97 |
}).await?;
|
| 98 |
-
let tnm = vec!["
|
| 99 |
let tregx = vec![
|
| 100 |
".*\\.(mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa)",
|
| 101 |
".*\\.(png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng)",
|
|
|
|
| 90 |
doc_name: "/".into(),
|
| 91 |
size: 0,
|
| 92 |
location: "".into(),
|
| 93 |
+
thumbnail_base64: "".into(),
|
| 94 |
r#type: "folder".to_string(),
|
| 95 |
created_at: now(),
|
| 96 |
updated_at: now(),
|
| 97 |
is_deleted: Default::default(),
|
| 98 |
}).await?;
|
| 99 |
+
let tnm = vec!["Video", "Picture", "Music", "Document"];
|
| 100 |
let tregx = vec![
|
| 101 |
".*\\.(mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa)",
|
| 102 |
".*\\.(png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng)",
|
src/entity/doc_info.rs
CHANGED
|
@@ -17,6 +17,8 @@ pub struct Model {
|
|
| 17 |
#[serde(skip_deserializing)]
|
| 18 |
pub location: String,
|
| 19 |
#[serde(skip_deserializing)]
|
|
|
|
|
|
|
| 20 |
pub created_at: DateTime<FixedOffset>,
|
| 21 |
#[serde(skip_deserializing)]
|
| 22 |
pub updated_at: DateTime<FixedOffset>,
|
|
|
|
| 17 |
#[serde(skip_deserializing)]
|
| 18 |
pub location: String,
|
| 19 |
#[serde(skip_deserializing)]
|
| 20 |
+
pub thumbnail_base64: String,
|
| 21 |
+
#[serde(skip_deserializing)]
|
| 22 |
pub created_at: DateTime<FixedOffset>,
|
| 23 |
#[serde(skip_deserializing)]
|
| 24 |
pub updated_at: DateTime<FixedOffset>,
|
src/entity/tag2_doc.rs
CHANGED
|
@@ -9,28 +9,28 @@ pub struct Model {
|
|
| 9 |
#[sea_orm(index)]
|
| 10 |
pub tag_id: i64,
|
| 11 |
#[sea_orm(index)]
|
| 12 |
-
pub
|
| 13 |
}
|
| 14 |
|
| 15 |
#[derive(Debug, Clone, Copy, EnumIter)]
|
| 16 |
pub enum Relation {
|
| 17 |
-
DocInfo,
|
| 18 |
Tag,
|
|
|
|
| 19 |
}
|
| 20 |
|
| 21 |
impl RelationTrait for Relation {
|
| 22 |
fn def(&self) -> sea_orm::RelationDef {
|
| 23 |
match self {
|
| 24 |
-
Self::DocInfo =>
|
| 25 |
-
Entity::belongs_to(super::doc_info::Entity)
|
| 26 |
-
.from(Column::Uid)
|
| 27 |
-
.to(super::doc_info::Column::Uid)
|
| 28 |
-
.into(),
|
| 29 |
Self::Tag =>
|
| 30 |
Entity::belongs_to(super::tag_info::Entity)
|
| 31 |
.from(Column::TagId)
|
| 32 |
.to(super::tag_info::Column::Tid)
|
| 33 |
.into(),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
}
|
| 36 |
}
|
|
|
|
| 9 |
#[sea_orm(index)]
|
| 10 |
pub tag_id: i64,
|
| 11 |
#[sea_orm(index)]
|
| 12 |
+
pub did: i64,
|
| 13 |
}
|
| 14 |
|
| 15 |
#[derive(Debug, Clone, Copy, EnumIter)]
|
| 16 |
pub enum Relation {
|
|
|
|
| 17 |
Tag,
|
| 18 |
+
DocInfo,
|
| 19 |
}
|
| 20 |
|
| 21 |
impl RelationTrait for Relation {
|
| 22 |
fn def(&self) -> sea_orm::RelationDef {
|
| 23 |
match self {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
Self::Tag =>
|
| 25 |
Entity::belongs_to(super::tag_info::Entity)
|
| 26 |
.from(Column::TagId)
|
| 27 |
.to(super::tag_info::Column::Tid)
|
| 28 |
.into(),
|
| 29 |
+
Self::DocInfo =>
|
| 30 |
+
Entity::belongs_to(super::doc_info::Entity)
|
| 31 |
+
.from(Column::Did)
|
| 32 |
+
.to(super::doc_info::Column::Did)
|
| 33 |
+
.into(),
|
| 34 |
}
|
| 35 |
}
|
| 36 |
}
|
src/service/doc_info.rs
CHANGED
|
@@ -163,7 +163,7 @@ impl Query {
|
|
| 163 |
);
|
| 164 |
}
|
| 165 |
if tag.regx.len() > 0 {
|
| 166 |
-
cond.push_str(&format!(" and doc_name ~ '{}'", tag.regx));
|
| 167 |
}
|
| 168 |
}
|
| 169 |
|
|
@@ -254,6 +254,7 @@ impl Mutation {
|
|
| 254 |
size: Set(form_data.size.to_owned()),
|
| 255 |
r#type: Set(form_data.r#type.to_owned()),
|
| 256 |
location: Set(form_data.location.to_owned()),
|
|
|
|
| 257 |
created_at: Set(form_data.created_at.to_owned()),
|
| 258 |
updated_at: Set(form_data.updated_at.to_owned()),
|
| 259 |
is_deleted: Default::default(),
|
|
@@ -277,6 +278,7 @@ impl Mutation {
|
|
| 277 |
size: Set(form_data.size.to_owned()),
|
| 278 |
r#type: Set(form_data.r#type.to_owned()),
|
| 279 |
location: Set(form_data.location.to_owned()),
|
|
|
|
| 280 |
created_at: doc_info.created_at,
|
| 281 |
updated_at: Set(now()),
|
| 282 |
is_deleted: Default::default(),
|
|
|
|
| 163 |
);
|
| 164 |
}
|
| 165 |
if tag.regx.len() > 0 {
|
| 166 |
+
cond.push_str(&format!(" and (type='{}' or doc_name ~ '{}') ", tag.tag_name, tag.regx));
|
| 167 |
}
|
| 168 |
}
|
| 169 |
|
|
|
|
| 254 |
size: Set(form_data.size.to_owned()),
|
| 255 |
r#type: Set(form_data.r#type.to_owned()),
|
| 256 |
location: Set(form_data.location.to_owned()),
|
| 257 |
+
thumbnail_base64: Default::default(),
|
| 258 |
created_at: Set(form_data.created_at.to_owned()),
|
| 259 |
updated_at: Set(form_data.updated_at.to_owned()),
|
| 260 |
is_deleted: Default::default(),
|
|
|
|
| 278 |
size: Set(form_data.size.to_owned()),
|
| 279 |
r#type: Set(form_data.r#type.to_owned()),
|
| 280 |
location: Set(form_data.location.to_owned()),
|
| 281 |
+
thumbnail_base64: doc_info.thumbnail_base64,
|
| 282 |
created_at: doc_info.created_at,
|
| 283 |
updated_at: Set(now()),
|
| 284 |
is_deleted: Default::default(),
|