sanbo committed on
Commit
cd320c7
·
1 Parent(s): 2c68d90

update sth. at 2025-01-16 21:48:33

Browse files
Files changed (2) hide show
  1. app.py +45 -23
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,32 +1,54 @@
1
- import gradio as gr
 
2
  from transformers import AutoTokenizer, AutoModel
3
  import torch
 
 
4
 
# 1. Load the model and tokenizer.
# Replace the identifier below with the model you actually use.
model_name = "jinaai/jina-embeddings-v3"

# Both objects are loaded once at import time from the same checkpoint.
# trust_remote_code=True allows transformers to execute the modelling code
# shipped in the model repository.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
)
9
 
# 2. Define the function that generates embeddings.
def generate_embeddings(text):
    """Return the mean-pooled embedding of *text* as a nested Python list.

    The text is tokenized with the module-level ``tokenizer``, passed through
    the module-level ``model``, and the last hidden state is averaged over
    dimension 1.  The result is converted to plain lists so Gradio can emit
    it as JSON.
    """
    # Tokenize the input text into PyTorch tensors.
    encoded = tokenizer(text, return_tensors="pt")

    # Gradients are not needed for inference; disabling them saves resources.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        pooled = hidden_states.mean(dim=1)

    # Convert the tensor to a Python list for Gradio's JSON output.
    return pooled.numpy().tolist()
 
 
 
 
 
 
20
 
# 3. Define the Gradio interface.
iface = gr.Interface(
    title="Text Embedding Generator",
    description="Enter text to generate embeddings using the Jina model.",
    fn=generate_embeddings,  # callback that produces the embeddings
    inputs="text",  # a single text input
    outputs="json",  # JSON output, convenient for API callers
)
29
 
# 4. Launch the Gradio app when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
  from transformers import AutoTokenizer, AutoModel
4
  import torch
5
+ from typing import List, Dict
6
+ import uvicorn
7
 
# Request and response models.
class TextRequest(BaseModel):
    """Request body accepted by the embedding endpoint."""

    # The text to embed.
    text: str
11
+
class EmbeddingResponse(BaseModel):
    """Response body returned by the embedding endpoint."""

    # Outcome marker; the endpoint fills it with "success".
    status: str
    # Nested list of floats built from the pooled model output.
    embeddings: List[List[float]]
15
+
# Create the FastAPI application.
app = FastAPI(
    version="1.0.0",
    title="Jina Embeddings API",
    description="Text embedding generation service using jina-embeddings-v3",
)
22
+
# Load the model and tokenizer once, at import time.
model_name = "jinaai/jina-embeddings-v3"

# trust_remote_code=True allows transformers to execute the modelling code
# shipped in the model repository.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    model_name,
    trust_remote_code=True,
)
27
 
def _embed_text(text: str) -> List[List[float]]:
    """Tokenize *text*, run the model and return the mean-pooled embedding.

    This is the blocking part of the endpoint; it is meant to be executed in
    a worker thread, not on the event loop.
    """
    # Inputs longer than 512 tokens are truncated by the tokenizer.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Gradient mode is thread-local in PyTorch, so it has to be disabled here,
    # inside the function that actually runs in the worker thread.
    with torch.no_grad():
        pooled = model(**encoded).last_hidden_state.mean(dim=1)

    # Tensor.tolist() yields nested Python floats directly, without the NumPy
    # round trip, and also works for tensors that are not CPU float32.
    return pooled.tolist()


@app.post("/generate_embeddings", response_model=EmbeddingResponse)
async def generate_embeddings(request: TextRequest):
    """Generate an embedding for ``request.text``.

    Args:
        request: Parsed request body carrying the text to embed.

    Returns:
        An ``EmbeddingResponse`` with ``status="success"`` and the embedding
        as a nested list of floats.

    Raises:
        HTTPException: With status 500 and the original error message as
            detail when tokenization, inference or response building fails.
    """
    # Imported locally so this block does not depend on a file-level import.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Tokenization and inference are blocking, CPU-heavy calls.  Running
        # them directly in an ``async def`` handler would stall the event loop
        # (and every other request, including "/") until they finish, so they
        # are offloaded to the thread pool.
        vectors = await run_in_threadpool(_embed_text, request.text)
        return EmbeddingResponse(status="success", embeddings=vectors)
    except Exception as exc:
        # Top-level boundary: report any failure as HTTP 500, keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
44
 
@app.get("/")
async def root():
    """Report the service status, the loaded model name and a usage hint."""
    info = {
        "status": "active",
        "model": model_name,
        "usage": "Send POST request to /generate_embeddings",
    }
    return info
 
52
 
 
# Serve the app with uvicorn on all interfaces, port 7860, when this file is
# executed as a script.
if __name__ == "__main__":
    uvicorn.run(app, port=7860, host="0.0.0.0")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  transformers
2
  torch
3
- einops
 
 
 
 
1
  transformers
2
  torch
3
+ einops
4
+ fastapi
5
+ uvicorn
6
+ pydantic