uhoui committed
Commit 795d943 · 1 Parent(s): 4bdd160

Added an OpenAI-compatible API, kinda

Files changed (2)
  1. app.py +67 -5
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,69 @@
- import gradio as gr
-
- def greet(name):
-     return "Hello " + name + "!!"
-
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ from fastapi import FastAPI, Request
+ from fastapi.responses import JSONResponse
+ from pydantic import BaseModel
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+ import uvicorn
+ import os
+
+ # Load a model (Mistral, Mixtral, Llama 2, etc.) that works on ZeroGPU
+ model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ # Create the app
+ app = FastAPI()
+
+ # Request schema matching the OpenAI chat completions API
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ class ChatRequest(BaseModel):
+     model: str
+     messages: list[Message]
+     temperature: float = 0.7
+     top_p: float = 1.0
+     max_tokens: int = 256
+     stream: bool = False
+
+ @app.post("/v1/chat/completions")
+ async def chat_completions(request: ChatRequest):
+     # Flatten the chat messages into a single prompt
+     prompt = ""
+     for msg in request.messages:
+         prompt += f"{msg.role}: {msg.content}\n"
+     prompt += "assistant:"
+
+     # Generate output (do_sample=True so the temperature setting actually takes effect)
+     output = generator(prompt, max_new_tokens=request.max_tokens, temperature=request.temperature, do_sample=True)[0]["generated_text"]
+
+     # Extract the assistant's reply
+     assistant_reply = output.split("assistant:")[-1].strip()
+
+     # Build an OpenAI-compatible response
+     return JSONResponse({
+         "id": "chatcmpl-fake001",
+         "object": "chat.completion",
+         "created": 1234567890,
+         "model": request.model,
+         "choices": [
+             {
+                 "index": 0,
+                 "message": {
+                     "role": "assistant",
+                     "content": assistant_reply
+                 },
+                 "finish_reason": "stop"
+             }
+         ],
+         "usage": {
+             "prompt_tokens": 0,
+             "completion_tokens": 0,
+             "total_tokens": 0
+         }
+     })
+
+ # Run the app when executed directly (e.g. locally)
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
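
Since the JSON response mirrors OpenAI's chat.completion object, the endpoint should be reachable with the official openai Python client pointed at this server. A minimal sketch, assuming the app is running locally on port 7860 (per the uvicorn.run line above) and that openai>=1.0 is installed separately; the api_key value is a dummy, since this server does no auth:

from openai import OpenAI

# Point the client at the local FastAPI server instead of api.openai.com
client = OpenAI(base_url="http://localhost:7860/v1", api_key="not-needed")

resp = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.1",  # echoed back by the server as-is
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)
print(resp.choices[0].message.content)

Note that the usage counts come back as zeros and stream is accepted but ignored, so clients that expect a streamed response would still get a single blocking reply.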
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi
+ uvicorn
+ transformers
+ torch
+ huggingface_hub
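
One dependency caveat: loading with device_map="auto" in transformers generally requires the accelerate package, which isn't listed above (the openai package from the client example is only needed on the caller's side, not here). A possible expanded requirements.txt, if accelerate does turn out to be needed:

fastapi
uvicorn
transformers
torch
huggingface_hub
accelerate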