Artples committed on
Commit b3364b6 · verified · 1 Parent(s): a75cc96

Update app.py

Files changed (1)
  1. app.py +38 -26
app.py CHANGED
@@ -1,37 +1,49 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch

- class UserRequest(BaseModel):
-     prompt: str
-
- app = FastAPI()
-
  # Load the model and tokenizer
  model_name = "Artples/L-MChat-7b"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)

- # Make sure the model is on CPU
+ # Ensure the model uses CPU
  device = torch.device("cpu")
  model.to(device)

- @app.post("/generate/")
- async def generate(request: UserRequest):
-     try:
-         # Tokenize the prompt
-         inputs = tokenizer.encode(request.prompt, return_tensors="pt")
-         inputs = inputs.to(device)
-
-         # Generate a response from the model
-         output = model.generate(inputs, max_length=100, num_return_sequences=1)
-         response_text = tokenizer.decode(output[0], skip_special_tokens=True)
-
-         return {"response": response_text}
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))
-
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8080)
+ def chat_with_model(json_input):
+     prompt = json_input['prompt']
+     # Tokenize the input prompt
+     inputs = tokenizer.encode(prompt, return_tensors="pt")
+     inputs = inputs.to(device)
+
+     # Generate a response
+     output = model.generate(inputs, max_length=100, num_return_sequences=1)
+     response_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     return {"choices": [{"text": response_text}]}
+
+ # Define the JSON input component
+ json_schema = {
+     "title": "Request",
+     "type": "object",
+     "properties": {
+         "prompt": {
+             "type": "string",
+             "description": "Enter your prompt here."
+         }
+     },
+     "required": ["prompt"]
+ }
+
+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=chat_with_model,
+     inputs=gr.inputs.JSON(schema=json_schema),
+     outputs="json",
+     title="Chat with L-MChat-7b",
+     description="API-like interface using Gradio to simulate OpenAI API behavior."
+ )
+
+ # Run the Gradio app
+ iface.launch()
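
Note on the added interface: `gr.inputs.JSON(schema=json_schema)` uses Gradio's legacy `gr.inputs` namespace, which was removed in Gradio 4, and to my knowledge no Gradio JSON component validates input against a schema, so this line may raise an error on current releases. A minimal sketch of an equivalent app against a recent Gradio API is shown below; it is an assumption, not part of the commit. Since the `gr.JSON` component is mainly geared toward displaying output, the sketch accepts the request body as a JSON string in a `gr.Textbox` and parses it, keeping the same model, CPU placement, and OpenAI-style response shape.

import json

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same model and CPU placement as the commit
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cpu")
model.to(device)

def chat_with_model(request_json):
    # Assumed request shape, mirroring the commit: '{"prompt": "Hello"}'
    prompt = json.loads(request_json)["prompt"]
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    output = model.generate(inputs, max_length=100, num_return_sequences=1)
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # OpenAI-like response envelope, as in the committed code
    return {"choices": [{"text": response_text}]}

iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label='Request JSON, e.g. {"prompt": "Hello"}'),
    outputs=gr.JSON(label="Response"),
    title="Chat with L-MChat-7b",
    description="API-like interface using Gradio to simulate OpenAI API behavior.",
)

iface.launch()

Once launched, the app can also be called programmatically (for example through the gradio_client package), which is closer to the API-style usage the description aims for.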