Artples committed on
Commit b3364b6 · verified · 1 Parent(s): a75cc96

Update app.py

Files changed (1)
  1. app.py +38 -26
app.py CHANGED
@@ -1,37 +1,49 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch

- class UserRequest(BaseModel):
-     prompt: str
-
- app = FastAPI()
-
  # Load the model and tokenizer
  model_name = "Artples/L-MChat-7b"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(model_name)

- # Make sure the model is on CPU
+ # Ensure the model uses CPU
  device = torch.device("cpu")
  model.to(device)

- @app.post("/generate/")
- async def generate(request: UserRequest):
-     try:
-         # Tokenize the prompt
-         inputs = tokenizer.encode(request.prompt, return_tensors="pt")
-         inputs = inputs.to(device)
-
-         # Generate a response from the model
-         output = model.generate(inputs, max_length=100, num_return_sequences=1)
-         response_text = tokenizer.decode(output[0], skip_special_tokens=True)
-
-         return {"response": response_text}
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))
-
- if __name__ == "__main__":
-     import uvicorn
-     uvicorn.run(app, host="0.0.0.0", port=8080)
+ def chat_with_model(json_input):
+     prompt = json_input['prompt']
+     # Tokenize the input prompt
+     inputs = tokenizer.encode(prompt, return_tensors="pt")
+     inputs = inputs.to(device)
+
+     # Generate a response
+     output = model.generate(inputs, max_length=100, num_return_sequences=1)
+     response_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     return {"choices": [{"text": response_text}]}
+
+ # Define the JSON input component
+ json_schema = {
+     "title": "Request",
+     "type": "object",
+     "properties": {
+         "prompt": {
+             "type": "string",
+             "description": "Enter your prompt here."
+         }
+     },
+     "required": ["prompt"]
+ }
+
+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=chat_with_model,
+     inputs=gr.inputs.JSON(schema=json_schema),
+     outputs="json",
+     title="Chat with L-MChat-7b",
+     description="API-like interface using Gradio to simulate OpenAI API behavior."
+ )
+
+ # Run the Gradio app
+ iface.launch()
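
Note on the added interface: `gr.inputs.JSON(schema=json_schema)` uses Gradio's legacy `gr.inputs` namespace, which was removed in Gradio 4, and to my knowledge no Gradio JSON component validates input against a schema, so this line may raise an error on current releases. A minimal sketch of an equivalent app against a recent Gradio API is shown below; it is an assumption, not part of the commit. Since the `gr.JSON` component is mainly geared toward displaying output, the sketch accepts the request body as a JSON string in a `gr.Textbox` and parses it, keeping the same model, CPU placement, and OpenAI-style response shape.

import json

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same model and CPU placement as the commit
model_name = "Artples/L-MChat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cpu")
model.to(device)

def chat_with_model(request_json):
    # Assumed request shape, mirroring the commit: '{"prompt": "Hello"}'
    prompt = json.loads(request_json)["prompt"]
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    output = model.generate(inputs, max_length=100, num_return_sequences=1)
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # OpenAI-like response envelope, as in the committed code
    return {"choices": [{"text": response_text}]}

iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label='Request JSON, e.g. {"prompt": "Hello"}'),
    outputs=gr.JSON(label="Response"),
    title="Chat with L-MChat-7b",
    description="API-like interface using Gradio to simulate OpenAI API behavior.",
)

iface.launch()

Once launched, the app can also be called programmatically (for example through the gradio_client package), which is closer to the API-style usage the description aims for.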