Spaces:

astro21
/

ds4

Sleeping

astro21 committed on Sep 6, 2024

Commit

64c1f09

verified ·

1 Parent(s): 4e4d7b0

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -1,16 +1,16 @@
-from fastapi import FastAPI, Request
 from pydantic import BaseModel
 import transformers
-import torch
 from fastapi.middleware.cors import CORSMiddleware
 import os
 access_token_read = os.getenv('DS4')
 print(access_token_read)
-from huggingface_hub import login
-login(token = access_token_read)
 # Define the FastAPI app
 app = FastAPI()
@@ -22,19 +22,22 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Load the model and tokenizer from Hugging Face
 model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"  # Replace with an appropriate model
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 model = transformers.AutoModelForCausalLM.from_pretrained(
-    model_id, device_map="auto", torch_dtype=torch.bfloat16
 )
 pipeline = transformers.pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=150,
     temperature=0.7,
-    device_map="auto",
 )
 # Define the request model for email input
@@ -44,6 +47,11 @@ class EmailRequest(BaseModel):
     recipients: str
     body: str
 # Define the FastAPI endpoint for email summarization
 @app.post("/summarize-email/")
 async def summarize_email(email: EmailRequest):

+from fastapi import FastAPI
 from pydantic import BaseModel
 import transformers
 from fastapi.middleware.cors import CORSMiddleware
 import os
+from huggingface_hub import login
+# Get access token from environment variable
 access_token_read = os.getenv('DS4')
 print(access_token_read)
+# Login to Hugging Face Hub
+login(token=access_token_read)
 # Define the FastAPI app
 app = FastAPI()
     allow_headers=["*"],
 )
+# Load the model and tokenizer from Hugging Face, set device to CPU
 model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"  # Replace with an appropriate model
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 model = transformers.AutoModelForCausalLM.from_pretrained(
+    model_id,
+    # Removed device_map and low_cpu_mem_usage to avoid the need for 'accelerate'
 )
+# Set up the text generation pipeline for CPU
 pipeline = transformers.pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=150,
     temperature=0.7,
+    device=-1  # Force CPU usage
 )
 # Define the request model for email input
     recipients: str
     body: str
+# Helper function to create the email prompt
+def create_email_prompt(subject, sender, recipients, body):
+    prompt = f"Subject: {subject}\nFrom: {sender}\nTo: {recipients}\n\n{body}\n\nSummarize this email."
+    return prompt
 # Define the FastAPI endpoint for email summarization
 @app.post("/summarize-email/")
 async def summarize_email(email: EmailRequest):