Spaces:

sguertl
/

PP-API-v0.1

Sleeping

App Files Files Community

sguertl committed on Apr 30

Commit

6dc93bc

verified ·

1 Parent(s): 8706698

Use Llama 70B

Browse files

Files changed (1) hide show

app.py +67 -15

app.py CHANGED Viewed

@@ -2,37 +2,89 @@ from huggingface_hub import InferenceClient
 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 import uvicorn
 import os
 app = FastAPI()
-MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
 HF_TOKEN = os.environ["HF_TOKEN"]
 client = InferenceClient(model=MODEL, token=HF_TOKEN)
 class Prompt(BaseModel):
     message: str
 @app.post("/chat")
 async def chat(prompt: Prompt):
     print("Received POST request")
     print("Message:", prompt.message)
-    system_prompt = (
-        "You are a beginner programming student helping a peer. "
-        "Offer hints, ask questions, and support understanding—don’t give full solutions."
-    )
-    full_prompt = f"<s>[INST] <<SYS>>{system_prompt}<</SYS>>\n{prompt.message} [/INST]"
-    print("Full Prompt:", full_prompt)
-    output = client.text_generation(
-        prompt=full_prompt,
-        max_new_tokens=200,
-        temperature=0.7,
-        top_p=0.95,
-        do_sample=True
     )
-    print("Text generation done", output.strip())
-    return {"reply": output.strip()}

 from fastapi import FastAPI, Request
 from pydantic import BaseModel
 import uvicorn
+import requests
+import re
 import os
+# FastAPI application object; the /chat route below is registered on it.
 app = FastAPI()
+# Model identifier handed to the inference client.
+MODEL = "meta-llama/Llama-3.3-70B-Instruct"
+# Read from the environment; a missing variable raises KeyError at import time.
 HF_TOKEN = os.environ["HF_TOKEN"]
+# URL requested by fetch_prompts_from_google_doc(); also a required environment variable.
+PROMPTS_DOC_URL = os.environ["PROMPTS"]
+# Module-level client reused by the /chat handler.
 client = InferenceClient(model=MODEL, token=HF_TOKEN)
+def fetch_prompts_from_google_doc():
+    """Download the document at PROMPTS_DOC_URL and parse its prompt sections.
+    A section is any text wrapped as ``{BEGIN NAME}...{END NAME}``; the closing
+    tag must repeat the opening name exactly.
+    Returns:
+        dict mapping each stripped section name to its stripped body. If a
+        name occurs more than once, the last occurrence wins.
+    Raises:
+        RuntimeError: the server answered with a status other than 200.
+        requests.exceptions.RequestException: the request itself failed,
+            including when the timeout below expires.
+    """
+    print("Fetching prompts from Google Doc...")
+    # requests applies no timeout by default, so an unresponsive server would
+    # block the caller forever; bound the wait explicitly.
+    response = requests.get(PROMPTS_DOC_URL, timeout=10)
+    if response.status_code != 200:
+        raise RuntimeError(
+            f"Failed to fetch document (HTTP {response.status_code})"
+        )
+    # [\s\S] lets a section body span multiple lines; \1 ties END to its BEGIN.
+    pattern = r"\{BEGIN (.*?)\}([\s\S]*?)\{END \1\}"
+    return {
+        key.strip(): content.strip()
+        for key, content in re.findall(pattern, response.text)
+    }
 class Prompt(BaseModel):
+    """Request body accepted by the ``/chat`` endpoint."""
+    # Text placed under the "### Message ###" heading of the user prompt.
     message: str
+    # Text placed under the "### Code ###" heading of the user prompt.
+    code: str
 @app.post("/chat")
 async def chat(prompt: Prompt):
+    """Handle POST /chat: build the prompts, query the model, return its reply.
+    The system prompt is assembled from the sections returned by
+    fetch_prompts_from_google_doc(); the user prompt carries the request's
+    message and code. A section missing from the document raises KeyError.
+    Returns:
+        dict with a single key ``"reply"`` holding the stripped model output.
+    """
+    # Local import so this handler does not rely on changes to the file's import block.
+    from fastapi.concurrency import run_in_threadpool
     print("Received POST request")
     print("Message:", prompt.message)
+    # The fetch is a blocking HTTP call; running it directly in this coroutine
+    # would stall the event loop for every other request, so use the threadpool.
+    prompts = await run_in_threadpool(fetch_prompts_from_google_doc)
+    system_prompt = f"""
+    ### Unit Information ###
+    {prompts['UNIT_INFORMATION']}
+    ### Role Description ###
+    {prompts['ROLE_DESCRIPTION']}
+    ### Topic Information ###
+    {prompts['TOPIC_INFORMATION']}
+    ### Task Description ###
+    {prompts['TASK_DESCRIPTION']}
+    ### Reference Solution ###
+    {prompts['REFERENCE_SOLUTION']}
+    ### Behavioral Instructions ###
+    {prompts['BEHAVIORAL_INSTRUCTIONS']}
+    """
+    user_prompt = f"""
+    ### Message ###
+    {prompt.message}
+    ### Code ###
+    {prompt.code}
+    """
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    # The inference call is synchronous as well; offload it for the same reason.
+    response = await run_in_threadpool(
+        client.chat_completion,
+        messages,
+        max_tokens=2048,
+        temperature=0.2,
     )
+    text_response = response["choices"][0]["message"]["content"].strip()
+    print("Text generation done", text_response)
+    return {"reply": text_response}