sugiv committed on
Commit
5b97345
·
1 Parent(s): e1b0723

Leetmonkey In Action via Inference

Browse files
Files changed (1) hide show
  1. app.py +8 -205
app.py CHANGED
@@ -1,211 +1,14 @@
1
- import os
2
- import re
3
- import logging
4
- import textwrap
5
- import autopep8
6
  import gradio as gr
7
- from huggingface_hub import hf_hub_download
8
- from llama_cpp import Llama
9
- import jwt
10
- from typing import Generator
11
- from fastapi import FastAPI, HTTPException, Depends
12
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
13
- from pydantic import BaseModel
14
  import spaces
 
15
 
16
- # Set up logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
- # JWT settings
21
- JWT_SECRET = os.environ.get("JWT_SECRET")
22
- if not JWT_SECRET:
23
- raise ValueError("JWT_SECRET environment variable is not set")
24
- JWT_ALGORITHM = "HS256"
25
-
26
- # Model settings
27
- MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
28
- REPO_ID = "sugiv/leetmonkey-peft-gguf"
29
-
30
- # Generation parameters
31
- generation_kwargs = {
32
- "max_tokens": 2048,
33
- "stop": ["```", "### Instruction:", "### Response:"],
34
- "echo": False,
35
- "temperature": 0.2,
36
- "top_k": 50,
37
- "top_p": 0.95,
38
- "repeat_penalty": 1.1
39
- }
40
-
41
- @spaces.GPU
42
- def download_model(model_name: str) -> str:
43
- logger.info(f"Downloading model: {model_name}")
44
- model_path = hf_hub_download(
45
- repo_id=REPO_ID,
46
- filename=model_name,
47
- cache_dir="./models",
48
- force_download=True,
49
- resume_download=True
50
- )
51
- logger.info(f"Model downloaded: {model_path}")
52
- return model_path
53
-
54
- # Download and load the 8-bit model at startup
55
- model_path = download_model(MODEL_NAME)
56
 
57
  @spaces.GPU
58
- def load_model(model_path):
59
- return Llama(
60
- model_path=model_path,
61
- n_ctx=2048,
62
- n_threads=4,
63
- n_gpu_layers=-1, # Use all available GPU layers
64
- verbose=False
65
- )
66
-
67
- llm = load_model(model_path)
68
- logger.info("8-bit model loaded successfully")
69
-
70
- @spaces.GPU
71
- def generate_solution(instruction: str) -> str:
72
- system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
73
- full_prompt = f"""### Instruction:
74
- {system_prompt}
75
-
76
- Implement the following function for the LeetCode problem:
77
-
78
- {instruction}
79
-
80
- ### Response:
81
- Here's the complete Python function implementation:
82
-
83
- ```python
84
- """
85
-
86
- response = llm(full_prompt, **generation_kwargs)
87
- return response["choices"][0]["text"]
88
-
89
- def extract_and_format_code(text: str) -> str:
90
- # Extract code between triple backticks
91
- code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
92
- if code_match:
93
- code = code_match.group(1)
94
- else:
95
- code = text
96
-
97
- # Remove any text before the function definition
98
- code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
99
-
100
- # Dedent the code to remove any common leading whitespace
101
- code = textwrap.dedent(code)
102
-
103
- # Split the code into lines
104
- lines = code.split('\n')
105
-
106
- # Find the function definition line
107
- func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
108
-
109
- # Ensure proper indentation
110
- indented_lines = [lines[func_def_index]] # Keep the function definition as is
111
- for line in lines[func_def_index + 1:]:
112
- if line.strip(): # If the line is not empty
113
- indented_lines.append(' ' + line) # Add 4 spaces of indentation
114
- else:
115
- indented_lines.append(line) # Keep empty lines as is
116
-
117
- formatted_code = '\n'.join(indented_lines)
118
-
119
- try:
120
- return autopep8.fix_code(formatted_code)
121
- except:
122
- return formatted_code
123
-
124
- security = HTTPBearer()
125
- app = FastAPI()
126
-
127
- class ProblemRequest(BaseModel):
128
- instruction: str
129
-
130
- def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
131
- try:
132
- jwt.decode(credentials.credentials, JWT_SECRET, algorithms=[JWT_ALGORITHM])
133
- return True
134
- except jwt.PyJWTError:
135
- raise HTTPException(status_code=401, detail="Invalid token")
136
-
137
- @app.post("/generate_solution")
138
- @spaces.GPU
139
- async def generate_solution_api(request: ProblemRequest, authorized: bool = Depends(verify_token)):
140
- logger.info("Generating solution")
141
- generated_output = generate_solution(request.instruction)
142
- formatted_code = extract_and_format_code(generated_output)
143
- logger.info("Solution generated successfully")
144
- return {"solution": formatted_code}
145
-
146
- @app.post("/stream_solution")
147
- @spaces.GPU
148
- async def stream_solution_api(request: ProblemRequest, authorized: bool = Depends(verify_token)):
149
- async def generate():
150
- logger.info("Streaming solution")
151
- system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
152
- full_prompt = f"""### Instruction:
153
- {system_prompt}
154
-
155
- Implement the following function for the LeetCode problem:
156
-
157
- {request.instruction}
158
-
159
- ### Response:
160
- Here's the complete Python function implementation:
161
-
162
- ```python
163
- """
164
-
165
- generated_text = ""
166
- for chunk in llm(full_prompt, stream=True, **generation_kwargs):
167
- token = chunk["choices"]["text"]
168
- generated_text += token
169
- yield token
170
-
171
- formatted_code = extract_and_format_code(generated_text)
172
- logger.info("Solution generated successfully")
173
- yield formatted_code
174
-
175
- return generate()
176
-
177
- # Gradio wrapper for FastAPI
178
- def gradio_wrapper(app):
179
- @spaces.GPU
180
- def inference(instruction, token):
181
- import requests
182
- url = "http://localhost:8000/generate_solution"
183
- headers = {"Authorization": f"Bearer {token}"}
184
- response = requests.post(url, json={"instruction": instruction}, headers=headers)
185
- if response.status_code == 200:
186
- return response.json()["solution"]
187
- else:
188
- return f"Error: {response.status_code}, {response.text}"
189
-
190
- iface = gr.Interface(
191
- fn=inference,
192
- inputs=[
193
- gr.Textbox(label="LeetCode Problem Instruction"),
194
- gr.Textbox(label="JWT Token")
195
- ],
196
- outputs=gr.Code(label="Generated Solution"),
197
- title="LeetCode Problem Solver API",
198
- description="Enter a LeetCode problem instruction and your JWT token to generate a solution."
199
- )
200
- return iface
201
-
202
- if __name__ == "__main__":
203
- import uvicorn
204
- from threading import Thread
205
-
206
- # Start FastAPI in a separate thread
207
- Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=8000)).start()
208
 
209
- # Launch Gradio interface
210
- iface = gradio_wrapper(app)
211
- iface.launch(share=True)
 
 
 
 
 
 
import gradio as gr
import spaces
import torch

# Created at import time. Under Spaces ZeroGPU no GPU is attached to the
# process yet, so the .cuda() placement is deferred and the tensor still
# reports 'cpu' here; inside a @spaces.GPU-decorated call it becomes CUDA.
# torch.tensor(...) is the recommended factory; the legacy torch.Tensor(...)
# constructor it replaces produced the same float32 value [0.].
zero = torch.tensor([0.0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔


@spaces.GPU
def greet(n):
    """Return a greeting string embedding the tensor `zero + n`.

    `n` arrives as a float from the gr.Number input. Runs on GPU: inside
    this decorated function the ZeroGPU runtime has attached a device,
    so `zero` now lives on CUDA.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


# Module-level launch is intentional: Spaces executes app.py as the entry point.
demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch()