sugiv committed on
Commit
5b97345
·
1 Parent(s): e1b0723

Leetmonkey In Action via Inference

Browse files
Files changed (1) hide show
  1. app.py +8 -205
app.py CHANGED
@@ -1,211 +1,14 @@
1
- import os
2
- import re
3
- import logging
4
- import textwrap
5
- import autopep8
6
  import gradio as gr
7
- from huggingface_hub import hf_hub_download
8
- from llama_cpp import Llama
9
- import jwt
10
- from typing import Generator
11
- from fastapi import FastAPI, HTTPException, Depends
12
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
13
- from pydantic import BaseModel
14
  import spaces
 
15
 
16
- # Set up logging
17
- logging.basicConfig(level=logging.INFO)
18
- logger = logging.getLogger(__name__)
19
-
20
- # JWT settings
21
- JWT_SECRET = os.environ.get("JWT_SECRET")
22
- if not JWT_SECRET:
23
- raise ValueError("JWT_SECRET environment variable is not set")
24
- JWT_ALGORITHM = "HS256"
25
-
26
- # Model settings
27
- MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
28
- REPO_ID = "sugiv/leetmonkey-peft-gguf"
29
-
30
- # Generation parameters
31
- generation_kwargs = {
32
- "max_tokens": 2048,
33
- "stop": ["```", "### Instruction:", "### Response:"],
34
- "echo": False,
35
- "temperature": 0.2,
36
- "top_k": 50,
37
- "top_p": 0.95,
38
- "repeat_penalty": 1.1
39
- }
40
-
41
- @spaces.GPU
42
- def download_model(model_name: str) -> str:
43
- logger.info(f"Downloading model: {model_name}")
44
- model_path = hf_hub_download(
45
- repo_id=REPO_ID,
46
- filename=model_name,
47
- cache_dir="./models",
48
- force_download=True,
49
- resume_download=True
50
- )
51
- logger.info(f"Model downloaded: {model_path}")
52
- return model_path
53
-
54
- # Download and load the 8-bit model at startup
55
- model_path = download_model(MODEL_NAME)
56
 
57
  @spaces.GPU
58
- def load_model(model_path):
59
- return Llama(
60
- model_path=model_path,
61
- n_ctx=2048,
62
- n_threads=4,
63
- n_gpu_layers=-1, # Use all available GPU layers
64
- verbose=False
65
- )
66
-
67
- llm = load_model(model_path)
68
- logger.info("8-bit model loaded successfully")
69
-
70
- @spaces.GPU
71
- def generate_solution(instruction: str) -> str:
72
- system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
73
- full_prompt = f"""### Instruction:
74
- {system_prompt}
75
-
76
- Implement the following function for the LeetCode problem:
77
-
78
- {instruction}
79
-
80
- ### Response:
81
- Here's the complete Python function implementation:
82
-
83
- ```python
84
- """
85
-
86
- response = llm(full_prompt, **generation_kwargs)
87
- return response["choices"][0]["text"]
88
-
89
- def extract_and_format_code(text: str) -> str:
90
- # Extract code between triple backticks
91
- code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
92
- if code_match:
93
- code = code_match.group(1)
94
- else:
95
- code = text
96
-
97
- # Remove any text before the function definition
98
- code = re.sub(r'^.*?(?=def\s+\w+\s*\()', '', code, flags=re.DOTALL)
99
-
100
- # Dedent the code to remove any common leading whitespace
101
- code = textwrap.dedent(code)
102
-
103
- # Split the code into lines
104
- lines = code.split('\n')
105
-
106
- # Find the function definition line
107
- func_def_index = next((i for i, line in enumerate(lines) if line.strip().startswith('def ')), 0)
108
-
109
- # Ensure proper indentation
110
- indented_lines = [lines[func_def_index]] # Keep the function definition as is
111
- for line in lines[func_def_index + 1:]:
112
- if line.strip(): # If the line is not empty
113
- indented_lines.append(' ' + line) # Add 4 spaces of indentation
114
- else:
115
- indented_lines.append(line) # Keep empty lines as is
116
-
117
- formatted_code = '\n'.join(indented_lines)
118
-
119
- try:
120
- return autopep8.fix_code(formatted_code)
121
- except:
122
- return formatted_code
123
-
124
- security = HTTPBearer()
125
- app = FastAPI()
126
-
127
- class ProblemRequest(BaseModel):
128
- instruction: str
129
-
130
- def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
131
- try:
132
- jwt.decode(credentials.credentials, JWT_SECRET, algorithms=[JWT_ALGORITHM])
133
- return True
134
- except jwt.PyJWTError:
135
- raise HTTPException(status_code=401, detail="Invalid token")
136
-
137
- @app.post("/generate_solution")
138
- @spaces.GPU
139
- async def generate_solution_api(request: ProblemRequest, authorized: bool = Depends(verify_token)):
140
- logger.info("Generating solution")
141
- generated_output = generate_solution(request.instruction)
142
- formatted_code = extract_and_format_code(generated_output)
143
- logger.info("Solution generated successfully")
144
- return {"solution": formatted_code}
145
-
146
- @app.post("/stream_solution")
147
- @spaces.GPU
148
- async def stream_solution_api(request: ProblemRequest, authorized: bool = Depends(verify_token)):
149
- async def generate():
150
- logger.info("Streaming solution")
151
- system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
152
- full_prompt = f"""### Instruction:
153
- {system_prompt}
154
-
155
- Implement the following function for the LeetCode problem:
156
-
157
- {request.instruction}
158
-
159
- ### Response:
160
- Here's the complete Python function implementation:
161
-
162
- ```python
163
- """
164
-
165
- generated_text = ""
166
- for chunk in llm(full_prompt, stream=True, **generation_kwargs):
167
- token = chunk["choices"]["text"]
168
- generated_text += token
169
- yield token
170
-
171
- formatted_code = extract_and_format_code(generated_text)
172
- logger.info("Solution generated successfully")
173
- yield formatted_code
174
-
175
- return generate()
176
-
177
- # Gradio wrapper for FastAPI
178
- def gradio_wrapper(app):
179
- @spaces.GPU
180
- def inference(instruction, token):
181
- import requests
182
- url = "http://localhost:8000/generate_solution"
183
- headers = {"Authorization": f"Bearer {token}"}
184
- response = requests.post(url, json={"instruction": instruction}, headers=headers)
185
- if response.status_code == 200:
186
- return response.json()["solution"]
187
- else:
188
- return f"Error: {response.status_code}, {response.text}"
189
-
190
- iface = gr.Interface(
191
- fn=inference,
192
- inputs=[
193
- gr.Textbox(label="LeetCode Problem Instruction"),
194
- gr.Textbox(label="JWT Token")
195
- ],
196
- outputs=gr.Code(label="Generated Solution"),
197
- title="LeetCode Problem Solver API",
198
- description="Enter a LeetCode problem instruction and your JWT token to generate a solution."
199
- )
200
- return iface
201
-
202
- if __name__ == "__main__":
203
- import uvicorn
204
- from threading import Thread
205
-
206
- # Start FastAPI in a separate thread
207
- Thread(target=lambda: uvicorn.run(app, host="0.0.0.0", port=8000)).start()
208
 
209
- # Launch Gradio interface
210
- iface = gradio_wrapper(app)
211
- iface.launch(share=True)
 
 
 
 
 
 
import gradio as gr
import spaces
import torch

# Created at import time. Under Spaces ZeroGPU no GPU is attached to the
# process yet, so the .cuda() placement is deferred and the tensor still
# reports 'cpu' here; inside a @spaces.GPU-decorated call it becomes CUDA.
# torch.tensor(...) is the recommended factory; the legacy torch.Tensor(...)
# constructor it replaces produced the same float32 value [0.].
zero = torch.tensor([0.0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔


@spaces.GPU
def greet(n):
    """Return a greeting string embedding the tensor `zero + n`.

    `n` arrives as a float from the gr.Number input. Runs on GPU: inside
    this decorated function the ZeroGPU runtime has attached a device,
    so `zero` now lives on CUDA.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗
    return f"Hello {zero + n} Tensor"


# Module-level launch is intentional: Spaces executes app.py as the entry point.
demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch()