sidbhasin committed
Commit: ce8a794
1 Parent(s): 2a47abd

Update app.py

Files changed (1)
  1. app.py +61 -47
app.py CHANGED
@@ -2,77 +2,93 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gc
+import os
+
+# Set environment variables for cache management
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/.cache/huggingface'
+os.environ['HF_HOME'] = '/tmp/.cache/huggingface'
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+
+def cleanup_memory():
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+    gc.collect()
 
-# Initialize model and tokenizer globally
 def load_model():
-    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
+    cleanup_memory()
+    model_name = "Qwen/Qwen1.5-0.5B"  # Using smaller model
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            use_fast=False,
+            cache_dir="/tmp/.cache/huggingface"
+        )
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
             device_map="auto",
             trust_remote_code=True,
-            torch_dtype=torch.float16
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True,
+            offload_folder="/tmp/offload",
+            offload_state_dict=True,
+            cache_dir="/tmp/.cache/huggingface"
         )
         return model, tokenizer
     except Exception as e:
        print(f"Error loading model: {str(e)}")
        return None, None
 
+# Initialize model and tokenizer
+print("Loading model...")
 model, tokenizer = load_model()
+print("Model loaded successfully!")
 
 def generate_code(prompt):
     try:
-        # Clear CUDA cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            gc.collect()
+        cleanup_memory()
 
         # Prepare the prompt
         messages = [
-            {"role": "system", "content": "You are an expert Python developer. Generate clean, efficient, and well-commented code based on the user's requirements."},
+            {"role": "system", "content": "You are an expert Python developer. Generate clean, efficient, and well-commented code based on the user's requirements. Only provide the code without any explanations."},
             {"role": "user", "content": f"Create a Python tool for the following requirement: {prompt}"}
         ]
 
-        # Format the messages using the chat template
-        text = tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-
         # Generate the response
-        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-        generated_ids = model.generate(
-            **model_inputs,
-            max_new_tokens=1024,
+        inputs = tokenizer.encode(str(messages), return_tensors="pt", truncation=True, max_length=512)
+        inputs = inputs.to(model.device)
+
+        outputs = model.generate(
+            inputs,
+            max_length=1024,
             temperature=0.7,
             top_p=0.9,
             repetition_penalty=1.1,
-            do_sample=True
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+            num_return_sequences=1
         )
 
-        # Decode the response
-        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Extract only the code part from the response
+        # Extract code from response
         code_start = response.find("```python")
-        code_end = response.find("```", code_start + 8)
+        code_end = response.find("```", code_start + 8) if code_start != -1 else -1
 
         if code_start != -1 and code_end != -1:
             code = response[code_start + 8:code_end].strip()
         else:
-            code = response
-
+            # If no code block found, try to extract any Python-like code
+            code = "\n".join([line for line in response.split("\n")
+                              if line.strip() and not line.startswith(("#", "//", "/*"))])
+
+        cleanup_memory()
         return code
 
     except Exception as e:
+        cleanup_memory()
         raise gr.Error(f"Code generation failed: {str(e)}")
-    finally:
-        # Clear memory
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            gc.collect()
 
 # Create the Gradio interface
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
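
Review note on this hunk: the new code tokenizes str(messages) directly, so the model sees the Python repr of the message list rather than a chat-formatted prompt, and max_length=1024 now counts the prompt tokens, unlike the removed max_new_tokens=1024, which counted only generated tokens. If a chat-tuned checkpoint is used (the base Qwen/Qwen1.5-0.5B may not ship a chat template), the removed apply_chat_template path is the usual way to build the prompt. A minimal sketch, assuming a hypothetical swap to a chat variant such as Qwen/Qwen1.5-0.5B-Chat:

    # Sketch only: chat-template prompting, assuming a chat-tuned checkpoint.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True  # append the assistant-turn marker
    )
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,  # cap generated tokens; max_length would also count the prompt
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)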
@@ -80,7 +96,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
         """
         # 🛠️ AI Tool Builder by Syncmerce
 
-        Generate production-ready Python code for your tools using AI. Simply describe what you want to build!
+        Generate Python code for your tools using AI. Simply describe what you want to build!
         """
     )
 
@@ -99,17 +115,16 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
     code_output = gr.Code(
         label="Generated Code",
         language="python",
-        lines=25,
+        lines=20,
         show_label=True
     )
 
-    # Add examples
+    # Add examples (keeping them simple to save memory)
     gr.Examples(
         examples=[
-            ["Create a PDF text extractor that can process multiple files and save the extracted text to a txt file"],
-            ["Build a web scraper that extracts product prices from Amazon and saves results to CSV"],
-            ["Create an image processing tool that can resize multiple images and add watermarks"],
-            ["Build a YouTube video downloader with progress bar and quality selection"],
+            ["Create a simple PDF text extractor"],
+            ["Build a basic web scraper for product prices"],
+            ["Create an image resizing tool"],
         ],
         inputs=prompt_input,
         outputs=code_output,
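
Side note: example caching is configured on gr.Examples itself rather than on launch() (see the note under the launch hunk below). A sketch, assuming Gradio 3.x:

    # Sketch only: cache_examples=True needs the fn/outputs pair so Gradio can
    # precompute results at startup; with a language model that means one full
    # generation per example, so False is the safer default here.
    gr.Examples(
        examples=[["Create a simple PDF text extractor"]],
        inputs=prompt_input,
        outputs=code_output,
        fn=generate_code,
        cache_examples=False,
    )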
@@ -132,23 +147,22 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as demo:
         api_name="clear"
     )
 
-    # Add footer
     gr.Markdown(
         """
         ### Tips for better results:
-        - Be specific about the features you want
-        - Mention input/output formats
-        - Specify any special requirements or libraries
-
-        *Note: Generated code may need minor adjustments based on your specific use case.*
+        - Keep your requirements clear and concise
+        - Specify input/output formats if needed
+        - Mention any specific libraries you want to use
         """
     )
 
-# Launch the app
+# Launch the app with optimized settings
 demo.launch(
     share=True,
     enable_queue=True,
     show_error=True,
     server_name="0.0.0.0",
-    server_port=7860
+    server_port=7860,
+    cache_examples=True,
+    max_threads=4
 )
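
Review note on the launch call: enable_queue and max_threads are launch() parameters in Gradio 3.x (enable_queue was later deprecated in favor of demo.queue()), but cache_examples is not a launch() parameter as far as I know, so this call may raise a TypeError depending on the installed version. A hedged sketch of an equivalent launch without it:

    # Sketch only, assuming Gradio 3.x: queueing moved to .queue(), and
    # cache_examples dropped because it belongs on gr.Examples instead.
    demo.queue(concurrency_count=1)  # serialize generations to bound GPU memory
    demo.launch(
        share=True,
        show_error=True,
        server_name="0.0.0.0",
        server_port=7860,
        max_threads=4,
    )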
 