JSenkCC committed
Commit 44353d7 · verified · 1 Parent(s): 77e4c88

Update app.py

Files changed (1)
  1. app.py +50 -33
app.py CHANGED
@@ -270,61 +270,78 @@ def identify_required_functions(project_path, functionality_description):
     return response.text


-# Load Hugging Face model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct")
-hf_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct", device_map="auto")
+# Load the Qwen model and tokenizer
+model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"
+hf_model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)

 def validate_and_generate_documentation(hf_model, tokenizer, gemini_output, file_contents, functionality_description):
-    """Uses Hugging Face model to validate functions and generate full documentation."""
-    # Generate the prompt for the Hugging Face model
+    """Uses Qwen model to validate functions and generate full documentation."""
+    # Generate the prompt for the Qwen model
     prompt = f"""
-    The user-provided functionality: '{functionality_description}'
-    The functions identified by Gemini:
+    User-specified functionality: '{functionality_description}'
+    Functions identified by Gemini:
     {gemini_output}

     Project files:
     """
     for file_path, content in file_contents.items():
-        prompt += f"File: {os.path.basename(file_path)}\n{content[:1000]}...\n\n"  # Truncate large files for the model
+        # Truncate content to avoid exceeding token limits
+        truncated_content = content[:1000] if len(content) > 1000 else content
+        prompt += f"File: {os.path.basename(file_path)}\n{truncated_content}\n\n"

     prompt += """
     Task:
     1. Validate if the functions identified by Gemini are sufficient for implementing the functionality.
     2. If not, identify all additional functions required.
-    3. For all relevant functions, generate detailed documentation in the following format:
-    """
-    prompt += """
+    3. For all relevant functions, generate detailed documentation in this format:
+
     Project Summary:
-    Summary of the entire project, making sure to mention the language it's programmed in and any libraries or other dependencies it has
+    <Include project description and dependencies>

     Functionality Summary:
-    Summary of the user-specified functionality
+    <Description of user-specified functionality>

     Functionality Flow:
-    How the programmer goes from inputting information into the first function to the last function and its output to complete
-    the functionality that is described by the user. Make sure to mention each function that is used, and how inputs and outputs flow between each other.
+    <Explain the sequence of functions and data flow>

     Function Documentation:
-    For each file that contains the relevant functions:
-    For each function determined as relevant within the current file:
-    Summary:
-    summarize what the function does
-    Inputs:
-    the inputs and their data types, and their relevance in the scope of the specified functionality
-    Outputs:
-    the output, its data type, and its relevance in the scope of the specified functionality
-    Dependencies:
-    the dependencies of the function and where they come from
-    Data structures:
-    the data structures that the function relies on
+    For each relevant function:
+    - Summary: <Description of the function's purpose>
+    - Inputs: <Details of inputs and their types>
+    - Outputs: <Details of outputs and their types>
+    - Dependencies: <Dependencies on other modules/functions>
+    - Data structures: <Details of data structures used>
     """
-    # Encode and call the Hugging Face model
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(hf_model.device)
-    outputs = hf_model.generate(inputs["input_ids"], max_length=4096, num_return_sequences=1)

-    # Decode the response
-    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return decoded_output
+    # Prepare the chat-style input for Qwen
+    messages = [
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt", truncation=True, max_length=32768).to(hf_model.device)
+
+    # Generate output from the model
+    generated_ids = hf_model.generate(
+        **model_inputs,
+        max_new_tokens=2048
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    # Decode and return the response
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response

 def generate_documentation_page():
     st.subheader(f"Generate Documentation for {st.session_state.current_project}")
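
For reference, a minimal usage sketch of the updated helper, assuming the module-level hf_model and tokenizer loaded in this commit. The shape of file_contents is inferred from the file_contents.items() loop and the os.path.basename(file_path) call in the diff; the Gemini output string and the functionality description below are hypothetical placeholders, not values from this commit.

# Hypothetical invocation of the updated function; all values are placeholders.
file_contents = {
    "/projects/demo/utils.py": "def load_data(path):\n    return open(path).read()\n",
    "/projects/demo/report.py": "def render_report(data):\n    ...\n",
}
gemini_output = "load_data (utils.py), render_report (report.py)"  # stand-in for the earlier Gemini step's response
functionality_description = "Load a data file and render it as a report"

docs = validate_and_generate_documentation(
    hf_model,
    tokenizer,
    gemini_output,
    file_contents,
    functionality_description,
)
print(docs)

Because the new generation path slices each output_ids[len(input_ids):] before decoding, docs contains only the model's completion, without the echoed prompt.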