JSenkCC committed on
Commit
218964f
·
verified ·
1 Parent(s): 927bdf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -59
app.py CHANGED
@@ -323,70 +323,50 @@ headers = {"Authorization": f"Bearer {qwen}"}
323
 
324
  def clean_output(output):
325
  """
326
- Cleans the output from the Hugging Face model to ensure only the relevant details are included.
 
327
  """
328
- # Remove known markers for prompts and redundant sections
329
- markers_to_exclude = [
330
- "Functions identified by Gemini",
331
- "Tasks:",
332
- "Return only the required information",
333
- "User-specified functionality:"
334
- ]
335
-
336
  lines = output.splitlines()
337
- filtered_lines = []
338
- seen_sections = set()
339
-
340
- for line in lines:
341
- line = line.strip()
342
- # Skip lines that are part of excluded markers
343
- if any(marker in line for marker in markers_to_exclude):
344
- continue
345
-
346
- # Skip duplicate sections
347
- if line.startswith("Project Summary:") or line.startswith("Functionality Summary:"):
348
- if line in seen_sections:
349
- continue
350
- seen_sections.add(line)
351
-
352
- # Skip empty lines
353
- if line:
354
- filtered_lines.append(line)
355
-
356
  return "\n".join(filtered_lines)
357
 
358
 
359
 
360
 
361
 
 
362
  def validate_and_generate_documentation(api_url, headers, gemini_output, file_contents, functionality_description):
363
  """
364
- Uses the Hugging Face Inference API to generate documentation in chunks to avoid token limits
365
- and ensures only clean output is returned.
366
  """
367
- # Generate the refined prompt for the Qwen model
368
  base_prompt = f"""
369
  User-specified functionality: '{functionality_description}'
370
  Functions identified by Gemini:
371
  {gemini_output}
372
  Qwen, identify the functions provided above in the project, and with the User-specified functionality in mind, perform these tasks:
373
- 1. Generate a project summary:
374
- '
375
  Project Summary:
376
- <Qwen, include project description and library or module dependencies>
377
- '
378
- 2. Refine the user-defined functionality:
379
- '
380
  Functionality Summary:
381
- <Qwen, provide an enhanced description of user-specified functionality>
382
- '
383
- 3. Describe the functionality flow:
384
- '
385
  Functionality Flow:
386
- <Qwen, explain the sequence of functions and data flow>
387
- '
388
- 4. Generate detailed documentation for each function:
389
- '
390
  Function Documentation:
391
  For each relevant function:
392
  - Summary: <Description of the function's purpose>
@@ -394,14 +374,14 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
394
  - Outputs: <Details of outputs and their types>
395
  - Dependencies: <Dependencies on other modules/functions>
396
  - Data structures: <Details of data structures used>
397
- - Algorithmic Details: <Description of the algorithm used>
398
  - Error Handling: <Description of how the function handles errors>
399
  - Assumptions: <Any assumptions the function makes>
400
- - Example Usage: <Example demonstrating usage>
401
- '
402
  """
403
-
404
- # Split file contents into chunks to avoid exceeding token limits
405
  max_chunk_size = 12000 # Adjust for tokenization overhead
406
  file_chunks = []
407
  current_chunk = base_prompt
@@ -414,13 +394,14 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
414
  else:
415
  current_chunk += chunk_content
416
 
 
417
  if current_chunk not in file_chunks:
418
  file_chunks.append(current_chunk)
419
 
420
- # Process each chunk through the API
421
  full_output = ""
422
  for chunk in file_chunks:
423
- payload = {"inputs": chunk, "parameters": {"max_new_tokens": 2048}}
424
  response = requests.post(api_url, headers=headers, json=payload)
425
 
426
  if response.status_code == 200:
@@ -431,15 +412,12 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
431
  output = api_response.get("generated_text", "")
432
  else:
433
  raise ValueError("Unexpected response format from Hugging Face API.")
434
- full_output += output
 
435
  else:
436
  raise ValueError(f"Error during API call: {response.status_code}, {response.text}")
437
 
438
- # Apply cleaning to remove unnecessary content
439
- return clean_output(full_output)
440
-
441
-
442
-
443
 
444
  def generate_documentation_page():
445
  st.subheader(f"Generate Documentation for {st.session_state.current_project}")
@@ -474,7 +452,7 @@ def generate_documentation_page():
474
  API_URL, headers, gemini_result, file_contents, functionality
475
  )
476
 
477
- # Display the cleaned final documentation
478
  st.success("Documentation generated successfully!")
479
  st.text_area("Generated Documentation", final_documentation, height=600)
480
  except Exception as e:
@@ -489,6 +467,7 @@ def generate_documentation_page():
489
  st.session_state.page = "project_view"
490
  st.rerun()
491
 
 
492
 
493
 
494
 
 
323
 
324
  def clean_output(output):
325
  """
326
+ Cleans the output from the model to ensure only relevant content is included.
327
+ Strips away any redundant prompts, instructions, and markers.
328
  """
 
 
 
 
 
 
 
 
329
  lines = output.splitlines()
330
+ filtered_lines = [
331
+ line for line in lines if not (
332
+ line.startswith("File:") or
333
+ line.startswith("User-specified functionality:") or
334
+ line.startswith("Functions identified by Gemini:") or
335
+ line.startswith("Qwen,") or
336
+ line.startswith("<")
337
+ line.startswith("Tasks:") or
338
+ line.startswith("'") or
339
+ line.strip() == "" # Exclude empty or irrelevant lines
340
+ )
341
+ ]
 
 
 
 
 
 
 
342
  return "\n".join(filtered_lines)
343
 
344
 
345
 
346
 
347
 
348
+
349
  def validate_and_generate_documentation(api_url, headers, gemini_output, file_contents, functionality_description):
350
  """
351
+ Generates documentation by communicating with the Qwen model in manageable chunks.
352
+ Cleans the output to ensure user sees only relevant information.
353
  """
354
+ # Restore the detailed prompt for Qwen
355
  base_prompt = f"""
356
  User-specified functionality: '{functionality_description}'
357
  Functions identified by Gemini:
358
  {gemini_output}
359
  Qwen, identify the functions provided above in the project, and with the User-specified functionality in mind, perform these tasks:
360
+ 1. Generate a summary of the project in this format:
 
361
  Project Summary:
362
+ <Qwen, include project description and library or module dependencies here>\n
363
+ 2. Refine the user-defined functionality with your answer in this format:
 
 
364
  Functionality Summary:
365
+ <Qwen, provide an enhanced description of user-specified functionality here>\n
366
+ 3. Describe the flow of the functionality with your answer here:
 
 
367
  Functionality Flow:
368
+ <Qwen, Explain the sequence of functions and data flow>\n
369
+ 4. For all relevant functions, generate detailed documentation in this format:
 
 
370
  Function Documentation:
371
  For each relevant function:
372
  - Summary: <Description of the function's purpose>
 
374
  - Outputs: <Details of outputs and their types>
375
  - Dependencies: <Dependencies on other modules/functions>
376
  - Data structures: <Details of data structures used>
377
+ - Algorithmic Details: <Description of the algorithm used in the function>
378
  - Error Handling: <Description of how the function handles errors>
379
  - Assumptions: <Any assumptions the function makes>
380
+ - Example Usage: <Example demonstrating how to use the function>\n
381
+ Qwen, return only what was asked of you in the 4 tasks defined above, and nothing else
382
  """
383
+
384
+ # Split file contents into manageable chunks
385
  max_chunk_size = 12000 # Adjust for tokenization overhead
386
  file_chunks = []
387
  current_chunk = base_prompt
 
394
  else:
395
  current_chunk += chunk_content
396
 
397
+ # Add the final chunk
398
  if current_chunk not in file_chunks:
399
  file_chunks.append(current_chunk)
400
 
401
+ # Process each chunk and accumulate the cleaned output
402
  full_output = ""
403
  for chunk in file_chunks:
404
+ payload = {"inputs": chunk, "parameters": {"max_new_tokens": 1024}}
405
  response = requests.post(api_url, headers=headers, json=payload)
406
 
407
  if response.status_code == 200:
 
412
  output = api_response.get("generated_text", "")
413
  else:
414
  raise ValueError("Unexpected response format from Hugging Face API.")
415
+
416
+ full_output += clean_output(output) # Clean each chunk's output
417
  else:
418
  raise ValueError(f"Error during API call: {response.status_code}, {response.text}")
419
 
420
+ return full_output
 
 
 
 
421
 
422
  def generate_documentation_page():
423
  st.subheader(f"Generate Documentation for {st.session_state.current_project}")
 
452
  API_URL, headers, gemini_result, file_contents, functionality
453
  )
454
 
455
+ # Display the final cleaned documentation
456
  st.success("Documentation generated successfully!")
457
  st.text_area("Generated Documentation", final_documentation, height=600)
458
  except Exception as e:
 
467
  st.session_state.page = "project_view"
468
  st.rerun()
469
 
470
+
471
 
472
 
473