Spaces:

JSenkCC
/

SimplifAI

Sleeping

App Files Files Community

JSenkCC committed on Dec 3, 2024

Commit

218964f

verified ·

1 Parent(s): 927bdf6

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -59

app.py CHANGED Viewed

@@ -323,70 +323,50 @@ headers = {"Authorization": f"Bearer {qwen}"}
 def clean_output(output):
     """
-    Cleans the output from the Hugging Face model to ensure only the relevant details are included.
     """
-    # Remove known markers for prompts and redundant sections
-    markers_to_exclude = [
-        "Functions identified by Gemini",
-        "Tasks:",
-        "Return only the required information",
-        "User-specified functionality:"
-    ]
     lines = output.splitlines()
-    filtered_lines = []
-    seen_sections = set()
-    for line in lines:
-        line = line.strip()
-        # Skip lines that are part of excluded markers
-        if any(marker in line for marker in markers_to_exclude):
-            continue
-        # Skip duplicate sections
-        if line.startswith("Project Summary:") or line.startswith("Functionality Summary:"):
-            if line in seen_sections:
-                continue
-            seen_sections.add(line)
-        # Skip empty lines
-        if line:
-            filtered_lines.append(line)
     return "\n".join(filtered_lines)
 def validate_and_generate_documentation(api_url, headers, gemini_output, file_contents, functionality_description):
     """
-    Uses the Hugging Face Inference API to generate documentation in chunks to avoid token limits
-    and ensures only clean output is returned.
     """
-    # Generate the refined prompt for the Qwen model
     base_prompt = f"""
     User-specified functionality: '{functionality_description}'
     Functions identified by Gemini:
     {gemini_output}
     Qwen, identify the functions provided above in the project, and with the User-specified functionality in mind, perform these tasks:
-    1. Generate a project summary:
-        '
         Project Summary:
-        <Qwen, include project description and library or module dependencies>
-        '
-    2. Refine the user-defined functionality:
-        '
         Functionality Summary:
-        <Qwen, provide an enhanced description of user-specified functionality>
-        '
-    3. Describe the functionality flow:
-        '
         Functionality Flow:
-        <Qwen, explain the sequence of functions and data flow>
-        '
-    4. Generate detailed documentation for each function:
-        '
         Function Documentation:
         For each relevant function:
           - Summary: <Description of the function's purpose>
@@ -394,14 +374,14 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
           - Outputs: <Details of outputs and their types>
           - Dependencies: <Dependencies on other modules/functions>
           - Data structures: <Details of data structures used>
-          - Algorithmic Details: <Description of the algorithm used>
           - Error Handling: <Description of how the function handles errors>
           - Assumptions: <Any assumptions the function makes>
-          - Example Usage: <Example demonstrating usage>
-        '
     """
-    # Split file contents into chunks to avoid exceeding token limits
     max_chunk_size = 12000  # Adjust for tokenization overhead
     file_chunks = []
     current_chunk = base_prompt
@@ -414,13 +394,14 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
         else:
             current_chunk += chunk_content
     if current_chunk not in file_chunks:
         file_chunks.append(current_chunk)
-    # Process each chunk through the API
     full_output = ""
     for chunk in file_chunks:
-        payload = {"inputs": chunk, "parameters": {"max_new_tokens": 2048}}
         response = requests.post(api_url, headers=headers, json=payload)
         if response.status_code == 200:
@@ -431,15 +412,12 @@ def validate_and_generate_documentation(api_url, headers, gemini_output, file_co
                 output = api_response.get("generated_text", "")
             else:
                 raise ValueError("Unexpected response format from Hugging Face API.")
-            full_output += output
         else:
             raise ValueError(f"Error during API call: {response.status_code}, {response.text}")
-    # Apply cleaning to remove unnecessary content
-    return clean_output(full_output)
 def generate_documentation_page():
     st.subheader(f"Generate Documentation for {st.session_state.current_project}")
@@ -474,7 +452,7 @@ def generate_documentation_page():
                         API_URL, headers, gemini_result, file_contents, functionality
                     )
-                    # Display the cleaned final documentation
                     st.success("Documentation generated successfully!")
                     st.text_area("Generated Documentation", final_documentation, height=600)
                 except Exception as e:
@@ -489,6 +467,7 @@ def generate_documentation_page():
         st.session_state.page = "project_view"
         st.rerun()

 def clean_output(output):
     """
+    Cleans the output from the model to ensure only relevant content is included.
+    Strips away any redundant prompts, instructions, and markers.
     """
     lines = output.splitlines()
+    filtered_lines = [
+        line for line in lines if not (
+            line.startswith("File:") or
+            line.startswith("User-specified functionality:") or
+            line.startswith("Functions identified by Gemini:") or
+            line.startswith("Qwen,") or
+            line.startswith("<")
+            line.startswith("Tasks:") or
+            line.startswith("'") or
+            line.strip() == ""  # Exclude empty or irrelevant lines
+        )
+    ]
     return "\n".join(filtered_lines)
 def validate_and_generate_documentation(api_url, headers, gemini_output, file_contents, functionality_description):
     """
+    Generates documentation by communicating with the Qwen model in manageable chunks.
+    Cleans the output to ensure user sees only relevant information.
     """
+    # Restore the detailed prompt for Qwen
     base_prompt = f"""
     User-specified functionality: '{functionality_description}'
     Functions identified by Gemini:
     {gemini_output}
     Qwen, identify the functions provided above in the project, and with the User-specified functionality in mind, perform these tasks:
+    1. Generate a summary of the project in this format:
         Project Summary:
+        <Qwen, include project description and library or module dependencies here>\n
+    2. Refine the user-defined functionality with your answer in this format:
         Functionality Summary:
+        <Qwen, provide an enhanced description of user-specified functionality here>\n
+    3. Describe the flow of the functionality with your answer here:
         Functionality Flow:
+        <Qwen, Explain the sequence of functions and data flow>\n
+    4. For all relevant functions, generate detailed documentation in this format:
         Function Documentation:
         For each relevant function:
           - Summary: <Description of the function's purpose>
           - Outputs: <Details of outputs and their types>
           - Dependencies: <Dependencies on other modules/functions>
           - Data structures: <Details of data structures used>
+          - Algorithmic Details: <Description of the algorithm used in the function>
           - Error Handling: <Description of how the function handles errors>
           - Assumptions: <Any assumptions the function makes>
+          - Example Usage: <Example demonstrating how to use the function>\n
+    Qwen, return only what was asked of you in the 4 tasks defined above, and nothing else
     """
+    # Split file contents into manageable chunks
     max_chunk_size = 12000  # Adjust for tokenization overhead
     file_chunks = []
     current_chunk = base_prompt
         else:
             current_chunk += chunk_content
+    # Add the final chunk
     if current_chunk not in file_chunks:
         file_chunks.append(current_chunk)
+    # Process each chunk and accumulate the cleaned output
     full_output = ""
     for chunk in file_chunks:
+        payload = {"inputs": chunk, "parameters": {"max_new_tokens": 1024}}
         response = requests.post(api_url, headers=headers, json=payload)
         if response.status_code == 200:
                 output = api_response.get("generated_text", "")
             else:
                 raise ValueError("Unexpected response format from Hugging Face API.")
+            full_output += clean_output(output)  # Clean each chunk's output
         else:
             raise ValueError(f"Error during API call: {response.status_code}, {response.text}")
+    return full_output
 def generate_documentation_page():
     st.subheader(f"Generate Documentation for {st.session_state.current_project}")
                         API_URL, headers, gemini_result, file_contents, functionality
                     )
+                    # Display the final cleaned documentation
                     st.success("Documentation generated successfully!")
                     st.text_area("Generated Documentation", final_documentation, height=600)
                 except Exception as e:
         st.session_state.page = "project_view"
         st.rerun()