JSenkCC committed
Commit 44353d7 · verified · 1 Parent(s): 77e4c88

Update app.py

Files changed (1)
  1. app.py +50 -33
app.py CHANGED
@@ -270,61 +270,78 @@ def identify_required_functions(project_path, functionality_description):
     return response.text


-# Load Hugging Face model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct")
-hf_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct", device_map="auto")
+# Load the Qwen model and tokenizer
+model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"
+hf_model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)

 def validate_and_generate_documentation(hf_model, tokenizer, gemini_output, file_contents, functionality_description):
-    """Uses Hugging Face model to validate functions and generate full documentation."""
-    # Generate the prompt for the Hugging Face model
+    """Uses Qwen model to validate functions and generate full documentation."""
+    # Generate the prompt for the Qwen model
     prompt = f"""
-    The user-provided functionality: '{functionality_description}'
-    The functions identified by Gemini:
+    User-specified functionality: '{functionality_description}'
+    Functions identified by Gemini:
     {gemini_output}

     Project files:
     """
     for file_path, content in file_contents.items():
-        prompt += f"File: {os.path.basename(file_path)}\n{content[:1000]}...\n\n"  # Truncate large files for the model
+        # Truncate content to avoid exceeding token limits
+        truncated_content = content[:1000] if len(content) > 1000 else content
+        prompt += f"File: {os.path.basename(file_path)}\n{truncated_content}\n\n"

     prompt += """
     Task:
     1. Validate if the functions identified by Gemini are sufficient for implementing the functionality.
     2. If not, identify all additional functions required.
-    3. For all relevant functions, generate detailed documentation in the following format:
-    """
-    prompt += """
+    3. For all relevant functions, generate detailed documentation in this format:
+
     Project Summary:
-    Summary of the entire project, making sure to mention the language it's programmed in and any libraries or other dependencies it has
+    <Include project description and dependencies>

     Functionality Summary:
-    Summary of the user-specified functionality
+    <Description of user-specified functionality>

     Functionality Flow:
-    How the programmer goes from inputting information into the first function to the last function and its output to complete
-    the functionality that is described by the user. Make sure to mention each function that is used, and how inputs and outputs flow between each other.
+    <Explain the sequence of functions and data flow>

     Function Documentation:
-    For each file that contains the relevant functions:
-    For each function determined as relevant within the current file:
-    Summary:
-    summarize what the function does
-    Inputs:
-    the inputs and their data types, and their relevance in the scope of the specified functionality
-    Outputs:
-    the output, its data type, and its relevance in the scope of the specified functionality
-    Dependencies:
-    the dependencies of the function and where they come from
-    Data structures:
-    the data structures that the function relies on
+    For each relevant function:
+    - Summary: <Description of the function's purpose>
+    - Inputs: <Details of inputs and their types>
+    - Outputs: <Details of outputs and their types>
+    - Dependencies: <Dependencies on other modules/functions>
+    - Data structures: <Details of data structures used>
     """
-    # Encode and call the Hugging Face model
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(hf_model.device)
-    outputs = hf_model.generate(inputs["input_ids"], max_length=4096, num_return_sequences=1)

-    # Decode the response
-    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return decoded_output
+    # Prepare the chat-style input for Qwen
+    messages = [
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt", truncation=True, max_length=32768).to(hf_model.device)
+
+    # Generate output from the model
+    generated_ids = hf_model.generate(
+        **model_inputs,
+        max_new_tokens=2048
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    # Decode and return the response
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response

 def generate_documentation_page():
     st.subheader(f"Generate Documentation for {st.session_state.current_project}")
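
For reference, a minimal usage sketch of the updated helper, assuming the module-level hf_model and tokenizer loaded in this commit. The shape of file_contents is inferred from the file_contents.items() loop and the os.path.basename(file_path) call in the diff; the Gemini output string and the functionality description below are hypothetical placeholders, not values from this commit.

# Hypothetical invocation of the updated function; all values are placeholders.
file_contents = {
    "/projects/demo/utils.py": "def load_data(path):\n    return open(path).read()\n",
    "/projects/demo/report.py": "def render_report(data):\n    ...\n",
}
gemini_output = "load_data (utils.py), render_report (report.py)"  # stand-in for the earlier Gemini step's response
functionality_description = "Load a data file and render it as a report"

docs = validate_and_generate_documentation(
    hf_model,
    tokenizer,
    gemini_output,
    file_contents,
    functionality_description,
)
print(docs)

Because the new generation path slices each output_ids[len(input_ids):] before decoding, docs contains only the model's completion, without the echoed prompt.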