File size: 5,448 Bytes
8b644df
 
 
d1bab71
18ef497
d1bab71
bbcea92
2e03541
 
bbcea92
ccad9ef
 
 
 
 
 
 
 
 
 
 
 
 
2e03541
a93c076
 
 
ccad9ef
bbcea92
2e03541
8b644df
bbcea92
2e03541
 
bcc9c6b
2e03541
bbcea92
ccad9ef
 
 
 
bbcea92
96e9aa5
ccad9ef
 
112e6b8
bbcea92
 
ccad9ef
 
 
 
8b644df
bcc9c6b
8b644df
2cf6bfb
1b21c00
 
2cf6bfb
1b21c00
 
 
2cf6bfb
1b21c00
2cf6bfb
1b21c00
2cf6bfb
1b21c00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96e9aa5
655c6e2
96e9aa5
655c6e2
 
 
 
 
96e9aa5
655c6e2
96e9aa5
bbc12dc
 
2cf6bfb
 
 
 
 
 
f722dc4
 
 
 
 
 
 
 
 
2cf6bfb
 
bbc12dc
2cf6bfb
8b644df
f722dc4
8b644df
 
 
 
 
 
96e9aa5
8b644df
 
112e6b8
ccad9ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
import spaces

# Resolve the adapter and its configuration
# Identifier of the PEFT adapter; it is passed to `from_pretrained` here and
# again further down when the adapter is attached to the base model.
peft_model_id = "rootxhacker/CodeAstra-7B"
# Adapter configuration; its `base_model_name_or_path` attribute names the
# checkpoint from which the base model and the tokenizer are loaded.
config = PeftConfig.from_pretrained(peft_model_id)

# Function to move tensors to CPU
def to_cpu(obj):
    """Recursively move every tensor inside *obj* to the CPU.

    Args:
        obj: A ``torch.Tensor``, a list / tuple / dict that may hold tensors
            at any nesting depth, or any other object.

    Returns:
        A structure mirroring *obj* in which each tensor has been replaced by
        the result of its ``.cpu()`` call. Lists and dicts are rebuilt as
        plain ``list`` / ``dict``; tuples are rebuilt as ``tuple``, except
        namedtuples, which keep their own type. Any other object is returned
        unchanged.
    """
    if isinstance(obj, torch.Tensor):
        return obj.cpu()
    if isinstance(obj, list):
        return [to_cpu(item) for item in obj]
    if isinstance(obj, tuple):
        moved = [to_cpu(item) for item in obj]
        # A namedtuple constructor takes one positional argument per field,
        # so it must be rebuilt with * instead of from a single iterable;
        # plain tuple(...) would silently drop the field names.
        if hasattr(obj, "_fields"):
            return type(obj)(*moved)
        return tuple(moved)
    if isinstance(obj, dict):
        return {key: to_cpu(value) for key, value in obj.items()}
    return obj

# Load the model
# 4-bit quantization is requested through an explicit `BitsAndBytesConfig`;
# passing `load_in_4bit=True` straight to `from_pretrained` is deprecated in
# favour of the `quantization_config` argument.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map='auto',
)
# The tokenizer is taken from the same base checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
# `model` is rebound to the adapter-wrapped model; later code uses this name.
model = PeftModel.from_pretrained(model, peft_model_id)

@spaces.GPU()
def get_completion(query, model, tokenizer, max_new_tokens=1024, temperature=0.7):
    """Generate a sampled completion for *query* on the GPU.

    Args:
        query: Prompt text to tokenize and feed to the model.
        model: Model object exposing ``cuda()``, ``cpu()`` and ``generate()``.
        tokenizer: Callable tokenizer used to encode the prompt and to decode
            the generated ids.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature forwarded to ``generate``.

    Returns:
        The decoded first output sequence with special tokens skipped. The
        caller in this file searches that text for the prompt's content, i.e.
        it expects the prompt to be part of the decoded output. If anything
        raises along the way, the string ``"An error occurred: <message>"``
        is returned instead of propagating the exception.
    """
    try:
        # Move model to CUDA
        model = model.cuda()
        # Ensure input is on CUDA
        inputs = tokenizer(query, return_tensors="pt").to('cuda')
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
            )
        # Move outputs to CPU before decoding
        outputs = to_cpu(outputs)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Failures are reported as text so the caller always receives a string.
        return f"An error occurred: {str(e)}"
    finally:
        # Move model back to CPU to free up GPU memory
        model.cpu()
        torch.cuda.empty_cache()

# Sentence in the prompt that directly precedes the submitted code.
_REVIEW_MARKER = "Now, review the following code using the same approach:"
# Heading that opens a review; the prompt's instruction list repeats it.
_FIRST_HEADING = "1. Understanding of the code"


def _extract_review(full_response, code_to_analyze):
    """Cut the model's review out of the decoded prompt-plus-completion text."""
    # Look for the submitted code only after the marker sentence, so that a
    # snippet which also occurs in the worked PHP example (e.g.
    # "return true;") is not matched inside that example.
    marker_pos = full_response.find(_REVIEW_MARKER)
    search_from = 0 if marker_pos == -1 else marker_pos + len(_REVIEW_MARKER)

    # Surrounding whitespace is irrelevant for locating the code and the
    # remainder is stripped anyway, so match on the stripped snippet.
    needle = code_to_analyze.strip()
    code_pos = full_response.find(needle, search_from)
    if code_pos == -1:
        return "Error: Unable to extract the AI's response. Here's the full output:\n\n" + full_response

    ai_response = full_response[code_pos + len(needle):].strip()

    # The instruction list that follows the code contains the first heading
    # once; a second occurrence therefore marks the start of the model's own
    # review. Without a second occurrence, start from the first one.
    first = ai_response.find(_FIRST_HEADING)
    if first != -1:
        second = ai_response.find(_FIRST_HEADING, first + 1)
        ai_response = ai_response[second if second != -1 else first:]
    return ai_response


@spaces.GPU()
def code_review(code_to_analyze):
    """Ask the model for a structured security review of *code_to_analyze*.

    The submitted code is embedded in a few-shot prompt (one worked PHP
    example followed by the code and a list of required sections), sent
    through ``get_completion`` with the module-level ``model`` and
    ``tokenizer``, and the review is then cut out of the returned text.

    Args:
        code_to_analyze: Source code to review, as text.

    Returns:
        The extracted review text. An error message is returned when no code
        was supplied, or when the submitted code cannot be located in the
        text returned by ``get_completion`` (that message includes the full
        returned text).
    """
    # Nothing to review: answer immediately instead of running generation.
    if not code_to_analyze or not code_to_analyze.strip():
        return "Error: No code was provided for analysis."

    few_shot_prompt = f"""Review the following code for security vulnerabilities, logic flaws, and potential improvements:

```php
function authenticateUser($username, $password) {{
    $conn = new mysqli("localhost", "user", "password", "database");
    $query = "SELECT * FROM users WHERE username = '$username' AND password = '$password'";
    $result = $conn->query($query);
    if ($result->num_rows > 0) {{
        return true;
    }}
    return false;
}}
```

1. Understanding of the code:
   - This function attempts to authenticate a user by checking their username and password against a database.
   - It establishes a database connection, constructs a SQL query with the provided credentials, and executes it.
   - If any matching rows are found, it returns true (authenticated); otherwise, it returns false.

2. Potential security issues:
   - SQL Injection vulnerability: The username and password are directly inserted into the query without sanitization.
   - Plaintext password storage: The code suggests that passwords are stored in plaintext in the database.
   - Hardcoded database credentials: Connection details are hardcoded, which is a security risk.

3. Potential logic vulnerabilities:
   - Multiple user authentication: The function returns true if more than one row is returned, which could lead to authentication issues if multiple users have the same credentials.
   - No input validation: There's no checking for empty or null username/password inputs.

4. Suggestions for improvement:
   - Use prepared statements to prevent SQL injection.
   - Implement proper password hashing (e.g., using password_hash() and password_verify()).
   - Store database credentials securely and separately from the code.
   - Implement proper error handling and use constant-time comparison for passwords.
   - Add input validation for username and password.
   - Consider using a single-row fetch instead of num_rows to ensure single-user authentication.

Now, review the following code using the same approach:

{code_to_analyze}

Provide a detailed review including:
1. Understanding of the code
2. Potential security issues
3. Potential logic vulnerabilities
4. Suggestions for improvement

Start each section with its number and title."""

    full_response = get_completion(few_shot_prompt, model, tokenizer)
    return _extract_review(full_response, code_to_analyze)


# Build the web UI: a multi-line box for the submitted code and a box for
# the resulting review, wired to `code_review`.
_code_box = gr.Textbox(lines=10, label="Enter code to analyze")
_result_box = gr.Textbox(label="Code Review Result")
_description = "This tool analyzes code for potential security flaws, logic vulnerabilities, and provides guidance on secure coding practices."

iface = gr.Interface(
    fn=code_review,
    inputs=_code_box,
    outputs=_result_box,
    title="Code Review Expert",
    description=_description,
)

# Start serving the interface.
iface.launch()