File size: 5,539 Bytes
d26280a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
"""
This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
only when the user gives consent to share.
The module provides the following main functions:
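- `send_learning`: Directly send learning data to RudderStack.
- `collect_learnings`: Extract, possibly adjust, and send the learning data based on
provided input parameters.
- `collect_and_send_human_review`: Obtain a human review via `human_review_input` and,
if one is given, collect and send the learnings.
- `steps_file_hash`: Computes the SHA-256 hash of the steps file, which might be used
for identifying the exact version or changes in the steps. (Currently commented out.)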
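Dependencies:
- hashlib: For generating SHA-256 hash (only needed by the commented-out `steps_file_hash`).
- typing: For type annotations.
- rudderstack.analytics: Client used to send events (imported inside `send_learning`).
- gpt_engineer.core: Core functionalities of gpt-engineer.
- gpt_engineer.applications.cli.learning: Handles the extraction of learning data.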
Note:
Data sent to RudderStack is not shared with third parties and is used solely to
improve gpt-engineer and allow it to handle a broader range of use cases.
Consent logic is in gpt_engineer/learning.py.
"""
from typing import Tuple
from gpt_engineer.applications.cli.learning import (
Learning,
Review,
extract_learning,
human_review_input,
)
from gpt_engineer.core.default.disk_memory import DiskMemory
def send_learning(learning: Learning):
    """
    Forward one learning record to RudderStack as a ``"learning"`` event.

    Note:
        This function is only called if consent is given to share data.
        Data is not shared to a third party. It is used with the sole purpose of
        improving gpt-engineer, and letting it handle more use cases.
        Consent logic is in gpt_engineer/learning.py

    Parameters
    ----------
    learning : Learning
        The learning data to send. Its ``session`` attribute is used as the
        user id and the result of ``to_dict()`` as the event properties.
    """
    # Function-scope import: the client module is loaded on the first call
    # rather than when this module is imported.
    import rudderstack.analytics as analytics

    # Configure the client before emitting the event.
    analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
    analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"

    session_id = learning.session
    payload = learning.to_dict()
    analytics.track(user_id=session_id, event="learning", properties=payload)  # type: ignore
def collect_learnings(
    prompt: str,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
    review: Review,
):
    """
    Collect the learning data and send it to RudderStack for analysis.

    The learning record is built with ``extract_learning`` and sent with
    ``send_learning``. If sending raises ``RuntimeError``, the tail of
    ``learnings.logs`` is cut off and the send is retried once; if the retry
    also raises ``RuntimeError``, a message is printed and the function
    returns without sending anything.

    Parameters
    ----------
    prompt : str
        The prompt, forwarded to ``extract_learning``.
    model : str
        The name of the model used.
    temperature : float
        The temperature used.
    config : Tuple[str, ...]
        The configuration, forwarded to ``extract_learning``.
    memory : DiskMemory
        The memory object, forwarded to ``extract_learning``.
    review : Review
        The human review, forwarded to ``extract_learning``.

    Returns
    -------
    None

    Notes
    -----
    Only ``RuntimeError`` is handled; any other exception raised while
    extracting or sending propagates to the caller.
    """
    learnings = extract_learning(prompt, model, temperature, config, memory, review)
    try:
        send_learning(learnings)
    except RuntimeError:
        # try to remove some parts of learning that might be too big
        # rudderstack max event size is 32kb
        max_size = 32 << 10  # 32KB in bytes
        current_size = len(learnings.to_json().encode("utf-8"))  # get size in bytes

        # Clamp at zero: if the error was not caused by the payload size the
        # raw difference is negative, which could make `remove_length` zero
        # (`logs[:-0]` drops everything) or negative (`logs[:k]` keeps only a
        # short prefix). With the clamp, `remove_length` is always positive.
        overflow = max(current_size - max_size, 0)

        # Add some extra characters for the "[REMOVED...]" string and for safety margin
        remove_length = overflow + len(f"[REMOVED {overflow} CHARACTERS]") + 100

        learnings.logs = (
            learnings.logs[:-remove_length]
            + f"\n\n[REMOVED {remove_length} CHARACTERS]"
        )

        print(
            "WARNING: learning too big, removing some parts. "
            "Please report if this results in a crash."
        )
        try:
            send_learning(learnings)
        except RuntimeError:
            # Best effort only: telemetry must not stop the program.
            print(
                "Sending learnings crashed despite truncation. Progressing without saving learnings."
            )
# def steps_file_hash():
# """
# Compute the SHA-256 hash of the steps file.
#
# Returns
# -------
# str
# The SHA-256 hash of the steps file.
# """
# with open(steps.__file__, "r") as f:
# content = f.read()
# return hashlib.sha256(content.encode("utf-8")).hexdigest()
def collect_and_send_human_review(
    prompt: str,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
):
    """
    Obtain a human review and, if one is given, collect and send the learnings.

    The review is requested through ``human_review_input()`` (presumably an
    interactive prompt; see that function for details). When the result is
    truthy it is passed, together with the other arguments, to
    ``collect_learnings``. When it is falsy nothing is collected or sent.

    Parameters
    ----------
    prompt : str
        The prompt, forwarded to ``collect_learnings``.
    model : str
        The name of the model used, forwarded to ``collect_learnings``.
    temperature : float
        The temperature used, forwarded to ``collect_learnings``.
    config : Tuple[str, ...]
        The configuration, forwarded to ``collect_learnings``.
    memory : DiskMemory
        The memory object, forwarded to ``collect_learnings``.

    Returns
    -------
    None
    """
    review = human_review_input()
    if review:
        collect_learnings(prompt, model, temperature, config, memory, review)
|