|
""" |
|
Module `collect` - Data Handling and RudderStack Integration |
|
|
|
This module provides functionalities to handle and send learning data to RudderStack |
|
for the purpose of analysis and to improve the gpt-engineer system. The data is sent |
|
only when the user gives consent to share. |
|
|
|
Functions: |
|
send_learning(learning): Sends learning data to RudderStack. |
|
collect_learnings(prompt, model, temperature, config, memory, review): Processes and sends learning data. |
|
collect_and_send_human_review(prompt, model, temperature, config, memory): Collects human feedback and sends it. |
|
|
|
Dependencies: |
|
hashlib: For generating SHA-256 hash. |
|
typing: For type annotations. |
|
gpt_engineer.core: Core functionalities of gpt-engineer. |
|
    gpt_engineer.applications.cli.learning: Handles the extraction of learning data.
|
|
|
Notes: |
|
Data sent to RudderStack is not shared with third parties and is used solely to |
|
improve gpt-engineer and allow it to handle a broader range of use cases. |
|
Consent logic is in gpt_engineer/learning.py. |
|
""" |
|
|
|
from typing import Tuple |
|
|
|
from gpt_engineer.applications.cli.learning import ( |
|
Learning, |
|
Review, |
|
extract_learning, |
|
human_review_input, |
|
) |
|
from gpt_engineer.core.default.disk_memory import DiskMemory |
|
from gpt_engineer.core.prompt import Prompt |
|
|
|
|
|
def send_learning(learning: Learning):
    """
    Forward a single learning record to RudderStack as a ``"learning"`` event.

    Parameters
    ----------
    learning : Learning
        The record to transmit. Its ``session`` attribute is used as the
        event's user id and ``to_dict()`` supplies the event properties.

    Notes
    -----
    The RudderStack client is imported inside the function, and its write key
    and data-plane URL are assigned on every call before the event is tracked.

    No consent check is performed here. Per the module-level notes, callers
    are expected to invoke this only after the user has agreed to share data
    (NOTE(review): confirm where that check lives).
    """
    # Function-scope import: the analytics client is only loaded when an
    # event is actually sent.
    import rudderstack.analytics as analytics

    # Point the client at the project's RudderStack source.
    analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
    analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"

    session_id = learning.session
    event_properties = learning.to_dict()
    analytics.track(
        user_id=session_id,
        event="learning",
        properties=event_properties,
    )
|
|
|
|
|
def collect_learnings(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
    review: Review,
):
    """
    Build a learning record and send it to RudderStack, trimming logs on failure.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Tuple[str, ...]
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory for storing and retrieving data.
    review : Review
        An instance of Review containing human feedback on the model's response.

    Returns
    -------
    None

    Notes
    -----
    The record is produced by ``extract_learning`` and sent once. If that
    send raises ``RuntimeError``, the failure is treated as "event too large":
    the tail of ``learnings.logs`` is cut off, a ``[REMOVED n CHARACTERS]``
    marker is appended, and the send is retried once. A second
    ``RuntimeError`` is reported on stdout and swallowed, so this function
    never propagates a ``RuntimeError`` from sending.
    """
    learnings = extract_learning(prompt, model, temperature, config, memory, review)
    try:
        send_learning(learnings)
    except RuntimeError:
        # Assume the event was rejected for its size and try again with
        # shorter logs.
        # 32 KiB budget -- presumably RudderStack's per-event limit; confirm
        # against the SDK documentation.
        max_size = 32 << 10
        current_size = len(learnings.to_json().encode("utf-8"))  # size in bytes

        # Clamp at zero. The error can be raised while this JSON is itself
        # under the budget (the size measured here is only the learning's own
        # serialization). An unclamped negative overflow could make
        # `remove_length` zero or negative, and then `logs[:-remove_length]`
        # would either wipe the logs (`[:-0]` is `[:0]`) or keep a prefix
        # instead of dropping a tail.
        overflow = max(0, current_size - max_size)

        # Extra room for the "[REMOVED ...]" marker itself plus a safety
        # margin. Always strictly positive thanks to the clamp above.
        remove_length = overflow + len(f"[REMOVED {overflow} CHARACTERS]") + 100

        learnings.logs = (
            learnings.logs[:-remove_length]
            + f"\n\n[REMOVED {remove_length} CHARACTERS]"
        )

        print(
            "WARNING: learning too big, removing some parts. "
            "Please report if this results in a crash."
        )
        try:
            # Second and final attempt with the trimmed logs.
            send_learning(learnings)
        except RuntimeError:
            # Best effort only: telemetry must not break the main workflow.
            print(
                "Sending learnings crashed despite truncation. Progressing without saving learnings."
            )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def collect_and_send_human_review(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
):
    """
    Ask the user for a review and, if one is given, submit it with the learnings.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Tuple[str, ...]
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory for storing and retrieving data.

    Returns
    -------
    None

    Notes
    -----
    The review is obtained from ``human_review_input``. A falsy result means
    nothing is collected or sent; otherwise the review is passed, together
    with all other arguments, to ``collect_learnings``.
    """
    feedback = human_review_input()

    # Nothing to report when no review was produced.
    if not feedback:
        return

    collect_learnings(prompt, model, temperature, config, memory, feedback)
|
|