hululuzhu committed on
Commit
d56ff99
·
1 Parent(s): 73ef5f5

Add headers and todo

Browse files
Files changed (1) hide show
  1. README.md +15 -25
README.md CHANGED
@@ -11,11 +11,15 @@ widget:
11
  - text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
12
  ---
13
 
14
- # A code autocomplete T5 model for solidity
15
- - Hello world example to use this model, notice the input `text` includes
 
 
16
  - Header solidity version like `pragma solidity ^0.5.7`
17
  - Ancestor class/library info, e.g. public functions and constants from `ParentA`
18
  - Contract/Library/Interface declaration header, e.g. `HelloWorld` ended with `{`
 
 
19
 
20
  ```python
21
  # !pip install transformers -q
@@ -38,35 +42,16 @@ print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
38
  # Expect outcome
39
  """
40
  string public constant name = "Hello World";
41
- string public constant symbol = "HELPER";
42
- uint8 public constant decimals = 18;
43
- uint256 public constant initialSupply = 0;
44
  uint256 public constant override returns (uint256) {
45
  return initialSupply;
46
  }
47
  function initialSupply() public view returns (uint256) {
48
- return initialSupply;
49
- }
50
- function balanceOf(address _owner) public view returns (uint256) {
51
- return balanceOf(_owner);
52
- }
53
- function transfer(address _to, uint256 _value) public returns (bool) {
54
- balanceOf[msg.sender] -= _value;
55
- balanceOf[_to] += _value;
56
- emit Transfer(msg.sender, _to, _value);
57
- return true;
58
- }
59
- function transferFrom(address _from, address _to, uint256 _value) public returns (bool) {
60
- balanceOf[_from] -= _value;
61
- balanceOf[_to] += _value;
62
- emit Transfer(_from, _to, _value);
63
- return true;
64
- }
65
- function approve(address _spender, uint256 _value) public returns (bool)
66
  """
67
  ```
68
 
69
-
70
  - Base T5 code model: https://huggingface.co/Salesforce/codet5-large
71
  - Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
72
  - Processing steps: Clean, contract-level segmentation separation, split in and out
@@ -116,4 +101,9 @@ function approve(address _spender, uint256 _value) public returns (bool)
116
  }
117
  }
118
  ```
119
- - Source training code: To be added
 
 
 
 
 
 
11
  - text: "pragma solidity ^0.5.7;\n// Context: ParentA | Functions: helloA helloB | Constants: constantA \ncontract HelloWorld is ParentA {"
12
  ---
13
 
14
+ # A code generation T5 model for solidity (web3 smart contract)
15
+
16
+ ## Hello World example
17
+ - A hello world example of how to use this model; note that the input `text` includes:
18
  - Header solidity version like `pragma solidity ^0.5.7`
19
  - Ancestor class/library info, e.g. public functions and constants from `ParentA`
20
  - Contract/Library/Interface declaration header, e.g. `HelloWorld` ended with `{`
21
+ - Or simply try the test widget on the right side of the window; however,
22
+ the output quality is known to be worse without decoding params
23
 
24
  ```python
25
  # !pip install transformers -q
 
42
  # Expect outcome
43
  """
44
  string public constant name = "Hello World";
45
+ ...
 
 
46
  uint256 public constant override returns (uint256) {
47
  return initialSupply;
48
  }
49
  function initialSupply() public view returns (uint256) {
50
+ ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  """
52
  ```
53
 
54
+ ## Background
55
  - Base T5 code model: https://huggingface.co/Salesforce/codet5-large
56
  - Source data: https://huggingface.co/datasets/mwritescode/slither-audited-smart-contracts
57
  - Processing steps: Clean, contract-level segmentation separation, split in and out
 
101
  }
102
  }
103
  ```
104
+ - Source training code: To be added
105
+
106
+ ## Future TODO
107
+ - The model is significantly under-trained because of a limited GPU budget; a full training run needs about 10x the Colab resources (~$100)
108
+ - The ways this model can be used are quite limited; we could potentially switch to a decoder-only GPT-2 model for comparison, although CodeT5 has the advantage of strong code-specific optimization
109
+ - More classifiers (T5- or BERT-like) are needed to detect potential defects.