{
"model_type": "encoder_decoder",
"encoder_type": "csumlm_encoder",
"decoder_type": "csumlm_decoder",
"model_name": "CognoSphere/CSUMLM",
"model_description": "CognoSphere Unified Multimodal Language Model (CSUMLM) is an advanced AI model capable of processing and generating text, images, and audio data. It combines transfer learning, deep learning, self-supervised learning, meta-learning, deep meta-learning, reinforcement learning, and cross-domain analogy extraction to achieve state-of-the-art performance in multimodal tasks.",
"encoder": {
"type": "transformer",
"num_layers": 12,
"hidden_size": 768,
"num_attention_heads": 12,
"intermediate_size": 3072
},
"decoder": {
"type": "transformer",
"num_layers": 12,
"hidden_size": 768,
"num_attention_heads": 12,
"intermediate_size": 3072
},
"multimodal_fusion": {
"type": "transformer",
"num_layers": 6,
"hidden_size": 1024,
"num_attention_heads": 16,
"intermediate_size": 4096
},
"training_data": {
"text": [
"path/to/text/data/file1.txt",
"path/to/text/data/file2.txt",
"..."
],
"images": [
"path/to/image/data/image1.jpg",
"path/to/image/data/image2.png",
"..."
],
"audio": [
"path/to/audio/data/audio1.wav",
"path/to/audio/data/audio2.mp3",
"..."
]
},
"tokenizer": {
"type": "byte-level-bpe",
"vocab_size": 50000,
"merge_file": "path/to/bpe/merge_file.txt"
},
"optimizer": {
"type": "adamw",
"learning_rate": 5e-5,
"weight_decay": 0.01
},
"loss_function": "cross_entropy",
"evaluation_metrics": [
"bleu",
"meteor",
"rouge",
"cider"
]
}