Upload 9 files
Files changed:

- Figure1.png +0 -0
- README.md +3 -3
- Voc_prior +130 -0
- app.py +144 -0
- config.json +29 -0
- mattergpt_wrapper.py +70 -0
- model.py +312 -0
- pytorch_model.pt +3 -0
- requirements.txt +4 -0
Figure1.png
ADDED
[Image: Figure1.png — the header figure shown in the app ("De novo crystal generation by MatterGPT targeting desired Eg, Ef")]
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 title: MatterGPT CPU
-emoji:
+emoji: 🖼
 colorFrom: purple
-colorTo:
+colorTo: red
 sdk: gradio
 sdk_version: 4.41.0
 app_file: app.py
@@ -10,4 +10,4 @@ pinned: false
 license: mit
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Voc_prior
ADDED
@@ -0,0 +1,130 @@
S
o-o
He
Dy
-o-
Ne
+-o
Re
Bi
Cu
oo+
16
Sc
--o
Nd
Lu
-+o
Te
Si
o+o
Er
1
Sr
Hg
3
oo-
8
Ru
H
Mo
Tc
12
11
+oo
Pb
6
In
La
--+
C
Sn
Se
B
Ar
o--
-o+
Ga
++o
Rh
Sm
Ir
Li
Tl
18
I
Cl
Ag
Ba
Ta
Ho
Tb
As
-+-
Gd
Os
O
15
---
W
F
13
Pm
K
Na
9
Eu
Ce
14
-++
5
Ge
Yb
Al
Rb
Pd
Ni
Cd
Hf
P
Zn
Ti
Nb
0
Pr
7
Mg
Y
+-+
ooo
Pt
+--
19
Cs
N
-oo
+o-
o-+
Xe
4
o+-
Tm
2
Cr
Fe
+o+
Zr
++-
Kr
10
+++
Co
o++
Be
Br
Mn
Ca
Au
V
Sb
17
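The 130 entries above mix element symbols (H, Si, Fe, ...), SLICES edge-direction labels (three-character strings over o/+/- such as o-o and ++-), and integer node indices. A minimal sketch of how this file becomes the model vocabulary, mirroring SimpleTokenizer in mattergpt_wrapper.py; the '<' and '>' markers are added by the tokenizer itself, consistent with "vocab_size": 132 in config.json:

# Sketch: how Voc_prior is turned into the token vocabulary (mirrors SimpleTokenizer).
with open("Voc_prior") as f:
    tokens = f.read().splitlines()

# The tokenizer adds the '<' and '>' special markers and sorts the result.
vocab = sorted(set(tokens + ['<', '>']))
stoi = {tok: i for i, tok in enumerate(vocab)}

print(len(vocab))    # 132, matching "vocab_size" in config.json
print('>' in stoi)   # True; '>' is the start-of-sequence context used in app.py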
app.py
ADDED
@@ -0,0 +1,144 @@
import gradio as gr
import torch
from mattergpt_wrapper import MatterGPTWrapper, SimpleTokenizer
import os
from slices.core import SLICES
from pymatgen.core.structure import Structure
from pymatgen.io.cif import CifWriter
from pymatgen.io.ase import AseAtomsAdaptor
from ase.io import write as ase_write
import tempfile
import time

# Set the number of threads PyTorch uses
torch.set_num_threads(2)

def load_quantized_model(model_path):
    model = MatterGPTWrapper.from_pretrained(model_path)
    model.to('cpu')
    model.eval()
    quantized_model = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8
    )
    return quantized_model


# Load and quantize the model
model_path = "./"
quantized_model = load_quantized_model(model_path)
quantized_model.to("cpu")
quantized_model.eval()

# Load the tokenizer
tokenizer_path = "Voc_prior"
tokenizer = SimpleTokenizer(tokenizer_path)

# Initialize SLICES backend
try:
    backend = SLICES(relax_model="chgnet", fmax=0.4, steps=25)
except Exception as e:
    backend = SLICES(relax_model=None)


def generate_slices_quantized(quantized_model, tokenizer, formation_energy, band_gap, max_length, temperature, do_sample, top_k, top_p):
    condition = torch.tensor([[float(formation_energy), float(band_gap)]], dtype=torch.float32)
    context = '>'
    x = torch.tensor([[tokenizer.stoi[context]]], dtype=torch.long)

    with torch.no_grad():
        generated = quantized_model.generate(x, prop=condition, max_length=max_length,
                                             temperature=temperature, do_sample=do_sample,
                                             top_k=top_k, top_p=top_p)

    return tokenizer.decode(generated[0].tolist())

def generate_slices(formation_energy, band_gap):
    return generate_slices_quantized(quantized_model, tokenizer, formation_energy, band_gap,
                                     quantized_model.config.block_size, 1.2, True, 0, 0.9)

def wrap_structure(structure):
    """Wrap all atoms back into the unit cell."""
    for i, site in enumerate(structure):
        frac_coords = site.frac_coords % 1.0
        structure.replace(i, species=site.species, coords=frac_coords, coords_are_cartesian=False)
    return structure

def convert_and_visualize(slices_string):
    try:
        structure, energy = backend.SLICES2structure(slices_string)

        # Wrap atoms back into the unit cell
        structure = wrap_structure(structure)

        # Generate CIF and save to temporary file
        cif_file = tempfile.NamedTemporaryFile(mode='w', suffix='.cif', delete=False)
        cif_writer = CifWriter(structure)
        cif_writer.write_file(cif_file.name)

        # Generate structure summary
        summary = f"Formula: {structure.composition.reduced_formula}\n"
        summary += f"Number of sites: {len(structure)}\n"
        summary += f"Lattice parameters: a={structure.lattice.a:.3f}, b={structure.lattice.b:.3f}, c={structure.lattice.c:.3f}\n"
        summary += f"Angles: alpha={structure.lattice.alpha:.2f}, beta={structure.lattice.beta:.2f}, gamma={structure.lattice.gamma:.2f}\n"
        summary += f"Volume: {structure.volume:.3f} Å³\n"
        summary += f"Density: {structure.density:.3f} g/cm³"

        # Generate structure image using ASE and save to temporary file
        atoms = AseAtomsAdaptor.get_atoms(structure)
        image_file = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
        ase_write(image_file.name, atoms, format='png', rotation='10x,10y,10z')

        return cif_file.name, image_file.name, summary, f"Conversion successful. Energy: {energy:.4f} eV/atom", True
    except Exception as e:
        return "", "", "", f"Conversion failed. Error: {str(e)}", False

def generate_and_convert(formation_energy, band_gap):
    max_attempts = 5
    start_time = time.time()
    max_time = 300  # 5 minutes maximum execution time

    for attempt in range(max_attempts):
        if time.time() - start_time > max_time:
            return "Exceeded maximum execution time", "", "", "", "Generation and conversion failed due to timeout"

        slices_string = generate_slices(formation_energy, band_gap)
        cif_file, image_file, structure_summary, status, success = convert_and_visualize(slices_string)

        if success:
            return slices_string, cif_file, image_file, structure_summary, f"Successful on attempt {attempt + 1}: {status}"

        if attempt == max_attempts - 1:
            return slices_string, "", "", "", f"Failed after {max_attempts} attempts: {status}"

    return "Failed to generate valid SLICES string", "", "", "", "Generation failed"

# Create the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# Crystal Inverse Designer: From Properties to Structures")

    with gr.Row():
        with gr.Column():
            gr.Image("Figure1.png", label="De novo crystal generation by MatterGPT targeting desired Eg, Ef", width=1000, height=300)
            gr.Markdown("**Enter desired properties to inversely design materials (encoded in SLICES), then decode it into crystal structure.**")

    with gr.Row():
        with gr.Column(scale=2):
            band_gap = gr.Number(label="Band Gap (eV)", value=2.0)
            formation_energy = gr.Number(label="Formation Energy (eV/atom)", value=-1.0)
            generate_button = gr.Button("Generate")

        with gr.Column(scale=3):
            slices_output = gr.Textbox(label="Generated SLICES String")
            cif_output = gr.File(label="Download CIF", file_types=[".cif"])
            structure_image = gr.Image(label="Structure Visualization")
            structure_summary = gr.Textbox(label="Structure Summary", lines=6)
            conversion_status = gr.Textbox(label="Conversion Status")

    generate_button.click(
        generate_and_convert,
        inputs=[formation_energy, band_gap],
        outputs=[slices_output, cif_output, structure_image, structure_summary, conversion_status]
    )

iface.launch(share=True)
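The Gradio button simply calls generate_and_convert, so the same pipeline can be exercised without the UI. A hypothetical snippet, assuming the definitions above have already been run (for example in a notebook, with the final iface.launch() call skipped):

# Hypothetical headless use of the helpers defined in app.py.
slices_string = generate_slices(formation_energy=-1.0, band_gap=2.0)
print("SLICES:", slices_string)

# Decode the SLICES string back into a crystal structure, CIF file and summary.
cif_path, png_path, summary, status, ok = convert_and_visualize(slices_string)
print(status)
if ok:
    print(summary)   # formula, lattice parameters, volume, density

Generation is stochastic (do_sample=True, temperature 1.2, top_p 0.9), which is why generate_and_convert retries up to five times until the SLICES string decodes into a valid structure.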
config.json
ADDED
@@ -0,0 +1,29 @@
{
  "model_type": "gpt",
  "architectures": [
    "GPT"
  ],
  "vocab_size": 132,
  "block_size": 397,
  "n_layer": 12,
  "n_head": 12,
  "n_embd": 768,
  "num_props": 2,
  "activation_function": "gelu_new",
  "resid_pdrop": 0.1,
  "embd_pdrop": 0.1,
  "attn_pdrop": 0.1,
  "layer_norm_epsilon": 1e-5,
  "initializer_range": 0.02,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "summary_activation": null,
  "summary_proj_to_labels": true,
  "summary_first_dropout": 0.1,
  "scale_attn_weights": true,
  "use_cache": true,
  "bos_token_id": 130,
  "eos_token_id": 131,
  "lstm": false,
  "lstm_layers": 0
}
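Only part of this file is consumed by the custom model code: GPTConfig in model.py stores every key as an attribute, and the fields the GPT class actually reads are vocab_size, block_size, n_layer, n_head, n_embd, num_props, the dropout rates and the lstm flags; the remaining Hugging Face-style keys appear to be kept for PretrainedConfig compatibility. A simplified sketch of how the file becomes a model configuration (MatterGPTWrapper.from_pretrained goes through CustomGPTConfig first):

import json
from model import GPTConfig

# Simplified version of the config loading done in MatterGPTWrapper.from_pretrained.
with open("config.json") as f:
    cfg_dict = json.load(f)

cfg = GPTConfig(**cfg_dict)   # vocab_size/block_size explicit, the rest become attributes
print(cfg.n_layer, cfg.n_head, cfg.n_embd)   # 12 12 768
print(cfg.num_props, cfg.lstm)               # 2 False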
mattergpt_wrapper.py
ADDED
@@ -0,0 +1,70 @@
import torch
from torch import nn
from transformers import PreTrainedModel, PretrainedConfig
from model import GPT, GPTConfig  # Import your original model and config classes
import json

class CustomGPTConfig(PretrainedConfig):
    model_type = "gpt"

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        for key, value in kwargs.items():
            setattr(self, key, value)

class MatterGPTWrapper(PreTrainedModel):
    config_class = CustomGPTConfig
    base_model_prefix = "gpt"

    def __init__(self, config):
        super().__init__(config)
        self.model = GPT(GPTConfig(**config.__dict__))

    def forward(self, input_ids, attention_mask=None, labels=None, prop=None):
        return self.model(input_ids, targets=labels, prop=prop)

    def generate(self, input_ids, prop, max_length, num_return_sequences=1, **kwargs):
        steps = max_length - input_ids.shape[1]
        return self.model.sample(input_ids, steps, prop=prop, **kwargs)

    @classmethod
    def from_pretrained(cls, pretrained_model_path, *model_args, **kwargs):
        config_file = f"{pretrained_model_path}/config.json"
        with open(config_file, 'r') as f:
            config_dict = json.load(f)

        config = CustomGPTConfig(**config_dict)

        model = cls(config)

        # Load the model weights
        state_dict = torch.load(f"{pretrained_model_path}/pytorch_model.pt", map_location="cpu")
        model.model.load_state_dict(state_dict)

        return model

    def save_pretrained(self, save_directory):
        self.config.save_pretrained(save_directory)
        torch.save(self.model.state_dict(), f"{save_directory}/pytorch_model.pt")

class SimpleTokenizer:
    def __init__(self, vocab_file):
        with open(vocab_file, 'r') as f:
            self.vocab = f.read().splitlines()
        self.vocab = sorted(set(self.vocab + ['<', '>']))
        self.stoi = {ch: i for i, ch in enumerate(self.vocab)}
        self.itos = {i: ch for i, ch in enumerate(self.vocab)}

    def encode(self, text):
        return [self.stoi[token] for token in text.split()]

    def decode(self, ids):
        return " ".join([self.itos[int(i)] for i in ids if i in self.itos]).replace("<", "").strip()

    def __call__(self, text, return_tensors=None):
        encoded = self.encode(text)
        if return_tensors == 'pt':
            import torch
            return {'input_ids': torch.tensor([encoded])}
        return {'input_ids': [encoded]}
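SimpleTokenizer treats a SLICES string as whitespace-separated vocabulary tokens, so encoding and decoding are plain dictionary lookups. A small illustrative round trip (the token sequence here is made up for the example and is not a valid SLICES string):

from mattergpt_wrapper import SimpleTokenizer

tok = SimpleTokenizer("Voc_prior")

# encode() splits on whitespace and maps each token through stoi.
ids = tok.encode("Si O o-o 0 1")
print(ids)

# decode() maps ids back through itos, drops any '<' markers, and rejoins with spaces.
print(tok.decode(ids))   # "Si O o-o 0 1"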
model.py
ADDED
@@ -0,0 +1,312 @@
# -*- coding: utf-8 -*-
# Yan Chen 2023.10

"""
GPT model:
- the initial stem consists of a combination of token encoding and a positional encoding
- the meat of it is a uniform sequence of Transformer blocks
- each Transformer is a sequential combination of a 1-hidden-layer MLP block and a self-attention block
- all blocks feed into a central residual pathway similar to resnets
- the final decoder is a linear projection into a vanilla Softmax classifier
"""

import math,json
import torch
import torch.nn as nn
from torch.nn import functional as F

class GPTConfig:
    """ base GPT config, params common to all GPT versions """
    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1

    def __init__(self, vocab_size, block_size, **kwargs):
        self.vocab_size = vocab_size
        self.block_size = block_size
        for k,v in kwargs.items():
            setattr(self, k, v)

class GPT1Config(GPTConfig):
    """ GPT-1 like network roughly 125M params """
    n_layer = 12
    n_head = 12
    n_embd = 768

class CausalSelfAttention(nn.Module):
    """
    A vanilla multi-head masked self-attention layer with a projection at the end.
    It is possible to use torch.nn.MultiheadAttention here but I am including an
    explicit implementation here to show that there is nothing too scary here.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # causal mask to ensure that attention is only applied to the left in the input sequence
        num = int(bool(config.num_props))
        # num = 1
        self.register_buffer("mask", torch.tril(torch.ones(config.block_size + num, config.block_size + num))
                                     .view(1, 1, config.block_size + num, config.block_size + num))

        self.n_head = config.n_head

    def forward(self, x, layer_past=None):
        B, T, C = x.size()

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        k = self.key(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)    # (B, nh, T, hs)
        q = self.query(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)
        v = self.value(x).view(B, T, self.n_head, C // self.n_head).transpose(1, 2)  # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))
        att = F.softmax(att, dim=-1)
        attn_save = att
        att = self.attn_drop(att)
        y = att @ v  # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # re-assemble all head outputs side by side

        # output projection
        y = self.resid_drop(self.proj(y))
        return y, attn_save

class Block(nn.Module):
    """ an unassuming Transformer block """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )

    def forward(self, x):
        y, attn = self.attn(self.ln1(x))
        x = x + y
        x = x + self.mlp(self.ln2(x))
        return x, attn

class GPT(nn.Module):
    """ the full GPT language model, with a context size of block_size """

    def __init__(self, config):
        super().__init__()
        #print(json.dumps(config.__dict__, indent=2))
        # input embedding stem
        self.config = config
        self.tok_emb = nn.Embedding(config.vocab_size, config.n_embd)
        self.type_emb = nn.Embedding(2, config.n_embd)
        if config.num_props:
            self.prop_nn = nn.Linear(config.num_props, config.n_embd)

        self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.drop = nn.Dropout(config.embd_pdrop)
        # transformer
        self.blocks = nn.Sequential(*[Block(config) for _ in range(config.n_layer)])
        # decoder head
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.block_size = config.block_size

        if config.lstm:
            self.lstm = nn.LSTM(input_size = config.n_embd, hidden_size = config.n_embd, num_layers = config.lstm_layers, dropout = 0.3, bidirectional = False)
        self.apply(self._init_weights)

        #logger.info("number of parameters: %e", sum(p.numel() for p in self.parameters()))

    def get_block_size(self):
        return self.block_size

    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def configure_optimizers(self, train_config):
        """
        This long function is unfortunately doing something very simple and is being very defensive:
        We are separating out all parameters of the model into two buckets: those that will experience
        weight decay for regularization and those that won't (biases, and layernorm/embedding weights).
        We are then returning the PyTorch optimizer object.
        """

        # separate out all parameters to those that will and won't experience regularizing weight decay
        decay = set()
        no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear, torch.nn.LSTM)
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name

                if pn.endswith('bias') or ('bias' in pn):
                    # all biases will not be decayed
                    no_decay.add(fpn)
                elif (pn.endswith('weight') or ('weight' in pn)) and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    no_decay.add(fpn)

        # special case the position embedding parameter in the root GPT module as not decayed
        no_decay.add('pos_emb')

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = decay & no_decay
        union_params = decay | no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
                                                           % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": train_config.weight_decay},
            {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(optim_groups, lr=train_config.learning_rate, betas=train_config.betas)
        return optimizer

    def forward(self, idx, targets=None, prop = None):
        b, t = idx.size()
        assert t <= self.block_size, "Cannot forward, model block size is exhausted."

        if self.config.num_props:
            assert prop.size(-1) == self.config.num_props, "Num_props should be equal to last dim of property vector"

        # forward the GPT model
        token_embeddings = self.tok_emb(idx)  # each index maps to a (learnable) vector
        position_embeddings = self.pos_emb[:, :t, :]  # each position maps to a (learnable) vector
        type_embeddings = self.type_emb(torch.ones((b,t), dtype = torch.long, device = idx.device))
        x = self.drop(token_embeddings + position_embeddings + type_embeddings)

        embed = x

        if self.config.num_props:
            type_embd = self.type_emb(torch.zeros((b, 1), dtype = torch.long, device = idx.device))
            if prop.ndim == 2:
                p = self.prop_nn(prop.unsqueeze(1))  # for single property
            else:
                p = self.prop_nn(prop)  # for multiproperty
            p += type_embd
            x = torch.cat([p, x], 1)

        # x = self.blocks(x)
        attn_maps = []

        for layer in self.blocks:
            x, attn = layer(x)
            attn_maps.append(attn)

        x = self.ln_f(x)
        logits = self.head(x)

        if self.config.num_props:
            num = int(bool(self.config.num_props))
        else:
            num = 0

        logits = logits[:, num:, :]

        # if we are given some desired targets also calculate the loss
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.view(-1))

        return logits, loss, attn_maps, embed  # (num_layers, batch_size, num_heads, max_seq_len, max_seq_len)


    @torch.no_grad()
    def sample(self, x, steps, temperature=1.0, do_sample=False, top_k=None, top_p=None, prop=None):
        """
        Take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in
        the sequence, feeding the predictions back into the model each time. Clearly the sampling
        has quadratic complexity unlike an RNN that is only linear, and has a finite context window
        of block_size, unlike an RNN that has an infinite context window.

        Most likely you'll want to make sure to be in model.eval() mode of operation for this.
        """
        #model.eval()

        def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
            """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
                Args:
                    logits: logits distribution shape (batch size x vocabulary size)
                    top_k > 0: keep only top k tokens with highest probability (top-k filtering).
                    top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                        Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
                From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
            """
            top_k = min(top_k, logits.size(-1))  # Safety check
            if top_k > 0:
                # Remove all tokens with a probability less than the last token of the top-k
                indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
                logits[indices_to_remove] = filter_value

            if top_p > 0.0:
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                # Remove tokens with cumulative probability above the threshold
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift the indices to the right to keep also the first token above the threshold
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0

                # scatter sorted tensors to original indexing
                indices_to_remove = sorted_indices_to_remove.scatter(dim=1, index=sorted_indices, src=sorted_indices_to_remove)
                logits[indices_to_remove] = filter_value
            return logits


        for k in range(steps):
            x_cond = x if x.size(1) <= self.block_size else x[:, -self.block_size:]  # crop context if needed

            # forward the model to get the logits for the index in the sequence
            logits, _, _, _ = self(x_cond, prop = prop)  # for sampling, no target

            # pluck the logits at the final step and scale by desired temperature
            logits = logits[:, -1, :] / temperature

            # optionally crop the logits to only the top k options OR using nucleus (top-p) filtering
            #if top_k is not None:
            #    v, _ = torch.topk(logits, top_k)
            #    logits[logits < v[:, [-1]]] = -float('Inf')
            logits = top_k_top_p_filtering(logits, top_p=top_p, top_k=top_k)

            # apply softmax to convert logits to (normalized) probabilities
            probs = F.softmax(logits, dim=-1)

            # sample from the distribution or take the most likely
            if do_sample:
                x_next = torch.multinomial(probs, num_samples=1)
            else:
                _, x_next = torch.topk(probs, k=1, dim=-1)

            # append sampled index to the running sequence and continue
            x = torch.cat((x, x_next), dim=1)

        return x[:, 1:]
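The forward pass prepends one property embedding ahead of the token embeddings and strips that slot from the returned logits, and sample() drops the initial context token from its output (return x[:, 1:]). A hypothetical smoke test with a deliberately tiny configuration, not part of the Space (the deployed model uses the 12-layer, 768-dimensional settings from config.json):

import torch
from model import GPT, GPTConfig

# Tiny throwaway configuration just to exercise the shapes.
cfg = GPTConfig(vocab_size=132, block_size=16,
                n_layer=2, n_head=2, n_embd=32,
                num_props=2, lstm=False, lstm_layers=0)
gpt = GPT(cfg)
gpt.eval()

idx = torch.randint(0, cfg.vocab_size, (1, 8))   # (batch, tokens)
prop = torch.tensor([[-1.0, 2.0]])               # [formation energy, band gap]

logits, loss, attn_maps, embed = gpt(idx, prop=prop)
print(logits.shape)   # torch.Size([1, 8, 132]) -- the property slot is already stripped
print(loss)           # None, since no targets were passed

out = gpt.sample(idx[:, :1], steps=8, prop=prop, do_sample=True, top_k=0, top_p=0.9)
print(out.shape)      # torch.Size([1, 8]) -- the conditioning token is dropped by sample()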
pytorch_model.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8d0071fa7c05449273bfaf60357e2c4f9525c8b8f47e6d313856160312a72b21
size 349946009
requirements.txt
ADDED
@@ -0,0 +1,4 @@
torch
transformers
spaces
slices==2.0.4