jingyang Ou committed on
Commit
f0c5b40
·
1 Parent(s): acf4386

update model

Browse files
Files changed (3) hide show
  1. README.md +5 -0
  2. config.json +60 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Reparameterized Absorbing Discrete Diffusion (RADD) small model, trained with the lambda-DCE loss for 400k iterations.
2
+
3
+ Code: https://github.com/ML-GSAI/RADD.
4
+
5
+ Paper: https://arxiv.org/abs/2406.03736.
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ngpus": 32,
3
+ "tokens": 50257,
4
+ "gpt_dir": "assets/gpt2-large",
5
+ "outdir": "../output",
6
+ "training": {
7
+ "batch_size": 512,
8
+ "accum": 1,
9
+ "n_iters": 1000001,
10
+ "snapshot_freq": 50000,
11
+ "log_freq": 50,
12
+ "eval_freq": 100,
13
+ "snapshot_freq_for_preemption": 10000,
14
+ "weight": "standard",
15
+ "snapshot_sampling": false,
16
+ "ema": 0.9999,
17
+ "loss_type": "t_DCE"
18
+ },
19
+ "data": {
20
+ "train": "openwebtext",
21
+ "valid": "wikitext103",
22
+ "cache_dir": "data"
23
+ },
24
+ "noise": {
25
+ "type": "loglinear",
26
+ "sigma_min": 0.0001,
27
+ "sigma_max": 20
28
+ },
29
+ "sampling": {
30
+ "predictor": "euler",
31
+ "steps": 1024
32
+ },
33
+ "eval": {
34
+ "batch_size": 512,
35
+ "perplexity": true,
36
+ "perplexity_batch_size": 16
37
+ },
38
+ "optim": {
39
+ "weight_decay": 0.03,
40
+ "optimizer": "AdamW",
41
+ "lr": 0.0003,
42
+ "beta1": 0.9,
43
+ "beta2": 0.999,
44
+ "eps": 1e-08,
45
+ "warmup": 2500,
46
+ "grad_clip": 1.0
47
+ },
48
+ "model": {
49
+ "name": "small_wotsm",
50
+ "type": "ddit_wot",
51
+ "hidden_size": 768,
52
+ "cond_dim": 128,
53
+ "length": 1024,
54
+ "n_blocks": 12,
55
+ "n_heads": 12,
56
+ "dropout": 0.02,
57
+ "use_checkpoint": false,
58
+ "dtype": "float16"
59
+ }
60
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad49d643dabc3aca8877d0df7928d3f81691491705d36f29c3e510cce8f4b91
3
+ size 649074528