beomi committed on
Commit
d484d83
·
1 Parent(s): b9b4a98

Newly trained kcgpt2

Browse files
added_tokens.json DELETED
@@ -1 +0,0 @@
1
- {"<|endoftext|>": 51200}
 
 
all_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 2.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 7.106,
5
- "train_samples": 351024,
6
- "train_samples_per_second": 98796.531,
7
- "train_steps_per_second": 6174.783
8
- }
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,26 +1,14 @@
1
  {
2
- "_name_or_path": "skt/kogpt2-base-v2",
3
- "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2LMHeadModel"
7
  ],
8
  "attn_pdrop": 0.1,
9
- "author": "Heewon Jeon(madjakarta@gmail.com)",
10
  "bos_token_id": 0,
11
- "created_date": "2021-04-28",
12
  "embd_pdrop": 0.1,
13
- "eos_token_id": 1,
14
- "gradient_checkpointing": false,
15
- "id2label": {
16
- "0": "LABEL_0"
17
- },
18
  "initializer_range": 0.02,
19
- "label2id": {
20
- "LABEL_0": 0
21
- },
22
  "layer_norm_epsilon": 1e-05,
23
- "license": "CC-BY-NC-SA 4.0",
24
  "model_type": "gpt2",
25
  "n_ctx": 1024,
26
  "n_embd": 768,
@@ -28,8 +16,9 @@
28
  "n_inner": null,
29
  "n_layer": 12,
30
  "n_positions": 1024,
31
- "pad_token_id": 3,
32
  "resid_pdrop": 0.1,
 
33
  "scale_attn_weights": true,
34
  "summary_activation": null,
35
  "summary_first_dropout": 0.1,
@@ -42,7 +31,8 @@
42
  "max_length": 50
43
  }
44
  },
45
- "transformers_version": "4.7.0.dev0",
 
46
  "use_cache": true,
47
- "vocab_size": 51201
48
  }
 
1
  {
 
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
  "GPT2LMHeadModel"
5
  ],
6
  "attn_pdrop": 0.1,
 
7
  "bos_token_id": 0,
 
8
  "embd_pdrop": 0.1,
9
+ "eos_token_id": 0,
 
 
 
 
10
  "initializer_range": 0.02,
 
 
 
11
  "layer_norm_epsilon": 1e-05,
 
12
  "model_type": "gpt2",
13
  "n_ctx": 1024,
14
  "n_embd": 768,
 
16
  "n_inner": null,
17
  "n_layer": 12,
18
  "n_positions": 1024,
19
+ "reorder_and_upcast_attn": false,
20
  "resid_pdrop": 0.1,
21
+ "scale_attn_by_inverse_layer_idx": false,
22
  "scale_attn_weights": true,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
 
31
  "max_length": 50
32
  }
33
  },
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.12.3",
36
  "use_cache": true,
37
+ "vocab_size": 55000
38
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ecde191eb99ae4587f48aece7654374be4123ac8cd9b4c1ad53383b4dc10b7
3
+ size 1024745265
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd9c0df26745d39c0fe5ddcab07ee36eeb6af845e819706e93454db5dd028201
3
- size 513308283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc901c35c76f1939ee8ddb38077f5a769fa86ea77db759f0771f02329da75427
3
+ size 524974313
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f57cf784eb422e17e9e8c7b08d5f4ce2b53d969cb8932de2f797667d0ec369
3
+ size 14503
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b70a5bfca8b17ddde677e7a3b2022a8e1351340a715166b16130293e2978126
3
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:516e139d1b9de943ba4efb04d288122f8f75b95f8cb90cafa1c744af780cd436
3
+ size 623
special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
 
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "unk_token": "<|endoftext|>",
3
- "bos_token": "<|endoftext|>",
4
- "eos_token": "<|endoftext|>",
5
- "pad_token": "<|endoftext|>",
6
- "add_prefix_space": false,
7
- "special_tokens_map_file": null,
8
- "name_or_path": "beomi/kcgpt2"
9
- }
 
 
 
 
 
 
 
 
 
 
train_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 2.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 7.106,
5
- "train_samples": 351024,
6
- "train_samples_per_second": 98796.531,
7
- "train_steps_per_second": 6174.783
8
- }
 
 
 
 
 
 
 
 
 
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f627d5bb4e00e77d7e22a08c699a62fefdd0f8cff9cdf841ed6a3888aa204268
3
- size 2479
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad6335f985d22bc10d13eb2b3fcfffe7d76290fdc64d273339a2ae160c23870
3
+ size 2799
vocab.json CHANGED
The diff for this file is too large to render. See raw diff