Upload tokenizer
Browse files- tokenizer.json +22 -7
- vocab.txt +15 -0
tokenizer.json
CHANGED
|
@@ -150,13 +150,28 @@
|
|
| 150 |
"[CLS]": 2,
|
| 151 |
"[SEP]": 3,
|
| 152 |
"[MASK]": 4,
|
| 153 |
-
"
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
-
"
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
}
|
| 161 |
}
|
| 162 |
}
|
|
|
|
| 150 |
"[CLS]": 2,
|
| 151 |
"[SEP]": 3,
|
| 152 |
"[MASK]": 4,
|
| 153 |
+
"0": 5,
|
| 154 |
+
"1": 6,
|
| 155 |
+
"2": 7,
|
| 156 |
+
"3": 8,
|
| 157 |
+
"4": 9,
|
| 158 |
+
"5": 10,
|
| 159 |
+
"6": 11,
|
| 160 |
+
"7": 12,
|
| 161 |
+
"8": 13,
|
| 162 |
+
"9": 14,
|
| 163 |
+
"##5": 15,
|
| 164 |
+
"##0": 16,
|
| 165 |
+
"##4": 17,
|
| 166 |
+
"##3": 18,
|
| 167 |
+
"##1": 19,
|
| 168 |
+
"##2": 20,
|
| 169 |
+
"10": 21,
|
| 170 |
+
"14": 22,
|
| 171 |
+
"13": 23,
|
| 172 |
+
"11": 24,
|
| 173 |
+
"12": 25,
|
| 174 |
+
"15": 26
|
| 175 |
}
|
| 176 |
}
|
| 177 |
}
|
vocab.txt
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
[CLS]
|
| 4 |
[SEP]
|
| 5 |
[MASK]
|
|
|
|
| 6 |
1
|
| 7 |
2
|
| 8 |
3
|
|
@@ -10,3 +11,17 @@
|
|
| 10 |
5
|
| 11 |
6
|
| 12 |
7
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
[CLS]
|
| 4 |
[SEP]
|
| 5 |
[MASK]
|
| 6 |
+
0
|
| 7 |
1
|
| 8 |
2
|
| 9 |
3
|
|
|
|
| 11 |
5
|
| 12 |
6
|
| 13 |
7
|
| 14 |
+
8
|
| 15 |
+
9
|
| 16 |
+
##5
|
| 17 |
+
##0
|
| 18 |
+
##4
|
| 19 |
+
##3
|
| 20 |
+
##1
|
| 21 |
+
##2
|
| 22 |
+
10
|
| 23 |
+
14
|
| 24 |
+
13
|
| 25 |
+
11
|
| 26 |
+
12
|
| 27 |
+
15
|