Shuu12121 committed (verified)
Commit de7c2a8 · 1 Parent(s): 0c2e1fc

Upload ModernBERT model
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
```json
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
```
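This pooling module is configured for mean pooling: the transformer's token embeddings are averaged (excluding padding via the attention mask) into a single 768-dimensional sentence vector. A minimal sketch of that computation, with made-up tensor shapes purely for illustration:

```python
# Sketch of the mean pooling configured above; shapes are illustrative only.
import torch

token_embeddings = torch.randn(2, 7, 768)             # (batch, tokens, hidden) from the transformer
attention_mask = torch.tensor([[1, 1, 1, 1, 1, 0, 0],
                               [1, 1, 1, 0, 0, 0, 0]])

mask = attention_mask.unsqueeze(-1).float()            # (batch, tokens, 1)
summed = (token_embeddings * mask).sum(dim=1)          # zero out padding, sum over tokens
counts = mask.sum(dim=1).clamp(min=1e-9)               # number of real tokens per sentence
sentence_embeddings = summed / counts                  # (batch, 768)
print(sentence_embeddings.shape)                       # torch.Size([2, 768])
```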
README.md ADDED
@@ -0,0 +1,600 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:3999600
- loss:MultipleNegativesRankingLoss
base_model: Shuu12121/CodeModernBERT-Owl-v1
widget:
- source_sentence: 'The list of databases


    Generated from protobuf field <code>repeated .google.cloud.alloydb.v1.Database
    databases = 1;</code>

    @param array<\Google\Cloud\AlloyDb\V1\Database>|\Google\Protobuf\Internal\RepeatedField
    $var

    @return $this'
  sentences:
  - "public function setDatabases($var)\n    {\n        $arr = GPBUtil::checkRepeatedField($var,\
    \ \\Google\\Protobuf\\Internal\\GPBType::MESSAGE, \\Google\\Cloud\\AlloyDb\\V1\\\
    Database::class);\n $this->databases = $arr;\n\n return $this;\n\
    \ }"
  - "public function toApiArray($row, $role = 'guest', $deep = true)\n    {\n \
    \ return $row;\n    }"
  - "def adapter( type = :file, *args, &block )\n      adapters.add(type, *args,\
    \ &block)\n    end"
- source_sentence: 'Gets mention


    @return \LINE\Webhook\Model\Mention|null'
  sentences:
  - "public final ListPhoneNumbersPagedResponse listPhoneNumbers(String parent) {\n\
    \   ListPhoneNumbersRequest request =\n        ListPhoneNumbersRequest.newBuilder().setParent(parent).build();\n\
    \   return listPhoneNumbers(request);\n  }"
  - "public function getMention()\n    {\n        return $this->container['mention'];\n\
    \    }"
  - "function parse(raw, opts) {\n  opts = opts || {};\n\n  var preserveNumbers =\
    \ opts.preserveNumbers;\n  var trim = function (s) {\n    return s.trim();\n \
    \ };\n  var obj = {};\n\n  getKeyValueChunks(raw).map(trim).filter(Boolean).forEach(function\
    \ (item) {\n    // split with `.indexOf` rather than `.split` because the value\
    \ may also contain colons.\n    var pos = item.indexOf(':');\n    var key = item.substr(0,\
    \ pos).trim();\n    var val = item.substr(pos + 1).trim();\n    if (preserveNumbers\
    \ && isNumeric(val)) {\n      val = Number(val);\n    }\n\n    obj[key] = val;\n\
    \ });\n\n  return obj;\n}"
- source_sentence: reset clears the current state of the tree.
  sentences:
  - "func (a *TreeToListAdapter) reset() {\n\tcount := a.adapter.Count()\n\ta.node.descendants\
    \ = count\n\ta.node.children = make([]*TreeToListNode, count)\n\tfor i := range\
    \ a.node.children {\n\t\tnode := a.adapter.NodeAt(i)\n\t\titem := node.Item()\n\
    \t\ta.node.children[i] = &TreeToListNode{container: node, item: item, parent:\
    \ a}\n\t}\n}"
  - "public void setPhases(java.util.List<Phase> phases) {\n        this.phases =\
    \ phases;\n    }"
  - "protected function getJobIdsByQueue($queue)\n    {\n        $failer = $this->laravel['queue.failer'];\n\
    \n        $ids = method_exists($failer, 'ids')\n            ? $failer->ids($queue)\n\
    \            : collect($failer->all())\n                ->where('queue', $queue)\n\
    \                ->pluck('id')\n                ->toArray();\n\n        if (count($ids)\
    \ === 0) {\n            $this->components->error(\"Unable to find failed jobs\
    \ for queue [{$queue}].\");\n        }\n\n        return $ids;\n    }"
- source_sentence: Creates a new `JsNativeError` from its `kind`, `message` and (optionally)
    its `cause`.
  sentences:
  - "public function group(array $attributes, Closure $callback)\n    {\n        $previousGroupAttributes\
    \ = $this->groupAttributes;\n        $this->groupAttributes = array_merge_recursive($previousGroupAttributes,\
    \ $attributes);\n\n        \\call_user_func($callback, $this);\n\n        $this->groupAttributes\
    \ = $previousGroupAttributes;\n    }"
  - "const fn new(\n        kind: JsNativeErrorKind,\n        message: Cow<'static,\
    \ str>,\n        cause: Option<Box<JsError>>,\n    ) -> Self {\n        Self {\n\
    \            kind,\n            message,\n            cause,\n            realm:\
    \ None,\n        }\n    }"
  - "fn column_id_to_prune(&self, name: &str) -> Option<ColumnId> {\n        let metadata\
    \ = self\n            .expected_metadata\n            .as_ref()\n            .unwrap_or_else(||\
    \ self.read_format.metadata());\n        metadata.column_by_name(name).map(|col|\
    \ col.column_id)\n    }"
- source_sentence: '#

    Deletes the cluster, including the Kubernetes endpoint and all worker

    nodes.


    Firewalls and routes that were configured during cluster creation

    are also deleted.


    Other Google Compute Engine resources that might be in use by the cluster,

    such as load balancer resources, are not deleted if they weren''t present

    when the cluster was initially created.


    @overload delete_cluster(request, options = nil)

    Pass arguments to `delete_cluster` via a request object, either of type

    {::Google::Cloud::Container::V1::DeleteClusterRequest} or an equivalent Hash.


    @param request [::Google::Cloud::Container::V1::DeleteClusterRequest, ::Hash]

    A request object representing the call parameters. Required. To specify no

    parameters, or to keep all the default parameter values, pass an empty Hash.

    @param options [::Gapic::CallOptions, ::Hash]

    Overrides the default settings for this call, e.g, timeout, retries, etc. Optional.


    @overload delete_cluster(project_id: nil, zone: nil, cluster_id: nil, name: nil)

    Pass arguments to `delete_cluster` via keyword arguments. Note that at

    least one keyword argument is required. To specify no parameters, or to keep all

    the default parameter values, pass an empty Hash as a request object (see above).


    @param project_id [::String]

    Deprecated. The Google Developers Console [project ID or project

    number](https://cloud.google.com/resource-manager/docs/creating-managing-projects).

    This field has been deprecated and replaced by the name field.

    @param zone [::String]

    Deprecated. The name of the Google Compute Engine

    [zone](https://cloud.google.com/compute/docs/zones#available) in which the

    cluster resides. This field has been deprecated and replaced by the name

    field.

    @param cluster_id [::String]

    Deprecated. The name of the cluster to delete.

    This field has been deprecated and replaced by the name field.

    @param name [::String]

    The name (project, location, cluster) of the cluster to delete.

    Specified in the format `projects/*/locations/*/clusters/*`.


    @yield [response, operation] Access the result along with the RPC operation

    @yieldparam response [::Google::Cloud::Container::V1::Operation]

    @yieldparam operation [::GRPC::ActiveCall::Operation]


    @return [::Google::Cloud::Container::V1::Operation]


    @raise [::Google::Cloud::Error] if the RPC is aborted.


    @example Basic example

    require "google/cloud/container/v1"


    # Create a client object. The client can be reused for multiple calls.

    client = Google::Cloud::Container::V1::ClusterManager::Client.new


    # Create a request. To set request fields, pass in keyword arguments.

    request = Google::Cloud::Container::V1::DeleteClusterRequest.new


    # Call the delete_cluster method.

    result = client.delete_cluster request


    # The returned object is of type Google::Cloud::Container::V1::Operation.

    p result'
  sentences:
  - "public function getWiFiSecurityType()\n    {\n        if (array_key_exists(\"\
    wiFiSecurityType\", $this->_propDict)) {\n            if (is_a($this->_propDict[\"\
    wiFiSecurityType\"], \"\\Beta\\Microsoft\\Graph\\Model\\AospDeviceOwnerWiFiSecurityType\"\
    ) || is_null($this->_propDict[\"wiFiSecurityType\"])) {\n                return\
    \ $this->_propDict[\"wiFiSecurityType\"];\n            } else {\n    \
    \            $this->_propDict[\"wiFiSecurityType\"] = new AospDeviceOwnerWiFiSecurityType($this->_propDict[\"\
    wiFiSecurityType\"]);\n                return $this->_propDict[\"wiFiSecurityType\"\
    ];\n            }\n        }\n        return null;\n    }"
  - "device(deviceType, deviceId = 0) {\n\t    return new DLDevice(deviceType,\
    \ deviceId, this.lib);\n\t  }"
  - "def delete_cluster request, options = nil\n              raise ::ArgumentError,\
    \ \"request must be provided\" if request.nil?\n\n              request = ::Gapic::Protobuf.coerce\
    \ request, to: ::Google::Cloud::Container::V1::DeleteClusterRequest\n\n  \
    \            # Converts hash and nil to an options object\n              options =\
    \ ::Gapic::CallOptions.new(**options.to_h) if options.respond_to? :to_h\n\n  \
    \            # Customize the options with defaults\n              metadata = @config.rpcs.delete_cluster.metadata.to_h\n\
    \n              # Set x-goog-api-client, x-goog-user-project and x-goog-api-version\
    \ headers\n              metadata[:\"x-goog-api-client\"] ||= ::Gapic::Headers.x_goog_api_client\
    \ \\\n                lib_name: @config.lib_name, lib_version: @config.lib_version,\n\
    \                gapic_version: ::Google::Cloud::Container::V1::VERSION\n  \
    \            metadata[:\"x-goog-api-version\"] = API_VERSION unless API_VERSION.empty?\n\
    \              metadata[:\"x-goog-user-project\"] = @quota_project_id if @quota_project_id\n\
    \n              header_params = {}\n              if request.name\n  \
    \              header_params[\"name\"] = request.name\n              end\n\n  \
    \            request_params_header = header_params.map { |k, v| \"#{k}=#{v}\" }.join(\"\
    &\")\n              metadata[:\"x-goog-request-params\"] ||= request_params_header\n\
    \n              options.apply_defaults timeout: @config.rpcs.delete_cluster.timeout,\n\
    \                                     metadata: metadata,\n  \
    \                                   retry_policy: @config.rpcs.delete_cluster.retry_policy\n\
    \n              options.apply_defaults timeout: @config.timeout,\n  \
    \                                   metadata: @config.metadata,\n  \
    \                                   retry_policy: @config.retry_policy\n\n  \
    \            @cluster_manager_stub.call_rpc :delete_cluster, request, options: options do\
    \ |response, operation|\n                yield response, operation if block_given?\n\
    \              end\n            rescue ::GRPC::BadStatus => e\n              raise\
    \ ::Google::Cloud::Error.from_error(e)\n            end"
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on Shuu12121/CodeModernBERT-Owl-v1

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Owl-v1](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-v1). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [Shuu12121/CodeModernBERT-Owl-v1](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-v1) <!-- at revision 33220abe62ef7d02fc36c62487e77751459d8c1a -->
- **Maximum Sequence Length:** 1024 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    '#\nDeletes the cluster, including the Kubernetes endpoint and all worker\nnodes.\n\nFirewalls and routes that were configured during cluster creation\nare also deleted.\n\nOther Google Compute Engine resources that might be in use by the cluster,\nsuch as load balancer resources, are not deleted if they weren\'t present\nwhen the cluster was initially created.\n\n@overload delete_cluster(request, options = nil)\nPass arguments to `delete_cluster` via a request object, either of type\n{::Google::Cloud::Container::V1::DeleteClusterRequest} or an equivalent Hash.\n\n@param request [::Google::Cloud::Container::V1::DeleteClusterRequest, ::Hash]\nA request object representing the call parameters. Required. To specify no\nparameters, or to keep all the default parameter values, pass an empty Hash.\n@param options [::Gapic::CallOptions, ::Hash]\nOverrides the default settings for this call, e.g, timeout, retries, etc. Optional.\n\n@overload delete_cluster(project_id: nil, zone: nil, cluster_id: nil, name: nil)\nPass arguments to `delete_cluster` via keyword arguments. Note that at\nleast one keyword argument is required. To specify no parameters, or to keep all\nthe default parameter values, pass an empty Hash as a request object (see above).\n\n@param project_id [::String]\nDeprecated. The Google Developers Console [project ID or project\nnumber](https://cloud.google.com/resource-manager/docs/creating-managing-projects).\nThis field has been deprecated and replaced by the name field.\n@param zone [::String]\nDeprecated. The name of the Google Compute Engine\n[zone](https://cloud.google.com/compute/docs/zones#available) in which the\ncluster resides. This field has been deprecated and replaced by the name\nfield.\n@param cluster_id [::String]\nDeprecated. The name of the cluster to delete.\nThis field has been deprecated and replaced by the name field.\n@param name [::String]\nThe name (project, location, cluster) of the cluster to delete.\nSpecified in the format `projects/*/locations/*/clusters/*`.\n\n@yield [response, operation] Access the result along with the RPC operation\n@yieldparam response [::Google::Cloud::Container::V1::Operation]\n@yieldparam operation [::GRPC::ActiveCall::Operation]\n\n@return [::Google::Cloud::Container::V1::Operation]\n\n@raise [::Google::Cloud::Error] if the RPC is aborted.\n\n@example Basic example\nrequire "google/cloud/container/v1"\n\n# Create a client object. The client can be reused for multiple calls.\nclient = Google::Cloud::Container::V1::ClusterManager::Client.new\n\n# Create a request. To set request fields, pass in keyword arguments.\nrequest = Google::Cloud::Container::V1::DeleteClusterRequest.new\n\n# Call the delete_cluster method.\nresult = client.delete_cluster request\n\n# The returned object is of type Google::Cloud::Container::V1::Operation.\np result',
    'def delete_cluster request, options = nil\n raise ::ArgumentError, "request must be provided" if request.nil?\n\n request = ::Gapic::Protobuf.coerce request, to: ::Google::Cloud::Container::V1::DeleteClusterRequest\n\n # Converts hash and nil to an options object\n options = ::Gapic::CallOptions.new(**options.to_h) if options.respond_to? :to_h\n\n # Customize the options with defaults\n metadata = @config.rpcs.delete_cluster.metadata.to_h\n\n # Set x-goog-api-client, x-goog-user-project and x-goog-api-version headers\n metadata[:"x-goog-api-client"] ||= ::Gapic::Headers.x_goog_api_client \\\n lib_name: @config.lib_name, lib_version: @config.lib_version,\n gapic_version: ::Google::Cloud::Container::V1::VERSION\n metadata[:"x-goog-api-version"] = API_VERSION unless API_VERSION.empty?\n metadata[:"x-goog-user-project"] = @quota_project_id if @quota_project_id\n\n header_params = {}\n if request.name\n header_params["name"] = request.name\n end\n\n request_params_header = header_params.map { |k, v| "#{k}=#{v}" }.join("&")\n metadata[:"x-goog-request-params"] ||= request_params_header\n\n options.apply_defaults timeout: @config.rpcs.delete_cluster.timeout,\n metadata: metadata,\n retry_policy: @config.rpcs.delete_cluster.retry_policy\n\n options.apply_defaults timeout: @config.timeout,\n metadata: @config.metadata,\n retry_policy: @config.retry_policy\n\n @cluster_manager_stub.call_rpc :delete_cluster, request, options: options do |response, operation|\n yield response, operation if block_given?\n end\n rescue ::GRPC::BadStatus => e\n raise ::Google::Cloud::Error.from_error(e)\n end',
    'device(deviceType, deviceId = 0) {\n\t return new DLDevice(deviceType, deviceId, this.lib);\n\t }',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
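
Because the training pairs match documentation/comment text with code, the embeddings are also suited to code search. The snippet below is a minimal sketch of that use with a made-up query and candidate snippets; replace `sentence_transformers_model_id` with the actual repository id.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence_transformers_model_id")

# Made-up query and corpus, purely for illustration
query = "Parse a key:value string into a dictionary"
code_candidates = [
    "def parse(raw):\n    return dict(item.split(':', 1) for item in raw.split(','))",
    "def delete_cluster(name):\n    client.delete(name)",
]

query_emb = model.encode(query, convert_to_tensor=True)
code_emb = model.encode(code_candidates, convert_to_tensor=True)

# Rank the candidates by cosine similarity to the query
hits = util.semantic_search(query_emb, code_emb, top_k=2)
print(hits[0])  # e.g. [{'corpus_id': 0, 'score': ...}, {'corpus_id': 1, 'score': ...}]
```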

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 3,999,600 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 | label |
  |:--------|:------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
  | type    | string | string | float |
  | details | <ul><li>min: 8 tokens</li><li>mean: 74.13 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 13 tokens</li><li>mean: 154.33 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence_0 | sentence_1 | label |
  |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
  | <code>Set the column title<br><br>@param column - column number (first column is: 0)<br>@param title - new column title</code> | <code>setHeader = function(column, newValue) {<br> const obj = this;<br><br> if (obj.headers[column]) {<br> const oldValue = obj.headers[column].textContent;<br> const onchangeheaderOldValue = (obj.options.columns && obj.options.columns[column] && obj.options.columns[column].title) \|\| '';<br><br> if (! newValue) {<br> newValue = getColumnName(column);<br> }<br><br> obj.headers[column].textContent = newValue;<br> // Keep the title property<br> obj.headers[column].setAttribute('title', newValue);<br> // Update title<br> if (!obj.options.columns) {<br> obj.options.columns = [];<br> }<br> if (!obj.options.columns[column]) {<br> obj.options.columns[column] = {};<br> }<br> obj.options.columns[column].title = newValue;<br><br> setHistory.call(obj, {<br> action: 'setHeader',<br> column: column,<br> oldValue: oldValue,<br> newValue: newValue<br> });<br><br> // On onchange header<br> dispatch.c...</code> | <code>1.0</code> |
  | <code>Elsewhere this is known as a "Weak Value Map". Whereas a std JS WeakMap<br>is weak on its keys, this map is weak on its values. It does not retain these<br>values strongly. If a given value disappears, then the entries for it<br>disappear from every weak-value-map that holds it as a value.<br><br>Just as a WeakMap only allows gc-able values as keys, a weak-value-map<br>only allows gc-able values as values.<br><br>Unlike a WeakMap, a weak-value-map unavoidably exposes the non-determinism of<br>gc to its clients. Thus, both the ability to create one, as well as each<br>created one, must be treated as dangerous capabilities that must be closely<br>held. A program with access to these can read side channels though gc that do<br>not* rely on the ability to measure duration. This is a separate, and bad,<br>timing-independent side channel.<br><br>This non-determinism also enables code to escape deterministic replay. In a<br>blockchain context, this could cause validators to differ from each other,<br>preventing consensus, and thus preventing ...</code> | <code>makeFinalizingMap = (finalizer, opts) => {<br> const { weakValues = false } = opts || {};<br> if (!weakValues || !WeakRef || !FinalizationRegistry) {<br> /** @type Map<K, V> */<br> const keyToVal = new Map();<br> return Far('fakeFinalizingMap', {<br> clearWithoutFinalizing: keyToVal.clear.bind(keyToVal),<br> get: keyToVal.get.bind(keyToVal),<br> has: keyToVal.has.bind(keyToVal),<br> set: (key, val) => {<br> keyToVal.set(key, val);<br> },<br> delete: keyToVal.delete.bind(keyToVal),<br> getSize: () => keyToVal.size,<br> });<br> }<br> /** @type Map<K, WeakRef<any>> */<br> const keyToRef = new Map();<br> const registry = new FinalizationRegistry(key => {<br> // Because this will delete the current binding of `key`, we need to<br> // be sure that it is not called because a previous binding was collected.<br> // We do this with the `unregister` in `set` below, assuming that<br> // `unregister` *immediately* suppresses the finalization of the thing<br> // it unregisters. TODO If this is...</code> | <code>1.0</code> |
  | <code>Creates a function that memoizes the result of `func`. If `resolver` is<br>provided, it determines the cache key for storing the result based on the<br>arguments provided to the memoized function. By default, the first argument<br>provided to the memoized function is used as the map cache key. The `func`<br>is invoked with the `this` binding of the memoized function.<br><br>**Note:** The cache is exposed as the `cache` property on the memoized<br>function. Its creation may be customized by replacing the `_.memoize.Cache`<br>constructor with one whose instances implement the<br>[`Map`](http://ecma-international.org/ecma-262/6.0/#sec-properties-of-the-map-prototype-object)<br>method interface of `delete`, `get`, `has`, and `set`.<br><br>@static<br>@memberOf _<br>@since 0.1.0<br>@category Function<br>@param {Function} func The function to have its output memoized.<br>@param {Function} [resolver] The function to resolve the cache key.<br>@returns {Function} Returns the new memoized function.<br>@example<br><br>var object = { 'a': 1, 'b': 2 };<br>var othe...</code> | <code>function memoize(func, resolver) {<br> if (typeof func != 'function' \|\| (resolver && typeof resolver != 'function')) {<br> throw new TypeError(FUNC_ERROR_TEXT);<br> }<br> var memoized = function() {<br> var args = arguments,<br> key = resolver ? resolver.apply(this, args) : args[0],<br> cache = memoized.cache;<br><br> if (cache.has(key)) {<br> return cache.get(key);<br> }<br> var result = func.apply(this, args);<br> memoized.cache = cache.set(key, result);<br> return result;<br> };<br> memoized.cache = new (memoize.Cache || MapCache);<br> return memoized;<br> }</code> | <code>1.0</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 150
- `per_device_eval_batch_size`: 150
- `num_train_epochs`: 1
- `fp16`: True
- `multi_dataset_batch_sampler`: round_robin

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 150
- `per_device_eval_batch_size`: 150
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `hub_revision`: None
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: round_robin

</details>

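Putting the loss and the non-default hyperparameters above together, a comparable fine-tuning run could be set up roughly as sketched below. The dataset here is a tiny stand-in for the ~4M training pairs; only the base model, loss, batch size, epoch count, and fp16 flag are taken from this card.

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss

model = SentenceTransformer("Shuu12121/CodeModernBERT-Owl-v1")

# Stand-in data: (documentation, code) positive pairs
train_dataset = Dataset.from_dict({
    "sentence_0": ["Adds two numbers."],
    "sentence_1": ["def add(a, b):\n    return a + b"],
})

# In-batch negatives; the defaults match this card (scale=20.0, cosine similarity)
loss = MultipleNegativesRankingLoss(model)

args = SentenceTransformerTrainingArguments(
    output_dir="outputs",
    num_train_epochs=1,
    per_device_train_batch_size=150,
    fp16=True,
)

trainer = SentenceTransformerTrainer(
    model=model, args=args, train_dataset=train_dataset, loss=loss
)
trainer.train()
```
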
### Training Logs
| Epoch  | Step  | Training Loss |
|:------:|:-----:|:-------------:|
| 0.0188 | 500   | 0.2957        |
| 0.0375 | 1000  | 0.1174        |
| 0.0563 | 1500  | 0.1148        |
| 0.0750 | 2000  | 0.104         |
| 0.0938 | 2500  | 0.0977        |
| 0.1125 | 3000  | 0.0944        |
| 0.1313 | 3500  | 0.0885        |
| 0.1500 | 4000  | 0.083         |
| 0.1688 | 4500  | 0.0817        |
| 0.1875 | 5000  | 0.077         |
| 0.2063 | 5500  | 0.0764        |
| 0.2250 | 6000  | 0.0725        |
| 0.2438 | 6500  | 0.0698        |
| 0.2625 | 7000  | 0.0663        |
| 0.2813 | 7500  | 0.0644        |
| 0.3000 | 8000  | 0.0606        |
| 0.3188 | 8500  | 0.0587        |
| 0.3375 | 9000  | 0.0596        |
| 0.3563 | 9500  | 0.0566        |
| 0.3750 | 10000 | 0.0536        |
| 0.3938 | 10500 | 0.0514        |
| 0.4125 | 11000 | 0.0532        |
| 0.4313 | 11500 | 0.0501        |
| 0.4500 | 12000 | 0.0478        |
| 0.4688 | 12500 | 0.0483        |
| 0.4875 | 13000 | 0.0461        |
| 0.5063 | 13500 | 0.0444        |
| 0.5251 | 14000 | 0.0443        |
| 0.5438 | 14500 | 0.0402        |
| 0.5626 | 15000 | 0.0417        |
| 0.5813 | 15500 | 0.0386        |
| 0.6001 | 16000 | 0.0421        |
| 0.6188 | 16500 | 0.0368        |
| 0.6376 | 17000 | 0.036         |
| 0.6563 | 17500 | 0.0352        |
| 0.6751 | 18000 | 0.0339        |
| 0.6938 | 18500 | 0.0336        |
| 0.7126 | 19000 | 0.0334        |
| 0.7313 | 19500 | 0.0312        |
| 0.7501 | 20000 | 0.0325        |
| 0.7688 | 20500 | 0.0317        |
| 0.7876 | 21000 | 0.0284        |
| 0.8063 | 21500 | 0.0281        |
| 0.8251 | 22000 | 0.0294        |
| 0.8438 | 22500 | 0.0283        |
| 0.8626 | 23000 | 0.0277        |
| 0.8813 | 23500 | 0.0268        |
| 0.9001 | 24000 | 0.0254        |
| 0.9188 | 24500 | 0.0249        |
| 0.9376 | 25000 | 0.0255        |
| 0.9563 | 25500 | 0.0251        |
| 0.9751 | 26000 | 0.0244        |
| 0.9938 | 26500 | 0.0249        |


### Framework Versions
- Python: 3.11.13
- Sentence Transformers: 4.1.0
- Transformers: 4.53.2
- PyTorch: 2.6.0+cu124
- Accelerate: 1.9.0
- Datasets: 3.6.0
- Tokenizers: 0.21.2

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
added_tokens.json ADDED
@@ -0,0 +1,7 @@
```json
{
  "</s>": 50001,
  "<mask>": 50004,
  "<pad>": 50003,
  "<s>": 50000,
  "<unk>": 50002
}
```
config.json ADDED
@@ -0,0 +1,48 @@
```json
{
  "architectures": [
    "ModernBertModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 50000,
  "classifier_activation": "gelu",
  "classifier_bias": false,
  "classifier_dropout": 0.0,
  "classifier_pooling": "cls",
  "cls_token_id": 50281,
  "decoder_bias": true,
  "deterministic_flash_attn": false,
  "embedding_dropout": 0.0,
  "eos_token_id": 50001,
  "global_attn_every_n_layers": 3,
  "global_rope_theta": 160000.0,
  "hidden_activation": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_cutoff_factor": 2.0,
  "initializer_range": 0.02,
  "intermediate_size": 1152,
  "local_attention": 128,
  "local_attention_rope_theta": 10000,
  "local_attention_window": 128,
  "local_rope_theta": 10000.0,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "mlp_dropout": 0.0,
  "model_type": "modernbert",
  "norm_bias": false,
  "norm_eps": 1e-05,
  "num_attention_heads": 12,
  "num_hidden_layers": 22,
  "pad_token_id": 1,
  "repad_logits_with_grad": false,
  "rope_theta": 160000,
  "sep_token_id": 50282,
  "sparse_pred_ignore_index": -100,
  "sparse_prediction": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "type_vocab_size": 2,
  "vocab_size": 50005
}
```
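The backbone declared here is a 22-layer ModernBERT encoder with hidden size 768 and an 8192-token position limit. A minimal sketch of loading it with plain `transformers` to inspect token-level outputs (the sentence-transformers usage in the README is the primary path; the repository id below is a placeholder):

```python
import torch
from transformers import AutoModel, AutoTokenizer

repo_id = "your-username/your-model-id"  # placeholder: substitute the actual repository id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModel.from_pretrained(repo_id)  # loads ModernBertModel per config.json

inputs = tokenizer("def add(a, b): return a + b", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

print(outputs.last_hidden_state.shape)  # (1, num_tokens, 768)
```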
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
```json
{
  "__version__": {
    "sentence_transformers": "4.1.0",
    "transformers": "4.53.2",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
```
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
```
version https://git-lfs.github.com/spec/v1
oid sha256:ba52a05cf9415c600d0ba5637fa8e20bfc776a77e54e2485a08f9d45b0da3de0
size 594955000
```
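The entry above is a Git LFS pointer: the weights themselves are stored out of band and identified by their SHA-256 digest and byte size. A small sketch for checking a downloaded copy against those values (the local path is an assumption):

```python
import hashlib
import os

path = "model.safetensors"  # assumed local path to the downloaded weights
expected_sha256 = "ba52a05cf9415c600d0ba5637fa8e20bfc776a77e54e2485a08f9d45b0da3de0"
expected_size = 594955000

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

print(os.path.getsize(path) == expected_size and digest.hexdigest() == expected_sha256)
```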
modules.json ADDED
@@ -0,0 +1,14 @@
```json
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
```
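This file wires module 0 (the transformer, loaded from the repository root) to module 1 (the pooling layer in `1_Pooling/`). A minimal sketch of assembling the same two-stage pipeline by hand with `sentence_transformers.models`, using the base model named in the card:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the ModernBERT encoder, truncating inputs at 1024 tokens
word_embedding_model = models.Transformer("Shuu12121/CodeModernBERT-Owl-v1", max_seq_length=1024)

# Module 1: mean pooling over token embeddings, mirroring 1_Pooling/config.json
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode="mean",
)

model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
print(model)  # prints the same Transformer -> Pooling architecture as the card
```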
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
```json
{
  "max_seq_length": 1024,
  "do_lower_case": false
}
```
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
```json
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
```
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
```json
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "50000": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50001": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50002": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50003": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50004": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "max_length": 256,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
```
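The tokenizer is a `RobertaTokenizer` whose special tokens are mapped to the ids listed in `added_tokens.json` (50000-50004). A small sketch for sanity-checking that mapping after loading; the repository id is a placeholder:

```python
from transformers import AutoTokenizer

repo_id = "your-username/your-model-id"  # placeholder: substitute the actual repository id
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Special tokens declared in special_tokens_map.json / added_tokens.json
for token in ["<s>", "</s>", "<unk>", "<pad>", "<mask>"]:
    print(token, tokenizer.convert_tokens_to_ids(token))  # expected ids 50000..50004
```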
vocab.json ADDED
The diff for this file is too large to render. See raw diff