Upload ModernBERT model

Browse files

Files changed (13) hide show

1_Pooling/config.json +10 -0
README.md +600 -0
added_tokens.json +7 -0
config.json +48 -0
config_sentence_transformers.json +10 -0
merges.txt +0 -0
model.safetensors +3 -0
modules.json +14 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +62 -0
vocab.json +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,600 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:3999600
+- loss:MultipleNegativesRankingLoss
+base_model: Shuu12121/CodeModernBERT-Owl-v1
+widget:
+- source_sentence: 'The list of databases
+    Generated from protobuf field <code>repeated .google.cloud.alloydb.v1.Database
+    databases = 1;</code>
+    @param array<\Google\Cloud\AlloyDb\V1\Database>|\Google\Protobuf\Internal\RepeatedField
+    $var
+    @return $this'
+  sentences:
+  - "public function setDatabases($var)\n    {\n        $arr = GPBUtil::checkRepeatedField($var,\
+    \ \\Google\\Protobuf\\Internal\\GPBType::MESSAGE, \\Google\\Cloud\\AlloyDb\\V1\\\
+    Database::class);\n        $this->databases = $arr;\n\n        return $this;\n\
+    \    }"
+  - "public function toApiArray($row, $role = 'guest', $deep = true)\n    {\n    \
+    \    return $row;\n    }"
+  - "def adapter( type = :file, *args, &block )\n        adapters.add(type, *args,\
+    \ &block)\n      end"
+- source_sentence: 'Gets mention
+    @return \LINE\Webhook\Model\Mention|null'
+  sentences:
+  - "public final ListPhoneNumbersPagedResponse listPhoneNumbers(String parent) {\n\
+    \    ListPhoneNumbersRequest request =\n        ListPhoneNumbersRequest.newBuilder().setParent(parent).build();\n\
+    \    return listPhoneNumbers(request);\n  }"
+  - "public function getMention()\n    {\n        return $this->container['mention'];\n\
+    \    }"
+  - "function parse(raw, opts) {\n  opts = opts || {};\n\n  var preserveNumbers =\
+    \ opts.preserveNumbers;\n  var trim = function (s) {\n    return s.trim();\n \
+    \ };\n  var obj = {};\n\n  getKeyValueChunks(raw).map(trim).filter(Boolean).forEach(function\
+    \ (item) {\n    // split with `.indexOf` rather than `.split` because the value\
+    \ may also contain colons.\n    var pos = item.indexOf(':');\n    var key = item.substr(0,\
+    \ pos).trim();\n    var val = item.substr(pos + 1).trim();\n    if (preserveNumbers\
+    \ && isNumeric(val)) {\n      val = Number(val);\n    }\n\n    obj[key] = val;\n\
+    \  });\n\n  return obj;\n}"
+- source_sentence: reset clears the current state of the tree.
+  sentences:
+  - "func (a *TreeToListAdapter) reset() {\n\tcount := a.adapter.Count()\n\ta.node.descendants\
+    \ = count\n\ta.node.children = make([]*TreeToListNode, count)\n\tfor i := range\
+    \ a.node.children {\n\t\tnode := a.adapter.NodeAt(i)\n\t\titem := node.Item()\n\
+    \t\ta.node.children[i] = &TreeToListNode{container: node, item: item, parent:\
+    \ a}\n\t}\n}"
+  - "public void setPhases(java.util.List<Phase> phases) {\n        this.phases =\
+    \ phases;\n    }"
+  - "protected function getJobIdsByQueue($queue)\n    {\n        $failer = $this->laravel['queue.failer'];\n\
+    \n        $ids = method_exists($failer, 'ids')\n            ? $failer->ids($queue)\n\
+    \            : collect($failer->all())\n                ->where('queue', $queue)\n\
+    \                ->pluck('id')\n                ->toArray();\n\n        if (count($ids)\
+    \ === 0) {\n            $this->components->error(\"Unable to find failed jobs\
+    \ for queue [{$queue}].\");\n        }\n\n        return $ids;\n    }"
+- source_sentence: Creates a new `JsNativeError` from its `kind`, `message` and (optionally)
+    its `cause`.
+  sentences:
+  - "public function group(array $attributes, Closure $callback)\n    {\n        $previousGroupAttributes\
+    \ = $this->groupAttributes;\n        $this->groupAttributes = array_merge_recursive($previousGroupAttributes,\
+    \ $attributes);\n\n        \\call_user_func($callback, $this);\n\n        $this->groupAttributes\
+    \ = $previousGroupAttributes;\n    }"
+  - "const fn new(\n        kind: JsNativeErrorKind,\n        message: Cow<'static,\
+    \ str>,\n        cause: Option<Box<JsError>>,\n    ) -> Self {\n        Self {\n\
+    \            kind,\n            message,\n            cause,\n            realm:\
+    \ None,\n        }\n    }"
+  - "fn column_id_to_prune(&self, name: &str) -> Option<ColumnId> {\n        let metadata\
+    \ = self\n            .expected_metadata\n            .as_ref()\n            .unwrap_or_else(||\
+    \ self.read_format.metadata());\n        metadata.column_by_name(name).map(|col|\
+    \ col.column_id)\n    }"
+- source_sentence: '#
+    Deletes the cluster, including the Kubernetes endpoint and all worker
+    nodes.
+    Firewalls and routes that were configured during cluster creation
+    are also deleted.
+    Other Google Compute Engine resources that might be in use by the cluster,
+    such as load balancer resources, are not deleted if they weren''t present
+    when the cluster was initially created.
+    @overload delete_cluster(request, options = nil)
+    Pass arguments to `delete_cluster` via a request object, either of type
+    {::Google::Cloud::Container::V1::DeleteClusterRequest} or an equivalent Hash.
+    @param request [::Google::Cloud::Container::V1::DeleteClusterRequest, ::Hash]
+    A request object representing the call parameters. Required. To specify no
+    parameters, or to keep all the default parameter values, pass an empty Hash.
+    @param options [::Gapic::CallOptions, ::Hash]
+    Overrides the default settings for this call, e.g, timeout, retries, etc. Optional.
+    @overload delete_cluster(project_id: nil, zone: nil, cluster_id: nil, name: nil)
+    Pass arguments to `delete_cluster` via keyword arguments. Note that at
+    least one keyword argument is required. To specify no parameters, or to keep all
+    the default parameter values, pass an empty Hash as a request object (see above).
+    @param project_id [::String]
+    Deprecated. The Google Developers Console [project ID or project
+    number](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
+    This field has been deprecated and replaced by the name field.
+    @param zone [::String]
+    Deprecated. The name of the Google Compute Engine
+    [zone](https://cloud.google.com/compute/docs/zones#available) in which the
+    cluster resides. This field has been deprecated and replaced by the name
+    field.
+    @param cluster_id [::String]
+    Deprecated. The name of the cluster to delete.
+    This field has been deprecated and replaced by the name field.
+    @param name [::String]
+    The name (project, location, cluster) of the cluster to delete.
+    Specified in the format `projects/*/locations/*/clusters/*`.
+    @yield [response, operation] Access the result along with the RPC operation
+    @yieldparam response [::Google::Cloud::Container::V1::Operation]
+    @yieldparam operation [::GRPC::ActiveCall::Operation]
+    @return [::Google::Cloud::Container::V1::Operation]
+    @raise [::Google::Cloud::Error] if the RPC is aborted.
+    @example Basic example
+    require "google/cloud/container/v1"
+    # Create a client object. The client can be reused for multiple calls.
+    client = Google::Cloud::Container::V1::ClusterManager::Client.new
+    # Create a request. To set request fields, pass in keyword arguments.
+    request = Google::Cloud::Container::V1::DeleteClusterRequest.new
+    # Call the delete_cluster method.
+    result = client.delete_cluster request
+    # The returned object is of type Google::Cloud::Container::V1::Operation.
+    p result'
+  sentences:
+  - "public function getWiFiSecurityType()\n    {\n        if (array_key_exists(\"\
+    wiFiSecurityType\", $this->_propDict)) {\n            if (is_a($this->_propDict[\"\
+    wiFiSecurityType\"], \"\\Beta\\Microsoft\\Graph\\Model\\AospDeviceOwnerWiFiSecurityType\"\
+    ) || is_null($this->_propDict[\"wiFiSecurityType\"])) {\n                return\
+    \ $this->_propDict[\"wiFiSecurityType\"];\n            } else {\n            \
+    \    $this->_propDict[\"wiFiSecurityType\"] = new AospDeviceOwnerWiFiSecurityType($this->_propDict[\"\
+    wiFiSecurityType\"]);\n                return $this->_propDict[\"wiFiSecurityType\"\
+    ];\n            }\n        }\n        return null;\n    }"
+  - "device(deviceType, deviceId = 0) {\n\t        return new DLDevice(deviceType,\
+    \ deviceId, this.lib);\n\t    }"
+  - "def delete_cluster request, options = nil\n              raise ::ArgumentError,\
+    \ \"request must be provided\" if request.nil?\n\n              request = ::Gapic::Protobuf.coerce\
+    \ request, to: ::Google::Cloud::Container::V1::DeleteClusterRequest\n\n      \
+    \        # Converts hash and nil to an options object\n              options =\
+    \ ::Gapic::CallOptions.new(**options.to_h) if options.respond_to? :to_h\n\n  \
+    \            # Customize the options with defaults\n              metadata = @config.rpcs.delete_cluster.metadata.to_h\n\
+    \n              # Set x-goog-api-client, x-goog-user-project and x-goog-api-version\
+    \ headers\n              metadata[:\"x-goog-api-client\"] ||= ::Gapic::Headers.x_goog_api_client\
+    \ \\\n                lib_name: @config.lib_name, lib_version: @config.lib_version,\n\
+    \                gapic_version: ::Google::Cloud::Container::V1::VERSION\n    \
+    \          metadata[:\"x-goog-api-version\"] = API_VERSION unless API_VERSION.empty?\n\
+    \              metadata[:\"x-goog-user-project\"] = @quota_project_id if @quota_project_id\n\
+    \n              header_params = {}\n              if request.name\n          \
+    \      header_params[\"name\"] = request.name\n              end\n\n         \
+    \     request_params_header = header_params.map { |k, v| \"#{k}=#{v}\" }.join(\"\
+    &\")\n              metadata[:\"x-goog-request-params\"] ||= request_params_header\n\
+    \n              options.apply_defaults timeout:      @config.rpcs.delete_cluster.timeout,\n\
+    \                                     metadata:     metadata,\n              \
+    \                       retry_policy: @config.rpcs.delete_cluster.retry_policy\n\
+    \n              options.apply_defaults timeout:      @config.timeout,\n      \
+    \                               metadata:     @config.metadata,\n            \
+    \                         retry_policy: @config.retry_policy\n\n             \
+    \ @cluster_manager_stub.call_rpc :delete_cluster, request, options: options do\
+    \ |response, operation|\n                yield response, operation if block_given?\n\
+    \              end\n            rescue ::GRPC::BadStatus => e\n              raise\
+    \ ::Google::Cloud::Error.from_error(e)\n            end"
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+---
+# SentenceTransformer based on Shuu12121/CodeModernBERT-Owl-v1
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Owl-v1](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-v1). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [Shuu12121/CodeModernBERT-Owl-v1](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-v1) <!-- at revision 33220abe62ef7d02fc36c62487e77751459d8c1a -->
+- **Maximum Sequence Length:** 1024 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub (replace the placeholder ID below with this model's Hub repository ID)
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    '#\nDeletes the cluster, including the Kubernetes endpoint and all worker\nnodes.\n\nFirewalls and routes that were configured during cluster creation\nare also deleted.\n\nOther Google Compute Engine resources that might be in use by the cluster,\nsuch as load balancer resources, are not deleted if they weren\'t present\nwhen the cluster was initially created.\n\n@overload delete_cluster(request, options = nil)\nPass arguments to `delete_cluster` via a request object, either of type\n{::Google::Cloud::Container::V1::DeleteClusterRequest} or an equivalent Hash.\n\n@param request [::Google::Cloud::Container::V1::DeleteClusterRequest, ::Hash]\nA request object representing the call parameters. Required. To specify no\nparameters, or to keep all the default parameter values, pass an empty Hash.\n@param options [::Gapic::CallOptions, ::Hash]\nOverrides the default settings for this call, e.g, timeout, retries, etc. Optional.\n\n@overload delete_cluster(project_id: nil, zone: nil, cluster_id: nil, name: nil)\nPass arguments to `delete_cluster` via keyword arguments. Note that at\nleast one keyword argument is required. To specify no parameters, or to keep all\nthe default parameter values, pass an empty Hash as a request object (see above).\n\n@param project_id [::String]\nDeprecated. The Google Developers Console [project ID or project\nnumber](https://cloud.google.com/resource-manager/docs/creating-managing-projects).\nThis field has been deprecated and replaced by the name field.\n@param zone [::String]\nDeprecated. The name of the Google Compute Engine\n[zone](https://cloud.google.com/compute/docs/zones#available) in which the\ncluster resides. This field has been deprecated and replaced by the name\nfield.\n@param cluster_id [::String]\nDeprecated. 
The name of the cluster to delete.\nThis field has been deprecated and replaced by the name field.\n@param name [::String]\nThe name (project, location, cluster) of the cluster to delete.\nSpecified in the format `projects/*/locations/*/clusters/*`.\n\n@yield [response, operation] Access the result along with the RPC operation\n@yieldparam response [::Google::Cloud::Container::V1::Operation]\n@yieldparam operation [::GRPC::ActiveCall::Operation]\n\n@return [::Google::Cloud::Container::V1::Operation]\n\n@raise [::Google::Cloud::Error] if the RPC is aborted.\n\n@example Basic example\nrequire "google/cloud/container/v1"\n\n# Create a client object. The client can be reused for multiple calls.\nclient = Google::Cloud::Container::V1::ClusterManager::Client.new\n\n# Create a request. To set request fields, pass in keyword arguments.\nrequest = Google::Cloud::Container::V1::DeleteClusterRequest.new\n\n# Call the delete_cluster method.\nresult = client.delete_cluster request\n\n# The returned object is of type Google::Cloud::Container::V1::Operation.\np result',
+    'def delete_cluster request, options = nil\n              raise ::ArgumentError, "request must be provided" if request.nil?\n\n              request = ::Gapic::Protobuf.coerce request, to: ::Google::Cloud::Container::V1::DeleteClusterRequest\n\n              # Converts hash and nil to an options object\n              options = ::Gapic::CallOptions.new(**options.to_h) if options.respond_to? :to_h\n\n              # Customize the options with defaults\n              metadata = @config.rpcs.delete_cluster.metadata.to_h\n\n              # Set x-goog-api-client, x-goog-user-project and x-goog-api-version headers\n              metadata[:"x-goog-api-client"] ||= ::Gapic::Headers.x_goog_api_client \\\n                lib_name: @config.lib_name, lib_version: @config.lib_version,\n                gapic_version: ::Google::Cloud::Container::V1::VERSION\n              metadata[:"x-goog-api-version"] = API_VERSION unless API_VERSION.empty?\n              metadata[:"x-goog-user-project"] = @quota_project_id if @quota_project_id\n\n              header_params = {}\n              if request.name\n                header_params["name"] = request.name\n              end\n\n              request_params_header = header_params.map { |k, v| "#{k}=#{v}" }.join("&")\n              metadata[:"x-goog-request-params"] ||= request_params_header\n\n              options.apply_defaults timeout:      @config.rpcs.delete_cluster.timeout,\n                                     metadata:     metadata,\n                                     retry_policy: @config.rpcs.delete_cluster.retry_policy\n\n              options.apply_defaults timeout:      @config.timeout,\n                                     metadata:     @config.metadata,\n                                     retry_policy: @config.retry_policy\n\n              @cluster_manager_stub.call_rpc :delete_cluster, request, options: options do |response, operation|\n                yield response, operation if block_given?\n              end\n  
          rescue ::GRPC::BadStatus => e\n              raise ::Google::Cloud::Error.from_error(e)\n            end',
+    'device(deviceType, deviceId = 0) {\n\t        return new DLDevice(deviceType, deviceId, this.lib);\n\t    }',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# (3, 768)
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# torch.Size([3, 3])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 3,999,600 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                          | sentence_1                                                                            | label                                                         |
+  |:--------|:------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
+  | type    | string                                                                              | string                                                                                | float                                                         |
+  | details | <ul><li>min: 8 tokens</li><li>mean: 74.13 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 13 tokens</li><li>mean: 154.33 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
+* Samples:
+  | sentence_0 | sentence_1 | label |
+  |:-----------|:-----------|:------|
+  | <code>Set the column title<br><br>@param column - column number (first column is: 0)<br>@param title - new column title</code> | <code>setHeader = function(column, newValue) {<br>    const obj = this;<br><br>    if (obj.headers[column]) {<br>        const oldValue = obj.headers[column].textContent;<br>        const onchangeheaderOldValue = (obj.options.columns && obj.options.columns[column] && obj.options.columns[column].title) || '';<br><br>        if (! newValue) {<br>            newValue = getColumnName(column);<br>        }<br><br>        obj.headers[column].textContent = newValue;<br>        // Keep the title property<br>        obj.headers[column].setAttribute('title', newValue);<br>        // Update title<br>        if (!obj.options.columns) {<br>            obj.options.columns = [];<br>        }<br>        if (!obj.options.columns[column]) {<br>            obj.options.columns[column] = {};<br>        }<br>        obj.options.columns[column].title = newValue;<br><br>        setHistory.call(obj, {<br>            action: 'setHeader',<br>            column: column,<br>            oldValue: oldValue,<br>            newValue: newValue<br>        });<br><br>        // On onchange header<br>        dispatch.c...</code> | <code>1.0</code> |
+  | <code>Elsewhere this is known as a "Weak Value Map". Whereas a std JS WeakMap<br>is weak on its keys, this map is weak on its values. It does not retain these<br>values strongly. If a given value disappears, then the entries for it<br>disappear from every weak-value-map that holds it as a value.<br><br>Just as a WeakMap only allows gc-able values as keys, a weak-value-map<br>only allows gc-able values as values.<br><br>Unlike a WeakMap, a weak-value-map unavoidably exposes the non-determinism of<br>gc to its clients. Thus, both the ability to create one, as well as each<br>created one, must be treated as dangerous capabilities that must be closely<br>held. A program with access to these can read side channels though gc that do<br>not* rely on the ability to measure duration. This is a separate, and bad,<br>timing-independent side channel.<br><br>This non-determinism also enables code to escape deterministic replay. In a<br>blockchain context, this could cause validators to differ from each other,<br>preventing consensus, and thus preventing ...</code> | <code>makeFinalizingMap = (finalizer, opts) => {<br>  const { weakValues = false } = opts || {};<br>  if (!weakValues || !WeakRef || !FinalizationRegistry) {<br>    /** @type Map<K, V> */<br>    const keyToVal = new Map();<br>    return Far('fakeFinalizingMap', {<br>      clearWithoutFinalizing: keyToVal.clear.bind(keyToVal),<br>      get: keyToVal.get.bind(keyToVal),<br>      has: keyToVal.has.bind(keyToVal),<br>      set: (key, val) => {<br>        keyToVal.set(key, val);<br>      },<br>      delete: keyToVal.delete.bind(keyToVal),<br>      getSize: () => keyToVal.size,<br>    });<br>  }<br>  /** @type Map<K, WeakRef<any>> */<br>  const keyToRef = new Map();<br>  const registry = new FinalizationRegistry(key => {<br>    // Because this will delete the current binding of `key`, we need to<br>    // be sure that it is not called because a previous binding was collected.<br>    // We do this with the `unregister` in `set` below, assuming that<br>    // `unregister` *immediately* suppresses the finalization of the thing<br>    // it unregisters. TODO If this is...</code> | <code>1.0</code> |
+  | <code>Creates a function that memoizes the result of `func`. If `resolver` is<br>provided, it determines the cache key for storing the result based on the<br>arguments provided to the memoized function. By default, the first argument<br>provided to the memoized function is used as the map cache key. The `func`<br>is invoked with the `this` binding of the memoized function.<br><br>**Note:** The cache is exposed as the `cache` property on the memoized<br>function. Its creation may be customized by replacing the `_.memoize.Cache`<br>constructor with one whose instances implement the<br>[`Map`](http://ecma-international.org/ecma-262/6.0/#sec-properties-of-the-map-prototype-object)<br>method interface of `delete`, `get`, `has`, and `set`.<br><br>@static<br>@memberOf _<br>@since 0.1.0<br>@category Function<br>@param {Function} func The function to have its output memoized.<br>@param {Function} [resolver] The function to resolve the cache key.<br>@returns {Function} Returns the new memoized function.<br>@example<br><br>var object = { 'a': 1, 'b': 2 };<br>var othe...</code> | <code>function memoize(func, resolver) {<br>      if (typeof func != 'function' || (resolver && typeof resolver != 'function')) {<br>        throw new TypeError(FUNC_ERROR_TEXT);<br>      }<br>      var memoized = function() {<br>        var args = arguments,<br>            key = resolver ? resolver.apply(this, args) : args[0],<br>            cache = memoized.cache;<br><br>        if (cache.has(key)) {<br>          return cache.get(key);<br>        }<br>        var result = func.apply(this, args);<br>        memoized.cache = cache.set(key, result);<br>        return result;<br>      };<br>      memoized.cache = new (memoize.Cache || MapCache);<br>      return memoized;<br>    }</code> | <code>1.0</code> |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 150
+- `per_device_eval_batch_size`: 150
+- `num_train_epochs`: 1
+- `fp16`: True
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: no
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 150
+- `per_device_eval_batch_size`: 150
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: True
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+</details>
+### Training Logs
+| Epoch  | Step  | Training Loss |
+|:------:|:-----:|:-------------:|
+| 0.0188 | 500   | 0.2957        |
+| 0.0375 | 1000  | 0.1174        |
+| 0.0563 | 1500  | 0.1148        |
+| 0.0750 | 2000  | 0.104         |
+| 0.0938 | 2500  | 0.0977        |
+| 0.1125 | 3000  | 0.0944        |
+| 0.1313 | 3500  | 0.0885        |
+| 0.1500 | 4000  | 0.083         |
+| 0.1688 | 4500  | 0.0817        |
+| 0.1875 | 5000  | 0.077         |
+| 0.2063 | 5500  | 0.0764        |
+| 0.2250 | 6000  | 0.0725        |
+| 0.2438 | 6500  | 0.0698        |
+| 0.2625 | 7000  | 0.0663        |
+| 0.2813 | 7500  | 0.0644        |
+| 0.3000 | 8000  | 0.0606        |
+| 0.3188 | 8500  | 0.0587        |
+| 0.3375 | 9000  | 0.0596        |
+| 0.3563 | 9500  | 0.0566        |
+| 0.3750 | 10000 | 0.0536        |
+| 0.3938 | 10500 | 0.0514        |
+| 0.4125 | 11000 | 0.0532        |
+| 0.4313 | 11500 | 0.0501        |
+| 0.4500 | 12000 | 0.0478        |
+| 0.4688 | 12500 | 0.0483        |
+| 0.4875 | 13000 | 0.0461        |
+| 0.5063 | 13500 | 0.0444        |
+| 0.5251 | 14000 | 0.0443        |
+| 0.5438 | 14500 | 0.0402        |
+| 0.5626 | 15000 | 0.0417        |
+| 0.5813 | 15500 | 0.0386        |
+| 0.6001 | 16000 | 0.0421        |
+| 0.6188 | 16500 | 0.0368        |
+| 0.6376 | 17000 | 0.036         |
+| 0.6563 | 17500 | 0.0352        |
+| 0.6751 | 18000 | 0.0339        |
+| 0.6938 | 18500 | 0.0336        |
+| 0.7126 | 19000 | 0.0334        |
+| 0.7313 | 19500 | 0.0312        |
+| 0.7501 | 20000 | 0.0325        |
+| 0.7688 | 20500 | 0.0317        |
+| 0.7876 | 21000 | 0.0284        |
+| 0.8063 | 21500 | 0.0281        |
+| 0.8251 | 22000 | 0.0294        |
+| 0.8438 | 22500 | 0.0283        |
+| 0.8626 | 23000 | 0.0277        |
+| 0.8813 | 23500 | 0.0268        |
+| 0.9001 | 24000 | 0.0254        |
+| 0.9188 | 24500 | 0.0249        |
+| 0.9376 | 25000 | 0.0255        |
+| 0.9563 | 25500 | 0.0251        |
+| 0.9751 | 26000 | 0.0244        |
+| 0.9938 | 26500 | 0.0249        |
+### Framework Versions
+- Python: 3.11.13
+- Sentence Transformers: 4.1.0
+- Transformers: 4.53.2
+- PyTorch: 2.6.0+cu124
+- Accelerate: 1.9.0
+- Datasets: 3.6.0
+- Tokenizers: 0.21.2
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

added_tokens.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "</s>": 50001,
+  "<mask>": 50004,
+  "<pad>": 50003,
+  "<s>": 50000,
+  "<unk>": 50002
+}

config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 50000,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "cls",
+  "cls_token_id": 50000,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 50001,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "hidden_activation": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "local_attention": 128,
+  "local_attention_rope_theta": 10000,
+  "local_attention_window": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 50003,
+  "repad_logits_with_grad": false,
+  "rope_theta": 160000,
+  "sep_token_id": 50001,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.2",
+  "type_vocab_size": 2,
+  "vocab_size": 50005
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "4.1.0",
+    "transformers": "4.53.2",
+    "pytorch": "2.6.0+cu124"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba52a05cf9415c600d0ba5637fa8e20bfc776a77e54e2485a08f9d45b0da3de0
+size 594955000

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 1024,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50000": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50001": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50002": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50003": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50004": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "max_length": 256,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "stride": 0,
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff