diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..99584470cf6c6ad1131afb8ffb9ca7c01ee14fb0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1642/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-3284/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-4926/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c840768c7cf31638d3b87da205868c6d04eef09 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +--- +base_model: openai/gpt-oss-20b +library_name: peft +model_name: gpt-oss-20b-finetuned +tags: +- base_model:adapter:openai/gpt-oss-20b +- lora +- sft +- transformers +- trl +licence: license +--- + +# Model Card for gpt-oss-20b-finetuned + +This model is a LoRA adapter fine-tuned from [openai/gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="<hub-namespace>/gpt-oss-20b-finetuned", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/srikar-eranky-uc-san-diego/dos-expert/runs/evauaygh) + + +This model was trained with SFT. 
+ +### Framework versions + +- PEFT 0.17.1 +- TRL: 0.21.0 +- Transformers: 4.55.4 +- Pytorch: 2.7.1+cu118 +- Datasets: 4.0.0 +- Tokenizers: 0.21.4 + +## Citations + + + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f7444c0a113bd6bbbb6725b2090d1700b55b21c --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GptOssForCausalLM", + "parent_library": "transformers.models.gpt_oss.modeling_gpt_oss" + }, + "base_model_name_or_path": "openai/gpt-oss-20b", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "target_parameters": [ + "7.mlp.experts.gate_up_proj", + "7.mlp.experts.down_proj", + "15.mlp.experts.gate_up_proj", + "15.mlp.experts.down_proj", + "23.mlp.experts.gate_up_proj", + "23.mlp.experts.down_proj" + ], + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + 
"use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e7b60cdcad6eaf0526b0bf9915287ce60c75504 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12fc000b95c82bac6a99a48756a04e45988fdbd7cf08e8fc7d3059abbcb8677 +size 2366470368 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..dc7bb11927d29f653ba2740f2db2c688fd77592f --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,331 @@ +{#- + In addition to the normal inputs of `messages` and `tools`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". 
+ #} + +{#- Tool Definition Rendering: macros that turn JSON-schema tool specs into TypeScript-style type declarations ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list]: the type names are joined with " | ", or emitted alone when there is only one #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any. NOTE(review): the flag below is assigned with a plain set inside the for loop; under Jinja2 scoping such an assignment does not appear to persist after the loop ends, so the "any" fallback may never trigger - confirm, and compare with the namespace() objects used for loop-carried state further down in this template #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + {%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if 
variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\n\n" }} + {{- "namespace " + namespace_name + " {\n\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\n" }} + {{- "type "+ tool.name + " = " }} + {%- if tool.parameters and tool.parameters.properties %} + {{- "(_: {\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {%- if param_spec.description %} + {{- "// " + param_spec.description + "\n" }} + {%- endif %} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" 
}} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum %} + {{- ", // default: " + param_spec.default }} + {%- elif param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\n" }} + {%- else %} + {{- ",\n" }} + {%- endif -%} + {%- endfor %} + {{- "}) => any;\n\n" }} + {%- else -%} + {{- "() => any;\n\n" }} + {%- endif -%} + {%- endfor %} + {{- "} // namespace " + namespace_name }} +{%- endmacro -%} + +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\n\n" }} + {{- "// Tool for browsing.\n" }} + {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }} + {{- "// Cite information from the tool using the following format:\n" }} + {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\n" }} + {{- "// sources=web (default: web)\n" }} + {{- "namespace browser {\n\n" }} + {{- "// Searches for information related to `query` and displays `topn` results.\n" }} + {{- "type search = (_: {\n" }} + {{- "query: string,\n" }} + {{- "topn?: number, // default: 10\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }} + {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }} + {{- "// If `cursor` is not provided, the most recent page is implied.\n" }} + {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }} + {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the 
most relevant passage, if available.\n" }} + {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }} + {{- "type open = (_: {\n" }} + {{- "id?: number | string, // default: -1\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "loc?: number, // default: -1\n" }} + {{- "num_lines?: number, // default: -1\n" }} + {{- "view_source?: boolean, // default: false\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }} + {{- "type find = (_: {\n" }} + {{- "pattern: string,\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "}) => any;\n\n" }} + {{- "} // namespace browser\n\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\n\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\n\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction: build_system_message() emits the model identity, knowledge cutoff, current date, reasoning effort, any built-in tool descriptions and the valid-channels line ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." 
%} + {%- endif %} + {{- model_identity + "\n" }} + {{- "Knowledge cutoff: 2024-06\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "Reasoning: " + reasoning_effort + "\n\n" }} + {%- if builtin_tools %} + {{- "# Tools\n\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }} + {%- if tools -%} + {{- "\nCalls to these tools must go to the commentary channel: 'functions'." }} + {%- endif -%} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults: nothing is assigned here; model_identity and reasoning_effort fall back to their defaults inside build_system_message() #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message: a leading "developer" or "system" message supplies the developer instructions and is left out of the message loop #} +{%- if messages[0].role == "developer" or messages[0].role == "system" %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message: emitted only when there are developer instructions and/or tools #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\n\n" }} + {{- developer_message }} + {{- "\n\n" }} + {%- endif %} + {%- if tools -%} + {{- "# Tools\n\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message 
in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {#- Checks to ensure the messages are being passed in the format we expect #} + {%- if "content" in message %} + {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "thinking" in message %} + {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "tool_calls" in message %} + {#- We need very careful handling here - we want to drop the tool call analysis message if the model #} + {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #} + {#- when we render CoT/analysis messages in inference. 
#} + {%- set future_final_message = namespace(found=false) %} + {%- for future_message in loop_messages[loop.index:] %} + {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %} + {%- set future_final_message.found = true %} + {%- endif %} + {%- endfor %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content and message.thinking %} + {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }} + {%- elif message.content and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- elif message.thinking and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary " }} + {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|call|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference. 
#} + {%- if "thinking" in message %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- else %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." + last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- elif message.role == 'user' -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt: when add_generation_prompt is set, open a new assistant turn for the model to complete #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} \ No newline at end of file diff --git a/checkpoint-1642/README.md b/checkpoint-1642/README.md new file mode 100644 index 0000000000000000000000000000000000000000..88d5a6d478d4dd6dceb04cd4688496bde58deabd --- /dev/null +++ b/checkpoint-1642/README.md @@ -0,0 +1,208 @@ +--- +base_model: openai/gpt-oss-20b +library_name: peft +tags: +- base_model:adapter:openai/gpt-oss-20b +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] 
+- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1642/adapter_config.json b/checkpoint-1642/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f7444c0a113bd6bbbb6725b2090d1700b55b21c --- /dev/null +++ b/checkpoint-1642/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GptOssForCausalLM", + "parent_library": "transformers.models.gpt_oss.modeling_gpt_oss" + }, + "base_model_name_or_path": "openai/gpt-oss-20b", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + 
"v_proj", + "q_proj" + ], + "target_parameters": [ + "7.mlp.experts.gate_up_proj", + "7.mlp.experts.down_proj", + "15.mlp.experts.gate_up_proj", + "15.mlp.experts.down_proj", + "23.mlp.experts.gate_up_proj", + "23.mlp.experts.down_proj" + ], + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1642/adapter_model.safetensors b/checkpoint-1642/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c8d581dc1b7f2415a31488c2cee9c5de0b701a5 --- /dev/null +++ b/checkpoint-1642/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2bb30c52c7bdb0a748ad8bac39bd5ca2c2eea8c78a335188ddff323f2fbdd4 +size 2366470368 diff --git a/checkpoint-1642/chat_template.jinja b/checkpoint-1642/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..dc7bb11927d29f653ba2740f2db2c688fd77592f --- /dev/null +++ b/checkpoint-1642/chat_template.jinja @@ -0,0 +1,331 @@ +{#- + In addition to the normal inputs of `messages` and `tools`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". 
+ #} + +{#- Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + {%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if 
variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\n\n" }} + {{- "namespace " + namespace_name + " {\n\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\n" }} + {{- "type "+ tool.name + " = " }} + {%- if tool.parameters and tool.parameters.properties %} + {{- "(_: {\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {%- if param_spec.description %} + {{- "// " + param_spec.description + "\n" }} + {%- endif %} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" 
}} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum %} + {{- ", // default: " + param_spec.default }} + {%- elif param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\n" }} + {%- else %} + {{- ",\n" }} + {%- endif -%} + {%- endfor %} + {{- "}) => any;\n\n" }} + {%- else -%} + {{- "() => any;\n\n" }} + {%- endif -%} + {%- endfor %} + {{- "} // namespace " + namespace_name }} +{%- endmacro -%} +{#- Built-in tool docs: emits the browser namespace when browser_tool is truthy and the python tool description when python_tool is truthy. #} +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\n\n" }} + {{- "// Tool for browsing.\n" }} + {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }} + {{- "// Cite information from the tool using the following format:\n" }} + {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\n" }} + {{- "// sources=web (default: web)\n" }} + {{- "namespace browser {\n\n" }} + {{- "// Searches for information related to `query` and displays `topn` results.\n" }} + {{- "type search = (_: {\n" }} + {{- "query: string,\n" }} + {{- "topn?: number, // default: 10\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }} + {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }} + {{- "// If `cursor` is not provided, the most recent page is implied.\n" }} + {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }} + {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the
most relevant passage, if available.\n" }} + {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }} + {{- "type open = (_: {\n" }} + {{- "id?: number | string, // default: -1\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "loc?: number, // default: -1\n" }} + {{- "num_lines?: number, // default: -1\n" }} + {{- "view_source?: boolean, // default: false\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }} + {{- "type find = (_: {\n" }} + {{- "pattern: string,\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "}) => any;\n\n" }} + {{- "} // namespace browser\n\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\n\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\n\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI."
%} + {%- endif %} + {{- model_identity + "\n" }} + {{- "Knowledge cutoff: 2024-06\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "Reasoning: " + reasoning_effort + "\n\n" }} + {%- if builtin_tools %} + {{- "# Tools\n\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }} + {%- if tools -%} + {{- "\nCalls to these tools must go to the commentary channel: 'functions'." }} + {%- endif -%} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message; an empty messages list falls through to the else branch instead of failing on messages[0] #} +{%- if messages and (messages[0].role == "developer" or messages[0].role == "system") %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\n\n" }} + {{- developer_message }} + {{- "\n\n" }} + {%- endif %} + {%- if tools -%} + {{- "# Tools\n\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message
in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {#- Checks to ensure the messages are being passed in the format we expect; a key that is present but null is treated like an absent key #} + {%- if "content" in message and message.content is not none %} + {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "thinking" in message and message.thinking is not none %} + {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {#- A null or empty tool_calls value means this turn is not a tool call #} {%- if message.tool_calls %} + {#- We need very careful handling here - we want to drop the tool call analysis message if the model #} + {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #} + {#- when we render CoT/analysis messages in inference.
#} + {%- set future_final_message = namespace(found=false) %} + {%- for future_message in loop_messages[loop.index:] %} + {%- if future_message.role == 'assistant' and not future_message.tool_calls %} + {%- set future_final_message.found = true %} + {%- endif %} + {%- endfor %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content and message.thinking %} + {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }} + {%- elif message.content and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- elif message.thinking and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary " }} + {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|call|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference.
#} + {%- if "thinking" in message and message.thinking is not none %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- else %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." + last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- elif message.role == 'user' -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} \ No newline at end of file diff --git a/checkpoint-1642/optimizer.pt b/checkpoint-1642/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e7e74c92488afbbea32b27b7f522127a4fb39d6 --- /dev/null +++ b/checkpoint-1642/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8c076a2c7a54274c3e6fea5b408ff597f64186f832c0921c0fc4ac90a90646 +size 120495883 diff --git a/checkpoint-1642/rng_state_0.pth b/checkpoint-1642/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4b0bea5e7a82c1886ccb79d111257405625494a6 --- /dev/null +++ b/checkpoint-1642/rng_state_0.pth @@ -0,0 +1,3 @@ +version
https://git-lfs.github.com/spec/v1 +oid sha256:f8f92cf63e0989759370d24108b469c492c12202403f036015307ce49f12cedc +size 16389 diff --git a/checkpoint-1642/rng_state_1.pth b/checkpoint-1642/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c94d69496797fa9c0f83f8a534c8ba692cb143a --- /dev/null +++ b/checkpoint-1642/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed40a0a4e9f365d2c6cc004d97e6705894eba46c8be4c160c1455bc3062dee1 +size 16389 diff --git a/checkpoint-1642/rng_state_2.pth b/checkpoint-1642/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3d2f614c50841e8255840051884b16a6e22c38a --- /dev/null +++ b/checkpoint-1642/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d688b304d19c260b5cfa471535ed51d7e1d60b3a0d0159dfd1a04b87904a9f42 +size 16389 diff --git a/checkpoint-1642/rng_state_3.pth b/checkpoint-1642/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..84660ff39da893f3edf58f4a54971ca424b5ae70 --- /dev/null +++ b/checkpoint-1642/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b +size 16389 diff --git a/checkpoint-1642/rng_state_4.pth b/checkpoint-1642/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd63d0f91d990383a77a110da21707c2e4161995 --- /dev/null +++ b/checkpoint-1642/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469900fd39c667ffbd49c3c407c0ba317a1e9f5f9339a99b5d38423b7d0ce6d4 +size 16389 diff --git a/checkpoint-1642/rng_state_5.pth b/checkpoint-1642/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9f2a80a12ac3102dc70232f90338ece968e5537 --- /dev/null +++ b/checkpoint-1642/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:124688471ff2a6e80f2fcefedbf741fb18d08dd539d5bd07a52e81be545142a5 +size 16389 diff --git a/checkpoint-1642/rng_state_6.pth b/checkpoint-1642/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffdeaa99e14137e82d32d685f2311f4ac57f4cd1 --- /dev/null +++ b/checkpoint-1642/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e69f1ced9f992a72c948698e5eb06088610788988cdb2fdbdd624e064319d60 +size 16389 diff --git a/checkpoint-1642/rng_state_7.pth b/checkpoint-1642/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad30a51583bfc3be86c6b6aa3bc2e815019b3e77 --- /dev/null +++ b/checkpoint-1642/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a376268a55d6ee10c371c06aa952334c4c6a1af9ea2d71b1951a57367a0c6722 +size 16389 diff --git a/checkpoint-1642/scheduler.pt b/checkpoint-1642/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bafb22c470130b817f221c0ddbadc7f7be8158ac --- /dev/null +++ b/checkpoint-1642/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc6cc730a04732028e5e99bd0647de7bc952d3bd8bafab1e763ff61ef93625d +size 1465 diff --git a/checkpoint-1642/special_tokens_map.json b/checkpoint-1642/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a463562962fc1910d6c169b0a7e9c95872abc92e --- /dev/null +++ b/checkpoint-1642/special_tokens_map.json @@ -0,0 +1,1817 @@ +{ + "additional_special_tokens": [ + { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BRBCT", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMA", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false 
+ }, + { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPU", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + }, + { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDT", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + }, + { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MBIST", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, 
+ { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKI", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + }, + { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSA", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TSEC", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { 
+ "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1642/tokenizer.json b/checkpoint-1642/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e0ace705cfa5bd60a370b9daafe3e1513770b9 --- /dev/null +++ b/checkpoint-1642/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec3af79eb37b392bb5382bfe3f4eeab633498c220a804f4fd5d7d102a000f1a +size 27914312 diff --git a/checkpoint-1642/tokenizer_config.json b/checkpoint-1642/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..923c92874790920d6eaf5c317c2d63cd3b569e62 --- /dev/null +++ b/checkpoint-1642/tokenizer_config.json @@ -0,0 +1,2489 @@ +{ + "added_tokens_decoder": { + "1529": { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2022": { + "content": "IC", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2416": { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3360": { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4478": { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5173": { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6258": { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6781": { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7726": { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9375": { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9760": { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10227": { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11720": { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12235": { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12515": { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13874": { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "13905": { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15409": { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17183": { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19862": { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20174": { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22723": { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27968": { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29829": { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29864": { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34134": { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34435": { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39749": { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43230": { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46966": { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"47787": { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47994": { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50719": { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52907": { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54793": { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58530": { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66773": { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68495": { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70684": { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72089": { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74923": { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77411": { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81990": { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82244": { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84526": { + "content": 
"POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86154": { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86297": { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93660": { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94432": { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95202": { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100413": { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104755": { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104805": { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106979": { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110871": { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117565": { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118754": { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126731": { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126978": { + "content": "AES", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130911": { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147130": { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152076": { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152095": { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152119": { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155474": { + "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158359": { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162121": { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166996": { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171893": { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172873": { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175772": { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177970": { + "content": "ECB", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178261": { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178974": { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186075": { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187697": { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199998": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199999": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200000": { + "content": "<|reserved_200000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200001": { + "content": "<|reserved_200001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200002": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200003": { + "content": "<|constrain|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200004": { + "content": "<|reserved_200004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200005": { + "content": "<|channel|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200006": { + "content": "<|start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "200007": { + "content": "<|end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200008": { + "content": "<|message|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200009": { + "content": "<|reserved_200009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200010": { + "content": "<|reserved_200010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200011": { + "content": "<|reserved_200011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200012": { + "content": "<|call|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200013": { + "content": "<|reserved_200013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200014": { + "content": "<|reserved_200014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200015": { + "content": "<|reserved_200015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200016": { + "content": "<|reserved_200016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200017": { + "content": "<|reserved_200017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200018": { + "content": "<|endofprompt|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200019": { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": true + }, + "200020": { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200021": { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200022": { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200023": { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200024": { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200025": { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200026": { + "content": "AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200027": { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200028": { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200029": { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200030": { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200031": { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200032": { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200033": { + "content": "BPS", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "200034": { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200035": { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200036": { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200037": { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200038": { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200039": { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200040": { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200041": { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200042": { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200043": { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200044": { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200045": { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200046": { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200047": { + "content": "CMAC", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200048": { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200049": { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200050": { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200051": { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200052": { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200053": { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200054": { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200055": { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200056": { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200057": { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200058": { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200059": { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200060": { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200061": { + "content": "ECDSA", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200062": { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200063": { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200064": { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200065": { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200066": { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200067": { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200068": { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200069": { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200070": { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200071": { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200072": { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200073": { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200074": { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200075": { + "content": "GMAC", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200076": { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200077": { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200078": { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200079": { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200080": { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200081": { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200082": { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200083": { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200084": { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200085": { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200086": { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200087": { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200088": { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200089": { + "content": "HSP", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200090": { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200091": { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200092": { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200093": { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200094": { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200095": { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200096": { + "content": "IMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200097": { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200098": { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200099": { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200100": { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200101": { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200102": { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200103": { + "content": "KCV", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200104": { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200105": { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200106": { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200107": { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200108": { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200109": { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200110": { + "content": "LBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200111": { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200112": { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200113": { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200114": { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200115": { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200116": { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200117": { + "content": "MCAL", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200118": { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200119": { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200120": { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200121": { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200122": { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200123": { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200124": { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200125": { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200126": { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200127": { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200128": { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200129": { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200130": { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200131": { + "content": "NVENC", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200132": { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200133": { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200134": { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200135": { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200136": { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200137": { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200138": { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200139": { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200140": { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200141": { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200142": { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200143": { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200144": { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200145": { + "content": "PPC", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200146": { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200147": { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200148": { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200149": { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200150": { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200151": { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200152": { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200153": { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200154": { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200155": { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200156": { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200157": { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200158": { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200159": { + "content": "SAE", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200160": { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200161": { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200162": { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200163": { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200164": { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200165": { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200166": { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200167": { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200168": { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200169": { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200170": { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200171": { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200172": { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200173": { + "content": "THI", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200174": { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200175": { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200176": { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200177": { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200178": { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200179": { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200180": { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200181": { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200182": { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200183": { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200184": { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200185": { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200186": { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200187": { + "content": "WARB", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200188": { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200189": { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200190": { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200191": { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200192": { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200193": { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200194": { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200195": { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200196": { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200197": { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "AAD", + "AArch64", + "ACL", + "AD", + "AES", + "AES256GCM", + "AESCBC", + "AKE", + "AON", + "ASID", + "AXI", + "Acronym", + "AoU", + "AutoSar", + "BAM", + "BCH", + "BIST", + "BOM", + "BPMP", + "BPS", + "BPU", + "BRBCT", + "BW", + "C2C", + "CA", + "CAN", + "CANFD", + "CAR", + "CAVP", + "CBB", + "CBC", + "CBR", + "CCM", + "CCPLEX", + "CCPLEX_L2", + "CCPLEX_MISC", + "CCPLEX_SCF", + "CDD", + "CIF", + "CMAC", + "CPE", + "CPU", + "CRC", + 
"CSI", + "CSP", + "CTR", + "CTXT", + "CV", + "DBB", + "DEP", + "DEV", + "DFA", + "DFT", + "DIP", + "DISPLAY", + "DLA", + "DMA", + "DMEM", + "DPA", + "DSC", + "DT", + "DU", + "DVMU", + "EC", + "ECB", + "ECC", + "ECDHE", + "ECDSA", + "ECID", + "EDR", + "EOF", + "EOTTI", + "EQoS", + "FCL", + "FHTI", + "FIPS", + "FMEA", + "FMON", + "FO", + "FP", + "FPS", + "FW", + "FuSa", + "GCM", + "GFD", + "GIC", + "GMAC", + "GMSL", + "GOP", + "GP", + "GPCDMA", + "GPU", + "GR", + "Gpps", + "HBR", + "HBR2", + "HBR3", + "HDS", + "HIS", + "HMAC", + "HPSE", + "HSI", + "HSM", + "HSP", + "HW", + "IAS", + "IC", + "ICD", + "IDR", + "IDT", + "IEP", + "IEU", + "IFU", + "IID", + "ILD", + "IMEM", + "IOC", + "IOFA", + "IOMMU", + "IPC", + "IPI", + "IRF", + "IST", + "IV", + "IoT", + "JSR", + "KAT", + "KCV", + "KDF", + "KPI", + "L1PT", + "L2C", + "L2mDIR", + "L2vDIR", + "LAB", + "LBIST", + "LDC", + "LFT", + "LIC", + "LIP", + "LSB", + "MAC", + "MAQ", + "MB", + "MBIST", + "MCAL", + "MCE", + "MCU", + "MSB", + "MSS", + "MST", + "MTS", + "MiTM", + "NIP", + "NIST", + "NITO", + "NOC", + "NOOP", + "NT", + "NVDEC", + "NVENC", + "NVJPG", + "NVM", + "NVVSE", + "OEM", + "OFA", + "OS", + "OSP", + "OTP", + "PB", + "PCIE", + "PCPU", + "PCR", + "PCT", + "PDK", + "PII", + "PIP", + "PKC", + "PKCS", + "PKI", + "PL", + "PLA", + "POR", + "PPC", + "PSC", + "PTXT", + "PVA", + "QNX", + "QOS", + "QSPI", + "RBG", + "RBR", + "RC", + "RDEV", + "REE", + "RMA", + "RMW", + "RSA", + "RSB", + "RTS", + "RoT", + "SAE", + "SBK", + "SCH", + "SDK", + "SE", + "SEL0", + "SEL1", + "SEooC", + "SGM", + "SHA", + "SHA256", + "SHA512", + "SHE", + "SKU", + "SNOC", + "SO", + "SPA", + "SQ", + "SSR", + "SST", + "SW", + "SWAT", + "SoC", + "TA", + "TCF", + "TEE", + "THI", + "TNR", + "TOS", + "TRC", + "TRL", + "TSEC", + "TZ", + "UFS", + "VBR", + "VCPU", + "VI", + "VIC", + "VMEM", + "VMID", + "VPU", + "VRC", + "VUI", + "WARB", + "XIP", + "bpp", + "eMMC", + "hfPLA", + "iGPU", + "ipc", + "ipc_fg", + "ipc_t", + "sbPLA", + "xBTV", + "xps" + ], + 
"bos_token": "<|startoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|return|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|endoftext|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1642/trainer_state.json b/checkpoint-1642/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4c613853919f8c662762f74a27b0b41d643ac610 --- /dev/null +++ b/checkpoint-1642/trainer_state.json @@ -0,0 +1,1510 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1642, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0060901339829476245, + "grad_norm": 23.5319766998291, + "learning_rate": 1.2162162162162164e-05, + "loss": 4.5905, + "mean_token_accuracy": 0.3401473943144083, + "num_tokens": 132681.0, + "step": 10 + }, + { + "epoch": 0.012180267965895249, + "grad_norm": 6.916630744934082, + "learning_rate": 2.5675675675675675e-05, + "loss": 3.957, + "mean_token_accuracy": 0.3799716055393219, + "num_tokens": 264238.0, + "step": 20 + }, + { + "epoch": 0.018270401948842874, + "grad_norm": 2.717982292175293, + "learning_rate": 3.918918918918919e-05, + "loss": 3.047, + "mean_token_accuracy": 0.46811963245272636, + "num_tokens": 401308.0, + "step": 30 + }, + { + "epoch": 0.024360535931790498, + "grad_norm": 2.407865524291992, + "learning_rate": 5.27027027027027e-05, + "loss": 2.5322, + "mean_token_accuracy": 0.5190828196704388, + "num_tokens": 532444.0, + "step": 40 + }, + { + "epoch": 0.030450669914738125, + "grad_norm": 1.018301248550415, + "learning_rate": 6.621621621621621e-05, + "loss": 2.1325, + "mean_token_accuracy": 0.5782605841755867, + "num_tokens": 660406.0, + "step": 50 + }, + { + "epoch": 
0.03654080389768575, + "grad_norm": 0.7315741181373596, + "learning_rate": 7.972972972972974e-05, + "loss": 1.9044, + "mean_token_accuracy": 0.6264914631843567, + "num_tokens": 795304.0, + "step": 60 + }, + { + "epoch": 0.04263093788063337, + "grad_norm": 0.6652920246124268, + "learning_rate": 9.324324324324324e-05, + "loss": 1.633, + "mean_token_accuracy": 0.6683938711881637, + "num_tokens": 934543.0, + "step": 70 + }, + { + "epoch": 0.048721071863580996, + "grad_norm": 0.6119660139083862, + "learning_rate": 0.00010675675675675677, + "loss": 1.543, + "mean_token_accuracy": 0.6834091022610664, + "num_tokens": 1070669.0, + "step": 80 + }, + { + "epoch": 0.05481120584652863, + "grad_norm": 0.591424286365509, + "learning_rate": 0.00012027027027027027, + "loss": 1.4154, + "mean_token_accuracy": 0.6991497233510018, + "num_tokens": 1211114.0, + "step": 90 + }, + { + "epoch": 0.06090133982947625, + "grad_norm": 0.5663530230522156, + "learning_rate": 0.0001337837837837838, + "loss": 1.3176, + "mean_token_accuracy": 0.7089206710457802, + "num_tokens": 1349584.0, + "step": 100 + }, + { + "epoch": 0.06699147381242387, + "grad_norm": 0.5881878137588501, + "learning_rate": 0.0001472972972972973, + "loss": 1.2293, + "mean_token_accuracy": 0.7254403859376908, + "num_tokens": 1487515.0, + "step": 110 + }, + { + "epoch": 0.0730816077953715, + "grad_norm": 0.7664394974708557, + "learning_rate": 0.00016081081081081083, + "loss": 1.1814, + "mean_token_accuracy": 0.7306812778115273, + "num_tokens": 1618603.0, + "step": 120 + }, + { + "epoch": 0.07917174177831912, + "grad_norm": 0.6155670881271362, + "learning_rate": 0.00017432432432432432, + "loss": 1.1967, + "mean_token_accuracy": 0.7284250959753991, + "num_tokens": 1750466.0, + "step": 130 + }, + { + "epoch": 0.08526187576126674, + "grad_norm": 0.5296258330345154, + "learning_rate": 0.00018783783783783784, + "loss": 1.0955, + "mean_token_accuracy": 0.7472824215888977, + "num_tokens": 1887913.0, + "step": 140 + }, + { + "epoch": 
0.09135200974421437, + "grad_norm": 0.5564976334571838, + "learning_rate": 0.00019999998054550544, + "loss": 1.118, + "mean_token_accuracy": 0.7397311359643937, + "num_tokens": 2018579.0, + "step": 150 + }, + { + "epoch": 0.09744214372716199, + "grad_norm": 0.5301142930984497, + "learning_rate": 0.00019999764601633156, + "loss": 1.045, + "mean_token_accuracy": 0.7519380420446395, + "num_tokens": 2158851.0, + "step": 160 + }, + { + "epoch": 0.10353227771010962, + "grad_norm": 0.5949111580848694, + "learning_rate": 0.00019999142070388495, + "loss": 1.0497, + "mean_token_accuracy": 0.7520910769701004, + "num_tokens": 2296715.0, + "step": 170 + }, + { + "epoch": 0.10962241169305725, + "grad_norm": 0.6169262528419495, + "learning_rate": 0.0001999813048772986, + "loss": 1.0821, + "mean_token_accuracy": 0.7406247839331627, + "num_tokens": 2424756.0, + "step": 180 + }, + { + "epoch": 0.11571254567600488, + "grad_norm": 0.58912593126297, + "learning_rate": 0.00019996729897390057, + "loss": 1.0286, + "mean_token_accuracy": 0.7527454376220704, + "num_tokens": 2559362.0, + "step": 190 + }, + { + "epoch": 0.1218026796589525, + "grad_norm": 0.5084304213523865, + "learning_rate": 0.00019994940359919483, + "loss": 0.992, + "mean_token_accuracy": 0.7640391126275062, + "num_tokens": 2700231.0, + "step": 200 + }, + { + "epoch": 0.1278928136419001, + "grad_norm": 0.5790796279907227, + "learning_rate": 0.00019992761952683516, + "loss": 1.0146, + "mean_token_accuracy": 0.7554366230964661, + "num_tokens": 2831324.0, + "step": 210 + }, + { + "epoch": 0.13398294762484775, + "grad_norm": 0.5852051377296448, + "learning_rate": 0.00019990194769859188, + "loss": 0.978, + "mean_token_accuracy": 0.7612502485513687, + "num_tokens": 2967346.0, + "step": 220 + }, + { + "epoch": 0.14007308160779536, + "grad_norm": 0.5102785229682922, + "learning_rate": 0.00019987238922431088, + "loss": 0.9616, + "mean_token_accuracy": 0.7677591517567635, + "num_tokens": 3110936.0, + "step": 230 + }, + { + "epoch": 
0.146163215590743, + "grad_norm": 0.5472669005393982, + "learning_rate": 0.00019983894538186576, + "loss": 0.9535, + "mean_token_accuracy": 0.76737689524889, + "num_tokens": 3247496.0, + "step": 240 + }, + { + "epoch": 0.15225334957369063, + "grad_norm": 0.5611053109169006, + "learning_rate": 0.0001998016176171026, + "loss": 0.9577, + "mean_token_accuracy": 0.7626092001795769, + "num_tokens": 3384178.0, + "step": 250 + }, + { + "epoch": 0.15834348355663824, + "grad_norm": 0.54055255651474, + "learning_rate": 0.0001997604075437774, + "loss": 0.9907, + "mean_token_accuracy": 0.7575223430991173, + "num_tokens": 3517617.0, + "step": 260 + }, + { + "epoch": 0.16443361753958588, + "grad_norm": 0.558316707611084, + "learning_rate": 0.0001997153169434864, + "loss": 0.944, + "mean_token_accuracy": 0.7664194419980049, + "num_tokens": 3662878.0, + "step": 270 + }, + { + "epoch": 0.1705237515225335, + "grad_norm": 0.49766939878463745, + "learning_rate": 0.0001996663477655889, + "loss": 0.9106, + "mean_token_accuracy": 0.7760038167238236, + "num_tokens": 3807411.0, + "step": 280 + }, + { + "epoch": 0.17661388550548113, + "grad_norm": 0.4953667223453522, + "learning_rate": 0.0001996135021271232, + "loss": 0.9687, + "mean_token_accuracy": 0.7605679705739021, + "num_tokens": 3936840.0, + "step": 290 + }, + { + "epoch": 0.18270401948842874, + "grad_norm": 0.5447947978973389, + "learning_rate": 0.00019955678231271484, + "loss": 0.9625, + "mean_token_accuracy": 0.7603292793035508, + "num_tokens": 4067826.0, + "step": 300 + }, + { + "epoch": 0.18879415347137637, + "grad_norm": 0.4665842056274414, + "learning_rate": 0.00019949619077447807, + "loss": 0.9372, + "mean_token_accuracy": 0.7676101759076118, + "num_tokens": 4205887.0, + "step": 310 + }, + { + "epoch": 0.19488428745432398, + "grad_norm": 0.515690267086029, + "learning_rate": 0.00019943173013190965, + "loss": 0.923, + "mean_token_accuracy": 0.7708473294973374, + "num_tokens": 4342894.0, + "step": 320 + }, + { + "epoch": 
0.20097442143727162, + "grad_norm": 0.5831382274627686, + "learning_rate": 0.00019936340317177565, + "loss": 0.9203, + "mean_token_accuracy": 0.7708552837371826, + "num_tokens": 4477651.0, + "step": 330 + }, + { + "epoch": 0.20706455542021923, + "grad_norm": 0.6162773966789246, + "learning_rate": 0.0001992912128479911, + "loss": 0.916, + "mean_token_accuracy": 0.7702088996767997, + "num_tokens": 4610746.0, + "step": 340 + }, + { + "epoch": 0.21315468940316687, + "grad_norm": 0.5172462463378906, + "learning_rate": 0.00019921516228149207, + "loss": 0.8942, + "mean_token_accuracy": 0.7741821393370628, + "num_tokens": 4751175.0, + "step": 350 + }, + { + "epoch": 0.2192448233861145, + "grad_norm": 0.5890468955039978, + "learning_rate": 0.0001991352547601009, + "loss": 0.9229, + "mean_token_accuracy": 0.7691043332219124, + "num_tokens": 4882328.0, + "step": 360 + }, + { + "epoch": 0.22533495736906212, + "grad_norm": 0.5522404909133911, + "learning_rate": 0.00019905149373838408, + "loss": 0.9294, + "mean_token_accuracy": 0.7646071568131447, + "num_tokens": 5012181.0, + "step": 370 + }, + { + "epoch": 0.23142509135200975, + "grad_norm": 0.5349445939064026, + "learning_rate": 0.0001989638828375028, + "loss": 0.8797, + "mean_token_accuracy": 0.7771721839904785, + "num_tokens": 5151133.0, + "step": 380 + }, + { + "epoch": 0.23751522533495736, + "grad_norm": 0.531052827835083, + "learning_rate": 0.00019887242584505635, + "loss": 0.9221, + "mean_token_accuracy": 0.7678465083241462, + "num_tokens": 5279790.0, + "step": 390 + }, + { + "epoch": 0.243605359317905, + "grad_norm": 0.5126324892044067, + "learning_rate": 0.00019877712671491864, + "loss": 0.8862, + "mean_token_accuracy": 0.7739894777536392, + "num_tokens": 5412390.0, + "step": 400 + }, + { + "epoch": 0.2496954933008526, + "grad_norm": 0.5111438632011414, + "learning_rate": 0.00019867798956706693, + "loss": 0.9005, + "mean_token_accuracy": 0.7721902653574944, + "num_tokens": 5545801.0, + "step": 410 + }, + { + "epoch": 
0.2557856272838002, + "grad_norm": 0.5488138794898987, + "learning_rate": 0.00019857501868740402, + "loss": 0.8988, + "mean_token_accuracy": 0.7690282896161079, + "num_tokens": 5673758.0, + "step": 420 + }, + { + "epoch": 0.2618757612667479, + "grad_norm": 0.5497994422912598, + "learning_rate": 0.0001984682185275727, + "loss": 0.8802, + "mean_token_accuracy": 0.7780183687806129, + "num_tokens": 5813158.0, + "step": 430 + }, + { + "epoch": 0.2679658952496955, + "grad_norm": 0.5478431582450867, + "learning_rate": 0.0001983575937047635, + "loss": 0.865, + "mean_token_accuracy": 0.7785944610834121, + "num_tokens": 5947367.0, + "step": 440 + }, + { + "epoch": 0.2740560292326431, + "grad_norm": 0.5188766717910767, + "learning_rate": 0.00019824314900151487, + "loss": 0.8798, + "mean_token_accuracy": 0.7752803862094879, + "num_tokens": 6081060.0, + "step": 450 + }, + { + "epoch": 0.2801461632155907, + "grad_norm": 0.530222475528717, + "learning_rate": 0.00019812488936550666, + "loss": 0.8628, + "mean_token_accuracy": 0.7801630645990372, + "num_tokens": 6217834.0, + "step": 460 + }, + { + "epoch": 0.2862362971985384, + "grad_norm": 0.5987964868545532, + "learning_rate": 0.00019800281990934614, + "loss": 0.8775, + "mean_token_accuracy": 0.7760324433445931, + "num_tokens": 6350451.0, + "step": 470 + }, + { + "epoch": 0.292326431181486, + "grad_norm": 0.5468559265136719, + "learning_rate": 0.0001978769459103468, + "loss": 0.8721, + "mean_token_accuracy": 0.7794204503297806, + "num_tokens": 6484738.0, + "step": 480 + }, + { + "epoch": 0.2984165651644336, + "grad_norm": 0.5541098117828369, + "learning_rate": 0.0001977472728103005, + "loss": 0.8785, + "mean_token_accuracy": 0.7767582029104233, + "num_tokens": 6619313.0, + "step": 490 + }, + { + "epoch": 0.30450669914738127, + "grad_norm": 0.5134281516075134, + "learning_rate": 0.0001976138062152419, + "loss": 0.8717, + "mean_token_accuracy": 0.7752724394202233, + "num_tokens": 6753195.0, + "step": 500 + }, + { + "epoch": 
0.3105968331303289, + "grad_norm": 0.49164435267448425, + "learning_rate": 0.00019747655189520633, + "loss": 0.8757, + "mean_token_accuracy": 0.7768464118242264, + "num_tokens": 6890448.0, + "step": 510 + }, + { + "epoch": 0.3166869671132765, + "grad_norm": 0.5899345278739929, + "learning_rate": 0.00019733551578398023, + "loss": 0.8322, + "mean_token_accuracy": 0.7859320402145386, + "num_tokens": 7027488.0, + "step": 520 + }, + { + "epoch": 0.3227771010962241, + "grad_norm": 0.6552841663360596, + "learning_rate": 0.0001971907039788447, + "loss": 0.861, + "mean_token_accuracy": 0.7770532324910164, + "num_tokens": 7161184.0, + "step": 530 + }, + { + "epoch": 0.32886723507917176, + "grad_norm": 0.5038822889328003, + "learning_rate": 0.0001970421227403117, + "loss": 0.8825, + "mean_token_accuracy": 0.775890800356865, + "num_tokens": 7294399.0, + "step": 540 + }, + { + "epoch": 0.33495736906211937, + "grad_norm": 0.5094267129898071, + "learning_rate": 0.00019688977849185378, + "loss": 0.8598, + "mean_token_accuracy": 0.7817838475108146, + "num_tokens": 7427183.0, + "step": 550 + }, + { + "epoch": 0.341047503045067, + "grad_norm": 0.5282809138298035, + "learning_rate": 0.00019673367781962594, + "loss": 0.8463, + "mean_token_accuracy": 0.7812959104776382, + "num_tokens": 7561734.0, + "step": 560 + }, + { + "epoch": 0.3471376370280146, + "grad_norm": 0.45355409383773804, + "learning_rate": 0.00019657382747218123, + "loss": 0.8207, + "mean_token_accuracy": 0.7888262197375298, + "num_tokens": 7706228.0, + "step": 570 + }, + { + "epoch": 0.35322777101096225, + "grad_norm": 0.5162333846092224, + "learning_rate": 0.00019641023436017883, + "loss": 0.8235, + "mean_token_accuracy": 0.7868947923183441, + "num_tokens": 7846684.0, + "step": 580 + }, + { + "epoch": 0.35931790499390986, + "grad_norm": 0.5194632411003113, + "learning_rate": 0.00019624290555608526, + "loss": 0.8129, + "mean_token_accuracy": 0.7884069249033928, + "num_tokens": 7986811.0, + "step": 590 + }, + { + "epoch": 
0.3654080389768575, + "grad_norm": 0.5494846701622009, + "learning_rate": 0.00019607184829386882, + "loss": 0.8084, + "mean_token_accuracy": 0.7874000474810601, + "num_tokens": 8124538.0, + "step": 600 + }, + { + "epoch": 0.37149817295980514, + "grad_norm": 0.5368776917457581, + "learning_rate": 0.0001958970699686866, + "loss": 0.8225, + "mean_token_accuracy": 0.783010233938694, + "num_tokens": 8260529.0, + "step": 610 + }, + { + "epoch": 0.37758830694275275, + "grad_norm": 0.6229024529457092, + "learning_rate": 0.00019571857813656496, + "loss": 0.8786, + "mean_token_accuracy": 0.7753148928284646, + "num_tokens": 8389042.0, + "step": 620 + }, + { + "epoch": 0.38367844092570036, + "grad_norm": 0.5601000785827637, + "learning_rate": 0.00019553638051407279, + "loss": 0.8909, + "mean_token_accuracy": 0.7745720192790031, + "num_tokens": 8513603.0, + "step": 630 + }, + { + "epoch": 0.38976857490864797, + "grad_norm": 0.438970685005188, + "learning_rate": 0.0001953504849779879, + "loss": 0.8085, + "mean_token_accuracy": 0.7871840804815292, + "num_tokens": 8652970.0, + "step": 640 + }, + { + "epoch": 0.39585870889159563, + "grad_norm": 0.5505132079124451, + "learning_rate": 0.00019516089956495648, + "loss": 0.8102, + "mean_token_accuracy": 0.7869585514068603, + "num_tokens": 8792103.0, + "step": 650 + }, + { + "epoch": 0.40194884287454324, + "grad_norm": 0.5447221398353577, + "learning_rate": 0.00019496763247114581, + "loss": 0.8336, + "mean_token_accuracy": 0.7816034242510795, + "num_tokens": 8926853.0, + "step": 660 + }, + { + "epoch": 0.40803897685749085, + "grad_norm": 0.4652746915817261, + "learning_rate": 0.00019477069205188965, + "loss": 0.8383, + "mean_token_accuracy": 0.7826304718852043, + "num_tokens": 9059592.0, + "step": 670 + }, + { + "epoch": 0.41412911084043846, + "grad_norm": 0.42363590002059937, + "learning_rate": 0.00019457008682132726, + "loss": 0.847, + "mean_token_accuracy": 0.7810002073645592, + "num_tokens": 9193062.0, + "step": 680 + }, + { + 
"epoch": 0.42021924482338613, + "grad_norm": 0.5209478735923767, + "learning_rate": 0.00019436582545203518, + "loss": 0.8766, + "mean_token_accuracy": 0.7733785718679428, + "num_tokens": 9315805.0, + "step": 690 + }, + { + "epoch": 0.42630937880633374, + "grad_norm": 0.5176642537117004, + "learning_rate": 0.00019415791677465237, + "loss": 0.8155, + "mean_token_accuracy": 0.7869213685393334, + "num_tokens": 9448863.0, + "step": 700 + }, + { + "epoch": 0.43239951278928135, + "grad_norm": 0.4531058371067047, + "learning_rate": 0.00019394636977749843, + "loss": 0.8096, + "mean_token_accuracy": 0.7903949975967407, + "num_tokens": 9589382.0, + "step": 710 + }, + { + "epoch": 0.438489646772229, + "grad_norm": 0.5651549100875854, + "learning_rate": 0.000193731193606185, + "loss": 0.8263, + "mean_token_accuracy": 0.7823062822222709, + "num_tokens": 9723562.0, + "step": 720 + }, + { + "epoch": 0.4445797807551766, + "grad_norm": 0.5377989411354065, + "learning_rate": 0.00019351239756322031, + "loss": 0.7993, + "mean_token_accuracy": 0.7908329650759697, + "num_tokens": 9859255.0, + "step": 730 + }, + { + "epoch": 0.45066991473812423, + "grad_norm": 0.5420868396759033, + "learning_rate": 0.00019328999110760722, + "loss": 0.8461, + "mean_token_accuracy": 0.7780480548739434, + "num_tokens": 9981578.0, + "step": 740 + }, + { + "epoch": 0.45676004872107184, + "grad_norm": 0.4889216125011444, + "learning_rate": 0.000193063983854434, + "loss": 0.7652, + "mean_token_accuracy": 0.7959530428051949, + "num_tokens": 10122922.0, + "step": 750 + }, + { + "epoch": 0.4628501827040195, + "grad_norm": 0.5044087767601013, + "learning_rate": 0.00019283438557445893, + "loss": 0.824, + "mean_token_accuracy": 0.7845935523509979, + "num_tokens": 10252854.0, + "step": 760 + }, + { + "epoch": 0.4689403166869671, + "grad_norm": 0.5286466479301453, + "learning_rate": 0.00019260120619368773, + "loss": 0.815, + "mean_token_accuracy": 0.7850656941533088, + "num_tokens": 10385075.0, + "step": 770 + }, + { + 
"epoch": 0.47503045066991473, + "grad_norm": 0.5441628694534302, + "learning_rate": 0.00019236445579294437, + "loss": 0.8048, + "mean_token_accuracy": 0.7876680314540863, + "num_tokens": 10520011.0, + "step": 780 + }, + { + "epoch": 0.48112058465286234, + "grad_norm": 0.49002447724342346, + "learning_rate": 0.0001921241446074355, + "loss": 0.8059, + "mean_token_accuracy": 0.7898563235998154, + "num_tokens": 10652488.0, + "step": 790 + }, + { + "epoch": 0.48721071863581, + "grad_norm": 0.4479144811630249, + "learning_rate": 0.0001918802830263077, + "loss": 0.7913, + "mean_token_accuracy": 0.7928732186555862, + "num_tokens": 10785974.0, + "step": 800 + }, + { + "epoch": 0.4933008526187576, + "grad_norm": 0.5007497668266296, + "learning_rate": 0.00019163288159219853, + "loss": 0.8083, + "mean_token_accuracy": 0.7893043681979179, + "num_tokens": 10920950.0, + "step": 810 + }, + { + "epoch": 0.4993909866017052, + "grad_norm": 0.5289483070373535, + "learning_rate": 0.00019138195100078064, + "loss": 0.8033, + "mean_token_accuracy": 0.7864485770463944, + "num_tokens": 11056380.0, + "step": 820 + }, + { + "epoch": 0.5054811205846529, + "grad_norm": 0.5604159832000732, + "learning_rate": 0.0001911275021002994, + "loss": 0.7652, + "mean_token_accuracy": 0.7946401730179786, + "num_tokens": 11196074.0, + "step": 830 + }, + { + "epoch": 0.5115712545676004, + "grad_norm": 0.43645399808883667, + "learning_rate": 0.00019086954589110397, + "loss": 0.7724, + "mean_token_accuracy": 0.7990294560790062, + "num_tokens": 11337990.0, + "step": 840 + }, + { + "epoch": 0.5176613885505481, + "grad_norm": 0.43992146849632263, + "learning_rate": 0.0001906080935251716, + "loss": 0.7612, + "mean_token_accuracy": 0.7999786615371705, + "num_tokens": 11481565.0, + "step": 850 + }, + { + "epoch": 0.5237515225334958, + "grad_norm": 0.5595120191574097, + "learning_rate": 0.0001903431563056256, + "loss": 0.8266, + "mean_token_accuracy": 0.7859750911593437, + "num_tokens": 11611714.0, + "step": 860 + }, 
+ { + "epoch": 0.5298416565164433, + "grad_norm": 0.5001987218856812, + "learning_rate": 0.0001900747456862467, + "loss": 0.8506, + "mean_token_accuracy": 0.779585388302803, + "num_tokens": 11736573.0, + "step": 870 + }, + { + "epoch": 0.535931790499391, + "grad_norm": 0.430147647857666, + "learning_rate": 0.00018980287327097784, + "loss": 0.7707, + "mean_token_accuracy": 0.795211361348629, + "num_tokens": 11876859.0, + "step": 880 + }, + { + "epoch": 0.5420219244823387, + "grad_norm": 0.5346289873123169, + "learning_rate": 0.00018952755081342245, + "loss": 0.8057, + "mean_token_accuracy": 0.7871127843856811, + "num_tokens": 12007654.0, + "step": 890 + }, + { + "epoch": 0.5481120584652862, + "grad_norm": 0.46072253584861755, + "learning_rate": 0.00018924879021633653, + "loss": 0.7924, + "mean_token_accuracy": 0.7913773030042648, + "num_tokens": 12140520.0, + "step": 900 + }, + { + "epoch": 0.5542021924482339, + "grad_norm": 0.4803653955459595, + "learning_rate": 0.00018896660353111375, + "loss": 0.8398, + "mean_token_accuracy": 0.7807079553604126, + "num_tokens": 12267219.0, + "step": 910 + }, + { + "epoch": 0.5602923264311814, + "grad_norm": 0.5219636559486389, + "learning_rate": 0.0001886810029572647, + "loss": 0.7612, + "mean_token_accuracy": 0.7993015512824059, + "num_tokens": 12404646.0, + "step": 920 + }, + { + "epoch": 0.5663824604141291, + "grad_norm": 0.501483142375946, + "learning_rate": 0.00018839200084188936, + "loss": 0.7953, + "mean_token_accuracy": 0.787814213335514, + "num_tokens": 12538219.0, + "step": 930 + }, + { + "epoch": 0.5724725943970768, + "grad_norm": 0.47334522008895874, + "learning_rate": 0.00018809960967914346, + "loss": 0.789, + "mean_token_accuracy": 0.7928574904799461, + "num_tokens": 12673805.0, + "step": 940 + }, + { + "epoch": 0.5785627283800243, + "grad_norm": 0.5057492852210999, + "learning_rate": 0.00018780384210969806, + "loss": 0.7746, + "mean_token_accuracy": 0.7947553545236588, + "num_tokens": 12811727.0, + "step": 950 + }, 
+ { + "epoch": 0.584652862362972, + "grad_norm": 0.5179910659790039, + "learning_rate": 0.00018750471092019325, + "loss": 0.7962, + "mean_token_accuracy": 0.7905686929821968, + "num_tokens": 12947641.0, + "step": 960 + }, + { + "epoch": 0.5907429963459196, + "grad_norm": 0.45797088742256165, + "learning_rate": 0.00018720222904268543, + "loss": 0.7678, + "mean_token_accuracy": 0.7969774708151818, + "num_tokens": 13083869.0, + "step": 970 + }, + { + "epoch": 0.5968331303288672, + "grad_norm": 0.48360612988471985, + "learning_rate": 0.00018689640955408803, + "loss": 0.7996, + "mean_token_accuracy": 0.7885591968894005, + "num_tokens": 13211807.0, + "step": 980 + }, + { + "epoch": 0.6029232643118149, + "grad_norm": 0.4378497004508972, + "learning_rate": 0.00018658726567560635, + "loss": 0.7652, + "mean_token_accuracy": 0.7969291344285011, + "num_tokens": 13351856.0, + "step": 990 + }, + { + "epoch": 0.6090133982947625, + "grad_norm": 0.4857536852359772, + "learning_rate": 0.00018627481077216577, + "loss": 0.7786, + "mean_token_accuracy": 0.7914443418383599, + "num_tokens": 13486443.0, + "step": 1000 + }, + { + "epoch": 0.6151035322777101, + "grad_norm": 0.5233064293861389, + "learning_rate": 0.0001859590583518343, + "loss": 0.8241, + "mean_token_accuracy": 0.7811850637197495, + "num_tokens": 13612035.0, + "step": 1010 + }, + { + "epoch": 0.6211936662606578, + "grad_norm": 0.5328738689422607, + "learning_rate": 0.00018564002206523816, + "loss": 0.7502, + "mean_token_accuracy": 0.7993430674076081, + "num_tokens": 13756509.0, + "step": 1020 + }, + { + "epoch": 0.6272838002436053, + "grad_norm": 0.47962310910224915, + "learning_rate": 0.000185317715704972, + "loss": 0.7984, + "mean_token_accuracy": 0.7864531084895134, + "num_tokens": 13883033.0, + "step": 1030 + }, + { + "epoch": 0.633373934226553, + "grad_norm": 0.5685893893241882, + "learning_rate": 0.0001849921532050024, + "loss": 0.7869, + "mean_token_accuracy": 0.7909937381744385, + "num_tokens": 14015234.0, + "step": 
1040 + }, + { + "epoch": 0.6394640682095006, + "grad_norm": 0.49146631360054016, + "learning_rate": 0.00018466334864006566, + "loss": 0.7952, + "mean_token_accuracy": 0.7878949210047722, + "num_tokens": 14149319.0, + "step": 1050 + }, + { + "epoch": 0.6455542021924482, + "grad_norm": 0.5556225776672363, + "learning_rate": 0.0001843313162250591, + "loss": 0.7524, + "mean_token_accuracy": 0.7994373366236687, + "num_tokens": 14286868.0, + "step": 1060 + }, + { + "epoch": 0.6516443361753959, + "grad_norm": 0.511379063129425, + "learning_rate": 0.00018399607031442666, + "loss": 0.7929, + "mean_token_accuracy": 0.7921562284231186, + "num_tokens": 14418354.0, + "step": 1070 + }, + { + "epoch": 0.6577344701583435, + "grad_norm": 0.5019840598106384, + "learning_rate": 0.00018365762540153836, + "loss": 0.758, + "mean_token_accuracy": 0.7989353060722351, + "num_tokens": 14553174.0, + "step": 1080 + }, + { + "epoch": 0.6638246041412911, + "grad_norm": 0.6032467484474182, + "learning_rate": 0.00018331599611806366, + "loss": 0.7888, + "mean_token_accuracy": 0.7903819754719734, + "num_tokens": 14681393.0, + "step": 1090 + }, + { + "epoch": 0.6699147381242387, + "grad_norm": 0.5369830131530762, + "learning_rate": 0.00018297119723333877, + "loss": 0.765, + "mean_token_accuracy": 0.7950262635946274, + "num_tokens": 14814565.0, + "step": 1100 + }, + { + "epoch": 0.6760048721071864, + "grad_norm": 0.5289803743362427, + "learning_rate": 0.00018262324365372846, + "loss": 0.7496, + "mean_token_accuracy": 0.8032818242907525, + "num_tokens": 14954351.0, + "step": 1110 + }, + { + "epoch": 0.682095006090134, + "grad_norm": 0.5440439581871033, + "learning_rate": 0.0001822721504219814, + "loss": 0.7432, + "mean_token_accuracy": 0.799126236140728, + "num_tokens": 15094879.0, + "step": 1120 + }, + { + "epoch": 0.6881851400730816, + "grad_norm": 0.46225935220718384, + "learning_rate": 0.00018191793271657978, + "loss": 0.7513, + "mean_token_accuracy": 0.8022688791155815, + "num_tokens": 
15234906.0, + "step": 1130 + }, + { + "epoch": 0.6942752740560292, + "grad_norm": 0.5592020750045776, + "learning_rate": 0.0001815606058510833, + "loss": 0.7583, + "mean_token_accuracy": 0.7984497547149658, + "num_tokens": 15373526.0, + "step": 1140 + }, + { + "epoch": 0.7003654080389768, + "grad_norm": 0.525090217590332, + "learning_rate": 0.00018120018527346702, + "loss": 0.7254, + "mean_token_accuracy": 0.8070619881153107, + "num_tokens": 15516264.0, + "step": 1150 + }, + { + "epoch": 0.7064555420219245, + "grad_norm": 0.5380759239196777, + "learning_rate": 0.00018083668656545355, + "loss": 0.8041, + "mean_token_accuracy": 0.7866759791970253, + "num_tokens": 15640444.0, + "step": 1160 + }, + { + "epoch": 0.7125456760048721, + "grad_norm": 0.47815701365470886, + "learning_rate": 0.00018047012544183938, + "loss": 0.7604, + "mean_token_accuracy": 0.796156468987465, + "num_tokens": 15778070.0, + "step": 1170 + }, + { + "epoch": 0.7186358099878197, + "grad_norm": 0.5380450487136841, + "learning_rate": 0.00018010051774981553, + "loss": 0.8135, + "mean_token_accuracy": 0.7842124432325364, + "num_tokens": 15899739.0, + "step": 1180 + }, + { + "epoch": 0.7247259439707674, + "grad_norm": 0.5047502517700195, + "learning_rate": 0.00017972787946828246, + "loss": 0.7642, + "mean_token_accuracy": 0.7989341139793396, + "num_tokens": 16035805.0, + "step": 1190 + }, + { + "epoch": 0.730816077953715, + "grad_norm": 0.5440967679023743, + "learning_rate": 0.00017935222670715918, + "loss": 0.735, + "mean_token_accuracy": 0.8048294603824615, + "num_tokens": 16172541.0, + "step": 1200 + }, + { + "epoch": 0.7369062119366626, + "grad_norm": 0.4766077399253845, + "learning_rate": 0.000178973575706687, + "loss": 0.805, + "mean_token_accuracy": 0.7871790423989296, + "num_tokens": 16296988.0, + "step": 1210 + }, + { + "epoch": 0.7429963459196103, + "grad_norm": 0.4153214991092682, + "learning_rate": 0.00017859194283672704, + "loss": 0.7635, + "mean_token_accuracy": 0.7964595645666123, + 
"num_tokens": 16432022.0, + "step": 1220 + }, + { + "epoch": 0.7490864799025578, + "grad_norm": 0.4698518216609955, + "learning_rate": 0.00017820734459605302, + "loss": 0.7397, + "mean_token_accuracy": 0.8046972885727882, + "num_tokens": 16572880.0, + "step": 1230 + }, + { + "epoch": 0.7551766138855055, + "grad_norm": 0.46101540327072144, + "learning_rate": 0.00017781979761163756, + "loss": 0.7174, + "mean_token_accuracy": 0.8066875368356705, + "num_tokens": 16714419.0, + "step": 1240 + }, + { + "epoch": 0.761266747868453, + "grad_norm": 0.5313341021537781, + "learning_rate": 0.00017742931863793358, + "loss": 0.7797, + "mean_token_accuracy": 0.7911526098847389, + "num_tokens": 16838285.0, + "step": 1250 + }, + { + "epoch": 0.7673568818514007, + "grad_norm": 0.4627362787723541, + "learning_rate": 0.00017703592455614998, + "loss": 0.7626, + "mean_token_accuracy": 0.7970306649804115, + "num_tokens": 16976065.0, + "step": 1260 + }, + { + "epoch": 0.7734470158343484, + "grad_norm": 0.5429073572158813, + "learning_rate": 0.00017663963237352177, + "loss": 0.7398, + "mean_token_accuracy": 0.8005403786897659, + "num_tokens": 17112901.0, + "step": 1270 + }, + { + "epoch": 0.7795371498172959, + "grad_norm": 0.6781270503997803, + "learning_rate": 0.00017624045922257471, + "loss": 0.7607, + "mean_token_accuracy": 0.7946217939257622, + "num_tokens": 17245480.0, + "step": 1280 + }, + { + "epoch": 0.7856272838002436, + "grad_norm": 0.5227305293083191, + "learning_rate": 0.00017583842236038483, + "loss": 0.7217, + "mean_token_accuracy": 0.8064659267663956, + "num_tokens": 17387171.0, + "step": 1290 + }, + { + "epoch": 0.7917174177831913, + "grad_norm": 0.49253156781196594, + "learning_rate": 0.0001754335391678323, + "loss": 0.7652, + "mean_token_accuracy": 0.7960015773773194, + "num_tokens": 17521164.0, + "step": 1300 + }, + { + "epoch": 0.7978075517661388, + "grad_norm": 0.5103631615638733, + "learning_rate": 0.00017502582714884997, + "loss": 0.7435, + "mean_token_accuracy": 
0.7995276898145676, + "num_tokens": 17657818.0, + "step": 1310 + }, + { + "epoch": 0.8038976857490865, + "grad_norm": 0.5531247854232788, + "learning_rate": 0.00017461530392966665, + "loss": 0.7986, + "mean_token_accuracy": 0.7892467245459557, + "num_tokens": 17784361.0, + "step": 1320 + }, + { + "epoch": 0.8099878197320342, + "grad_norm": 0.4574586749076843, + "learning_rate": 0.00017420198725804517, + "loss": 0.6889, + "mean_token_accuracy": 0.8135112956166267, + "num_tokens": 17929664.0, + "step": 1330 + }, + { + "epoch": 0.8160779537149817, + "grad_norm": 0.4734383225440979, + "learning_rate": 0.00017378589500251498, + "loss": 0.7308, + "mean_token_accuracy": 0.8029947131872177, + "num_tokens": 18071182.0, + "step": 1340 + }, + { + "epoch": 0.8221680876979294, + "grad_norm": 0.5192279815673828, + "learning_rate": 0.00017336704515159986, + "loss": 0.7444, + "mean_token_accuracy": 0.8012512847781181, + "num_tokens": 18211136.0, + "step": 1350 + }, + { + "epoch": 0.8282582216808769, + "grad_norm": 0.5378620624542236, + "learning_rate": 0.00017294545581303996, + "loss": 0.7459, + "mean_token_accuracy": 0.7981989249587059, + "num_tokens": 18340645.0, + "step": 1360 + }, + { + "epoch": 0.8343483556638246, + "grad_norm": 0.4879571497440338, + "learning_rate": 0.00017252114521300918, + "loss": 0.7877, + "mean_token_accuracy": 0.7891893342137337, + "num_tokens": 18465733.0, + "step": 1370 + }, + { + "epoch": 0.8404384896467723, + "grad_norm": 0.5297388434410095, + "learning_rate": 0.00017209413169532717, + "loss": 0.7586, + "mean_token_accuracy": 0.797142505645752, + "num_tokens": 18598979.0, + "step": 1380 + }, + { + "epoch": 0.8465286236297198, + "grad_norm": 0.5308396220207214, + "learning_rate": 0.00017166443372066618, + "loss": 0.7387, + "mean_token_accuracy": 0.80123979896307, + "num_tokens": 18735919.0, + "step": 1390 + }, + { + "epoch": 0.8526187576126675, + "grad_norm": 0.49988579750061035, + "learning_rate": 0.0001712320698657532, + "loss": 0.7425, + 
"mean_token_accuracy": 0.7996803268790245, + "num_tokens": 18870877.0, + "step": 1400 + }, + { + "epoch": 0.8587088915956151, + "grad_norm": 0.5971361994743347, + "learning_rate": 0.0001707970588225665, + "loss": 0.7691, + "mean_token_accuracy": 0.7922965154051781, + "num_tokens": 19000943.0, + "step": 1410 + }, + { + "epoch": 0.8647990255785627, + "grad_norm": 0.5141698718070984, + "learning_rate": 0.00017035941939752802, + "loss": 0.7203, + "mean_token_accuracy": 0.8036229625344277, + "num_tokens": 19135039.0, + "step": 1420 + }, + { + "epoch": 0.8708891595615104, + "grad_norm": 0.4647749066352844, + "learning_rate": 0.0001699191705106898, + "loss": 0.7136, + "mean_token_accuracy": 0.8064323276281357, + "num_tokens": 19274069.0, + "step": 1430 + }, + { + "epoch": 0.876979293544458, + "grad_norm": 0.5511934161186218, + "learning_rate": 0.00016947633119491633, + "loss": 0.7455, + "mean_token_accuracy": 0.7985599264502525, + "num_tokens": 19409679.0, + "step": 1440 + }, + { + "epoch": 0.8830694275274056, + "grad_norm": 0.4936945140361786, + "learning_rate": 0.00016903092059506182, + "loss": 0.7087, + "mean_token_accuracy": 0.806523185968399, + "num_tokens": 19547419.0, + "step": 1450 + }, + { + "epoch": 0.8891595615103532, + "grad_norm": 0.5227787494659424, + "learning_rate": 0.00016858295796714213, + "loss": 0.7739, + "mean_token_accuracy": 0.7941467314958572, + "num_tokens": 19674455.0, + "step": 1460 + }, + { + "epoch": 0.8952496954933008, + "grad_norm": 0.5046219825744629, + "learning_rate": 0.00016813246267750282, + "loss": 0.7361, + "mean_token_accuracy": 0.8008369222283364, + "num_tokens": 19809861.0, + "step": 1470 + }, + { + "epoch": 0.9013398294762485, + "grad_norm": 0.4827081263065338, + "learning_rate": 0.00016767945420198142, + "loss": 0.7464, + "mean_token_accuracy": 0.7986427888274192, + "num_tokens": 19940696.0, + "step": 1480 + }, + { + "epoch": 0.9074299634591961, + "grad_norm": 0.4970889687538147, + "learning_rate": 0.00016722395212506567, + 
"loss": 0.7528, + "mean_token_accuracy": 0.7965970665216446, + "num_tokens": 20070686.0, + "step": 1490 + }, + { + "epoch": 0.9135200974421437, + "grad_norm": 0.44478070735931396, + "learning_rate": 0.00016676597613904693, + "loss": 0.7185, + "mean_token_accuracy": 0.8081388726830483, + "num_tokens": 20210260.0, + "step": 1500 + }, + { + "epoch": 0.9196102314250914, + "grad_norm": 0.506136417388916, + "learning_rate": 0.00016630554604316866, + "loss": 0.7395, + "mean_token_accuracy": 0.8003876298666001, + "num_tokens": 20346235.0, + "step": 1510 + }, + { + "epoch": 0.925700365408039, + "grad_norm": 0.500946044921875, + "learning_rate": 0.00016584268174277053, + "loss": 0.6889, + "mean_token_accuracy": 0.8124501362442971, + "num_tokens": 20481248.0, + "step": 1520 + }, + { + "epoch": 0.9317904993909866, + "grad_norm": 0.48528990149497986, + "learning_rate": 0.00016537740324842795, + "loss": 0.7227, + "mean_token_accuracy": 0.8041250064969063, + "num_tokens": 20613531.0, + "step": 1530 + }, + { + "epoch": 0.9378806333739342, + "grad_norm": 0.5070951581001282, + "learning_rate": 0.00016490973067508674, + "loss": 0.7091, + "mean_token_accuracy": 0.8082544595003128, + "num_tokens": 20750784.0, + "step": 1540 + }, + { + "epoch": 0.9439707673568819, + "grad_norm": 0.5583120584487915, + "learning_rate": 0.0001644396842411939, + "loss": 0.7405, + "mean_token_accuracy": 0.7992320343852043, + "num_tokens": 20883646.0, + "step": 1550 + }, + { + "epoch": 0.9500609013398295, + "grad_norm": 0.5099635124206543, + "learning_rate": 0.00016396728426782312, + "loss": 0.7103, + "mean_token_accuracy": 0.8091216519474983, + "num_tokens": 21025143.0, + "step": 1560 + }, + { + "epoch": 0.9561510353227771, + "grad_norm": 0.5777808427810669, + "learning_rate": 0.00016349255117779652, + "loss": 0.7245, + "mean_token_accuracy": 0.8023119494318962, + "num_tokens": 21160014.0, + "step": 1570 + }, + { + "epoch": 0.9622411693057247, + "grad_norm": 0.5206162333488464, + "learning_rate": 
0.0001630155054948016, + "loss": 0.7185, + "mean_token_accuracy": 0.8069521963596344, + "num_tokens": 21299094.0, + "step": 1580 + }, + { + "epoch": 0.9683313032886723, + "grad_norm": 0.5763202905654907, + "learning_rate": 0.00016253616784250415, + "loss": 0.7677, + "mean_token_accuracy": 0.7927820891141891, + "num_tokens": 21429252.0, + "step": 1590 + }, + { + "epoch": 0.97442143727162, + "grad_norm": 0.5068426728248596, + "learning_rate": 0.00016205455894365627, + "loss": 0.7673, + "mean_token_accuracy": 0.794715291261673, + "num_tokens": 21556200.0, + "step": 1600 + }, + { + "epoch": 0.9805115712545676, + "grad_norm": 0.46094459295272827, + "learning_rate": 0.0001615706996192009, + "loss": 0.771, + "mean_token_accuracy": 0.7921045809984207, + "num_tokens": 21681524.0, + "step": 1610 + }, + { + "epoch": 0.9866017052375152, + "grad_norm": 0.5063546299934387, + "learning_rate": 0.00016108461078737148, + "loss": 0.7383, + "mean_token_accuracy": 0.800596435368061, + "num_tokens": 21814109.0, + "step": 1620 + }, + { + "epoch": 0.9926918392204629, + "grad_norm": 0.5418652296066284, + "learning_rate": 0.0001605963134627876, + "loss": 0.7431, + "mean_token_accuracy": 0.7994748756289483, + "num_tokens": 21947346.0, + "step": 1630 + }, + { + "epoch": 0.9987819732034104, + "grad_norm": 0.6195595264434814, + "learning_rate": 0.0001601058287555465, + "loss": 0.7294, + "mean_token_accuracy": 0.8030684441328049, + "num_tokens": 22081340.0, + "step": 1640 + } + ], + "logging_steps": 10, + "max_steps": 4926, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.6993952090530775e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1642/training_args.bin 
b/checkpoint-1642/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7aaf98c04505b149e2e8903151128d95e7ab7b98 --- /dev/null +++ b/checkpoint-1642/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d57fe99d74b7e16ba38edb5265078ef5a69a65f6b630b2ca3feaacdb770f49e +size 6161 diff --git a/checkpoint-3284/README.md b/checkpoint-3284/README.md new file mode 100644 index 0000000000000000000000000000000000000000..88d5a6d478d4dd6dceb04cd4688496bde58deabd --- /dev/null +++ b/checkpoint-3284/README.md @@ -0,0 +1,208 @@ +--- +base_model: openai/gpt-oss-20b +library_name: peft +tags: +- base_model:adapter:openai/gpt-oss-20b +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-3284/adapter_config.json b/checkpoint-3284/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..4f7444c0a113bd6bbbb6725b2090d1700b55b21c --- /dev/null +++ b/checkpoint-3284/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GptOssForCausalLM", + "parent_library": "transformers.models.gpt_oss.modeling_gpt_oss" + }, + "base_model_name_or_path": "openai/gpt-oss-20b", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "target_parameters": [ + "7.mlp.experts.gate_up_proj", + "7.mlp.experts.down_proj", + "15.mlp.experts.gate_up_proj", + "15.mlp.experts.down_proj", + "23.mlp.experts.gate_up_proj", + "23.mlp.experts.down_proj" + ], + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-3284/adapter_model.safetensors b/checkpoint-3284/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bad0efe7b515a6adff224d1ea8759ad12a96ca6 --- /dev/null +++ b/checkpoint-3284/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:863cffb057e314cefc4b79ade1f0ec14ceffa2ba036738faf8f97bd97cd0ceba +size 2366470368 diff --git a/checkpoint-3284/chat_template.jinja b/checkpoint-3284/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..dc7bb11927d29f653ba2740f2db2c688fd77592f --- /dev/null +++ 
b/checkpoint-3284/chat_template.jinja @@ -0,0 +1,331 @@ +{#- + In addition to the normal inputs of `messages` and `tools`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". + #} + +{#- Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + 
{%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" 
}} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\n\n" }} + {{- "namespace " + namespace_name + " {\n\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\n" }} + {{- "type "+ tool.name + " = " }} + {%- if tool.parameters and tool.parameters.properties %} + {{- "(_: {\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {%- if param_spec.description %} + {{- "// " + param_spec.description + "\n" }} + {%- endif %} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum %} + {{- ", // default: " + param_spec.default }} + {%- elif param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\n" }} + {%- else %} + {{- ",\n" }} + {%- endif -%} + {%- endfor %} + {{- "}) => any;\n\n" }} + {%- else -%} + {{- "() => any;\n\n" }} + {%- endif -%} + {%- endfor %} + {{- "} // namespace " + namespace_name }} +{%- endmacro -%} + +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\n\n" }} + {{- "// Tool for browsing.\n" }} + {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }} + {{- "// Cite information from the tool using the following format:\n" }} + {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` 
or `【8†L3】`.\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\n" }} + {{- "// sources=web (default: web)\n" }} + {{- "namespace browser {\n\n" }} + {{- "// Searches for information related to `query` and displays `topn` results.\n" }} + {{- "type search = (_: {\n" }} + {{- "query: string,\n" }} + {{- "topn?: number, // default: 10\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }} + {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }} + {{- "// If `cursor` is not provided, the most recent page is implied.\n" }} + {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }} + {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\n" }} + {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }} + {{- "type open = (_: {\n" }} + {{- "id?: number | string, // default: -1\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "loc?: number, // default: -1\n" }} + {{- "num_lines?: number, // default: -1\n" }} + {{- "view_source?: boolean, // default: false\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }} + {{- "type find = (_: {\n" }} + {{- "pattern: string,\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "}) => any;\n\n" }} + {{- "} // namespace browser\n\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\n\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. 
when creating plots, tables, or files).\n\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %} + {%- endif %} + {{- model_identity + "\n" }} + {{- "Knowledge cutoff: 2024-06\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "Reasoning: " + reasoning_effort + "\n\n" }} + {%- if builtin_tools %} + {{- "# Tools\n\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }} + {%- if tools -%} + {{- "\nCalls to these tools must go to the commentary channel: 'functions'." 
}} + {%- endif -%} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message #} +{%- if messages[0].role == "developer" or messages[0].role == "system" %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\n\n" }} + {{- developer_message }} + {{- "\n\n" }} + {%- endif %} + {%- if tools -%} + {{- "# Tools\n\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {#- Checks to ensure the messages are being passed in the format we expect #} + {%- if "content" in message %} + {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. 
Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "thinking" in message %} + {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "tool_calls" in message %} + {#- We need very careful handling here - we want to drop the tool call analysis message if the model #} + {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #} + {#- when we render CoT/analysis messages in inference. #} + {%- set future_final_message = namespace(found=false) %} + {%- for future_message in loop_messages[loop.index:] %} + {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %} + {%- set future_final_message.found = true %} + {%- endif %} + {%- endfor %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content and message.thinking %} + {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! 
Put the analysis message in one or the other, but not both.") }} + {%- elif message.content and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- elif message.thinking and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary " }} + {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|call|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference. #} + {%- if "thinking" in message %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- else %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." 
+ last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- elif message.role == 'user' -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} \ No newline at end of file diff --git a/checkpoint-3284/optimizer.pt b/checkpoint-3284/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b704922c59a3714007eea6f419a414694f4872ff --- /dev/null +++ b/checkpoint-3284/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31431c1c610dd489524726e1e535570f80ad216979b4133ca46b6b1395c50c0 +size 120495883 diff --git a/checkpoint-3284/rng_state_0.pth b/checkpoint-3284/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..059b96b1c331e49bdb856fd3578b18fbdf2a734f --- /dev/null +++ b/checkpoint-3284/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfe1981024ef92f2da08a90c72c7c793d1cc9de1547abd2556c968be70232eb +size 16389 diff --git a/checkpoint-3284/rng_state_1.pth b/checkpoint-3284/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..69a4018f64d01bcaa0a987c1d153a2119227d89e --- /dev/null +++ b/checkpoint-3284/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b845d476d830805793c3dcf8ac2daad87fec289bff3f7eda9e72fc374eda1 +size 16389 diff --git a/checkpoint-3284/rng_state_2.pth b/checkpoint-3284/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e33ffe6b2f9591b3b8d5ff7c2875102fd827a82 --- /dev/null +++ b/checkpoint-3284/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e9880996b01262a807d1ec3ebd91eee540e08130a14a45a4648731fd0d48a9 +size 16389 diff --git a/checkpoint-3284/rng_state_3.pth 
b/checkpoint-3284/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..fedd64c60cfc4f537094e08f997997e486a949f5 --- /dev/null +++ b/checkpoint-3284/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee25c237d6fe62ec76adcf7daf899d7ed32eab5d1a5b447b911f4451c9a1b258 +size 16389 diff --git a/checkpoint-3284/rng_state_4.pth b/checkpoint-3284/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..b03eafe578497e5f84f4694d0d884dc8033e8925 --- /dev/null +++ b/checkpoint-3284/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6b31133f29a8fc0cb538aa807d6a403bd51939336bfd425cd3d122d8c5595c +size 16389 diff --git a/checkpoint-3284/rng_state_5.pth b/checkpoint-3284/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbfc5829722403deeb503cf2410e739dd820562e --- /dev/null +++ b/checkpoint-3284/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26c55b5c7fa0522b1d27b2c00a7ea77ad010f19a1321991165c5c972b8fa97a +size 16389 diff --git a/checkpoint-3284/rng_state_6.pth b/checkpoint-3284/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2bc738687e31f23a72d29689ca6cebb97cf798f --- /dev/null +++ b/checkpoint-3284/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1a3cf85626196804f25a8293e22dc561bba068a70fb123e04afe4896c33972 +size 16389 diff --git a/checkpoint-3284/rng_state_7.pth b/checkpoint-3284/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..45026a0962b4f14d2886c0fd43dc92df6f9efe86 --- /dev/null +++ b/checkpoint-3284/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f87c1ee5f5db346c7b913137cbccd196eaf8ec5a4cf9f192418a3069269b49 +size 16389 diff --git a/checkpoint-3284/scheduler.pt b/checkpoint-3284/scheduler.pt new file mode 100644 
index 0000000000000000000000000000000000000000..e9390e53f46de124cbae002199810fabb13fcc3d --- /dev/null +++ b/checkpoint-3284/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9aa280844c8740ef7748b8e17553210dcb3a3e5062e357e0434caf53ebef75 +size 1465 diff --git a/checkpoint-3284/special_tokens_map.json b/checkpoint-3284/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a463562962fc1910d6c169b0a7e9c95872abc92e --- /dev/null +++ b/checkpoint-3284/special_tokens_map.json @@ -0,0 +1,1817 @@ +{ + "additional_special_tokens": [ + { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": 
"AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CV", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + }, + { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FuSa", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { 
+ "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LBIST", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + }, + { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCIE", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": 
"QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SoC", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + 
"content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + 
"content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-3284/tokenizer.json b/checkpoint-3284/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e0ace705cfa5bd60a370b9daafe3e1513770b9 --- /dev/null +++ b/checkpoint-3284/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec3af79eb37b392bb5382bfe3f4eeab633498c220a804f4fd5d7d102a000f1a +size 27914312 diff --git a/checkpoint-3284/tokenizer_config.json b/checkpoint-3284/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..923c92874790920d6eaf5c317c2d63cd3b569e62 --- /dev/null +++ b/checkpoint-3284/tokenizer_config.json @@ -0,0 +1,2489 @@ +{ + "added_tokens_decoder": { + "1529": { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2022": { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2416": { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3360": { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4478": { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5173": { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6258": { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6781": { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7726": { + "content": "FO", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9375": { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9760": { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10227": { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11720": { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12235": { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12515": { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13874": { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13905": { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15409": { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17183": { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19862": { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20174": { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22723": { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27968": { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "29829": { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29864": { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34134": { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34435": { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39749": { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43230": { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46966": { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47787": { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47994": { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50719": { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52907": { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54793": { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58530": { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66773": { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "68495": { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70684": { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72089": { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74923": { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77411": { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81990": { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82244": { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84526": { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86154": { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86297": { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93660": { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94432": { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95202": { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100413": { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"104755": { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104805": { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106979": { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110871": { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117565": { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118754": { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126731": { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126978": { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130911": { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147130": { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152076": { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152095": { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152119": { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155474": { 
+ "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158359": { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162121": { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166996": { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171893": { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172873": { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175772": { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177970": { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178261": { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178974": { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186075": { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187697": { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199998": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199999": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, 
+ "200000": { + "content": "<|reserved_200000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200001": { + "content": "<|reserved_200001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200002": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200003": { + "content": "<|constrain|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200004": { + "content": "<|reserved_200004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200005": { + "content": "<|channel|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200006": { + "content": "<|start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200007": { + "content": "<|end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200008": { + "content": "<|message|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200009": { + "content": "<|reserved_200009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200010": { + "content": "<|reserved_200010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200011": { + "content": "<|reserved_200011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200012": { + "content": "<|call|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200013": { + "content": "<|reserved_200013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200014": { + "content": "<|reserved_200014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200015": { + "content": "<|reserved_200015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200016": { + "content": "<|reserved_200016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200017": { + "content": "<|reserved_200017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200018": { + "content": "<|endofprompt|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200019": { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200020": { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200021": { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200022": { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200023": { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200024": { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200025": { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200026": { + "content": "AoU", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200027": { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200028": { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200029": { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200030": { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200031": { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200032": { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200033": { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200034": { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200035": { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200036": { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200037": { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200038": { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200039": { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200040": { + "content": "CBR", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200041": { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200042": { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200043": { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200044": { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200045": { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200046": { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200047": { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200048": { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200049": { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200050": { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200051": { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200052": { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200053": { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200054": { + "content": 
"DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200055": { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200056": { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200057": { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200058": { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200059": { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200060": { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200061": { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200062": { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200063": { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200064": { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200065": { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200066": { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200067": { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200068": { + "content": 
"FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200069": { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200070": { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200071": { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200072": { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200073": { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200074": { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200075": { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200076": { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200077": { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200078": { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200079": { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200080": { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200081": { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200082": { + "content": 
"HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200083": { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200084": { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200085": { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200086": { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200087": { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200088": { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200089": { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200090": { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200091": { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200092": { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200093": { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200094": { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200095": { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200096": { + "content": "IMEM", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200097": { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200098": { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200099": { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200100": { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200101": { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200102": { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200103": { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200104": { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200105": { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200106": { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200107": { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200108": { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200109": { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200110": { + "content": "LBIST", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200111": { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200112": { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200113": { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200114": { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200115": { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200116": { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200117": { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200118": { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200119": { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200120": { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200121": { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200122": { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200123": { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200124": { + "content": "MiTM", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200125": { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200126": { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200127": { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200128": { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200129": { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200130": { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200131": { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200132": { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200133": { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200134": { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200135": { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200136": { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200137": { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200138": { + "content": "PCT", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200139": { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200140": { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200141": { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200142": { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200143": { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200144": { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200145": { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200146": { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200147": { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200148": { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200149": { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200150": { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200151": { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200152": { + "content": "RBR", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200153": { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200154": { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200155": { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200156": { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200157": { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200158": { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200159": { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200160": { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200161": { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200162": { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200163": { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200164": { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200165": { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200166": { + "content": "SHA512", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200167": { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200168": { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200169": { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200170": { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200171": { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200172": { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200173": { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200174": { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200175": { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200176": { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200177": { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200178": { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200179": { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200180": { + "content": "VCPU", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200181": { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200182": { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200183": { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200184": { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200185": { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200186": { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200187": { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200188": { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200189": { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200190": { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200191": { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200192": { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200193": { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200194": { + "content": "ipc_t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200195": { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200196": { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200197": { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "AAD", + "AArch64", + "ACL", + "AD", + "AES", + "AES256GCM", + "AESCBC", + "AKE", + "AON", + "ASID", + "AXI", + "Acronym", + "AoU", + "AutoSar", + "BAM", + "BCH", + "BIST", + "BOM", + "BPMP", + "BPS", + "BPU", + "BRBCT", + "BW", + "C2C", + "CA", + "CAN", + "CANFD", + "CAR", + "CAVP", + "CBB", + "CBC", + "CBR", + "CCM", + "CCPLEX", + "CCPLEX_L2", + "CCPLEX_MISC", + "CCPLEX_SCF", + "CDD", + "CIF", + "CMAC", + "CPE", + "CPU", + "CRC", + "CSI", + "CSP", + "CTR", + "CTXT", + "CV", + "DBB", + "DEP", + "DEV", + "DFA", + "DFT", + "DIP", + "DISPLAY", + "DLA", + "DMA", + "DMEM", + "DPA", + "DSC", + "DT", + "DU", + "DVMU", + "EC", + "ECB", + "ECC", + "ECDHE", + "ECDSA", + "ECID", + "EDR", + "EOF", + "EOTTI", + "EQoS", + "FCL", + "FHTI", + "FIPS", + "FMEA", + "FMON", + "FO", + "FP", + "FPS", + "FW", + "FuSa", + "GCM", + "GFD", + "GIC", + "GMAC", + "GMSL", + "GOP", + "GP", + "GPCDMA", + "GPU", + "GR", + "Gpps", + "HBR", + "HBR2", + "HBR3", + "HDS", + "HIS", + "HMAC", + "HPSE", + "HSI", + "HSM", + "HSP", + "HW", + "IAS", + "IC", + "ICD", + "IDR", + "IDT", + "IEP", + "IEU", + "IFU", + "IID", + "ILD", + "IMEM", + "IOC", + "IOFA", + "IOMMU", + "IPC", + "IPI", + "IRF", + "IST", + "IV", + "IoT", + "JSR", + "KAT", + "KCV", + "KDF", + "KPI", + "L1PT", + "L2C", + "L2mDIR", + "L2vDIR", + "LAB", + "LBIST", + "LDC", + "LFT", + "LIC", + "LIP", + "LSB", + "MAC", + "MAQ", + "MB", + "MBIST", + "MCAL", + "MCE", + "MCU", + "MSB", + 
"MSS", + "MST", + "MTS", + "MiTM", + "NIP", + "NIST", + "NITO", + "NOC", + "NOOP", + "NT", + "NVDEC", + "NVENC", + "NVJPG", + "NVM", + "NVVSE", + "OEM", + "OFA", + "OS", + "OSP", + "OTP", + "PB", + "PCIE", + "PCPU", + "PCR", + "PCT", + "PDK", + "PII", + "PIP", + "PKC", + "PKCS", + "PKI", + "PL", + "PLA", + "POR", + "PPC", + "PSC", + "PTXT", + "PVA", + "QNX", + "QOS", + "QSPI", + "RBG", + "RBR", + "RC", + "RDEV", + "REE", + "RMA", + "RMW", + "RSA", + "RSB", + "RTS", + "RoT", + "SAE", + "SBK", + "SCH", + "SDK", + "SE", + "SEL0", + "SEL1", + "SEooC", + "SGM", + "SHA", + "SHA256", + "SHA512", + "SHE", + "SKU", + "SNOC", + "SO", + "SPA", + "SQ", + "SSR", + "SST", + "SW", + "SWAT", + "SoC", + "TA", + "TCF", + "TEE", + "THI", + "TNR", + "TOS", + "TRC", + "TRL", + "TSEC", + "TZ", + "UFS", + "VBR", + "VCPU", + "VI", + "VIC", + "VMEM", + "VMID", + "VPU", + "VRC", + "VUI", + "WARB", + "XIP", + "bpp", + "eMMC", + "hfPLA", + "iGPU", + "ipc", + "ipc_fg", + "ipc_t", + "sbPLA", + "xBTV", + "xps" + ], + "bos_token": "<|startoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|return|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|endoftext|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-3284/trainer_state.json b/checkpoint-3284/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f2a02239e1359b344847f826f9fca1f922aaa8f --- /dev/null +++ b/checkpoint-3284/trainer_state.json @@ -0,0 +1,2986 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 3284, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0060901339829476245, + "grad_norm": 23.5319766998291, + "learning_rate": 1.2162162162162164e-05, + "loss": 
4.5905, + "mean_token_accuracy": 0.3401473943144083, + "num_tokens": 132681.0, + "step": 10 + }, + { + "epoch": 0.012180267965895249, + "grad_norm": 6.916630744934082, + "learning_rate": 2.5675675675675675e-05, + "loss": 3.957, + "mean_token_accuracy": 0.3799716055393219, + "num_tokens": 264238.0, + "step": 20 + }, + { + "epoch": 0.018270401948842874, + "grad_norm": 2.717982292175293, + "learning_rate": 3.918918918918919e-05, + "loss": 3.047, + "mean_token_accuracy": 0.46811963245272636, + "num_tokens": 401308.0, + "step": 30 + }, + { + "epoch": 0.024360535931790498, + "grad_norm": 2.407865524291992, + "learning_rate": 5.27027027027027e-05, + "loss": 2.5322, + "mean_token_accuracy": 0.5190828196704388, + "num_tokens": 532444.0, + "step": 40 + }, + { + "epoch": 0.030450669914738125, + "grad_norm": 1.018301248550415, + "learning_rate": 6.621621621621621e-05, + "loss": 2.1325, + "mean_token_accuracy": 0.5782605841755867, + "num_tokens": 660406.0, + "step": 50 + }, + { + "epoch": 0.03654080389768575, + "grad_norm": 0.7315741181373596, + "learning_rate": 7.972972972972974e-05, + "loss": 1.9044, + "mean_token_accuracy": 0.6264914631843567, + "num_tokens": 795304.0, + "step": 60 + }, + { + "epoch": 0.04263093788063337, + "grad_norm": 0.6652920246124268, + "learning_rate": 9.324324324324324e-05, + "loss": 1.633, + "mean_token_accuracy": 0.6683938711881637, + "num_tokens": 934543.0, + "step": 70 + }, + { + "epoch": 0.048721071863580996, + "grad_norm": 0.6119660139083862, + "learning_rate": 0.00010675675675675677, + "loss": 1.543, + "mean_token_accuracy": 0.6834091022610664, + "num_tokens": 1070669.0, + "step": 80 + }, + { + "epoch": 0.05481120584652863, + "grad_norm": 0.591424286365509, + "learning_rate": 0.00012027027027027027, + "loss": 1.4154, + "mean_token_accuracy": 0.6991497233510018, + "num_tokens": 1211114.0, + "step": 90 + }, + { + "epoch": 0.06090133982947625, + "grad_norm": 0.5663530230522156, + "learning_rate": 0.0001337837837837838, + "loss": 1.3176, + 
"mean_token_accuracy": 0.7089206710457802, + "num_tokens": 1349584.0, + "step": 100 + }, + { + "epoch": 0.06699147381242387, + "grad_norm": 0.5881878137588501, + "learning_rate": 0.0001472972972972973, + "loss": 1.2293, + "mean_token_accuracy": 0.7254403859376908, + "num_tokens": 1487515.0, + "step": 110 + }, + { + "epoch": 0.0730816077953715, + "grad_norm": 0.7664394974708557, + "learning_rate": 0.00016081081081081083, + "loss": 1.1814, + "mean_token_accuracy": 0.7306812778115273, + "num_tokens": 1618603.0, + "step": 120 + }, + { + "epoch": 0.07917174177831912, + "grad_norm": 0.6155670881271362, + "learning_rate": 0.00017432432432432432, + "loss": 1.1967, + "mean_token_accuracy": 0.7284250959753991, + "num_tokens": 1750466.0, + "step": 130 + }, + { + "epoch": 0.08526187576126674, + "grad_norm": 0.5296258330345154, + "learning_rate": 0.00018783783783783784, + "loss": 1.0955, + "mean_token_accuracy": 0.7472824215888977, + "num_tokens": 1887913.0, + "step": 140 + }, + { + "epoch": 0.09135200974421437, + "grad_norm": 0.5564976334571838, + "learning_rate": 0.00019999998054550544, + "loss": 1.118, + "mean_token_accuracy": 0.7397311359643937, + "num_tokens": 2018579.0, + "step": 150 + }, + { + "epoch": 0.09744214372716199, + "grad_norm": 0.5301142930984497, + "learning_rate": 0.00019999764601633156, + "loss": 1.045, + "mean_token_accuracy": 0.7519380420446395, + "num_tokens": 2158851.0, + "step": 160 + }, + { + "epoch": 0.10353227771010962, + "grad_norm": 0.5949111580848694, + "learning_rate": 0.00019999142070388495, + "loss": 1.0497, + "mean_token_accuracy": 0.7520910769701004, + "num_tokens": 2296715.0, + "step": 170 + }, + { + "epoch": 0.10962241169305725, + "grad_norm": 0.6169262528419495, + "learning_rate": 0.0001999813048772986, + "loss": 1.0821, + "mean_token_accuracy": 0.7406247839331627, + "num_tokens": 2424756.0, + "step": 180 + }, + { + "epoch": 0.11571254567600488, + "grad_norm": 0.58912593126297, + "learning_rate": 0.00019996729897390057, + "loss": 1.0286, + 
"mean_token_accuracy": 0.7527454376220704, + "num_tokens": 2559362.0, + "step": 190 + }, + { + "epoch": 0.1218026796589525, + "grad_norm": 0.5084304213523865, + "learning_rate": 0.00019994940359919483, + "loss": 0.992, + "mean_token_accuracy": 0.7640391126275062, + "num_tokens": 2700231.0, + "step": 200 + }, + { + "epoch": 0.1278928136419001, + "grad_norm": 0.5790796279907227, + "learning_rate": 0.00019992761952683516, + "loss": 1.0146, + "mean_token_accuracy": 0.7554366230964661, + "num_tokens": 2831324.0, + "step": 210 + }, + { + "epoch": 0.13398294762484775, + "grad_norm": 0.5852051377296448, + "learning_rate": 0.00019990194769859188, + "loss": 0.978, + "mean_token_accuracy": 0.7612502485513687, + "num_tokens": 2967346.0, + "step": 220 + }, + { + "epoch": 0.14007308160779536, + "grad_norm": 0.5102785229682922, + "learning_rate": 0.00019987238922431088, + "loss": 0.9616, + "mean_token_accuracy": 0.7677591517567635, + "num_tokens": 3110936.0, + "step": 230 + }, + { + "epoch": 0.146163215590743, + "grad_norm": 0.5472669005393982, + "learning_rate": 0.00019983894538186576, + "loss": 0.9535, + "mean_token_accuracy": 0.76737689524889, + "num_tokens": 3247496.0, + "step": 240 + }, + { + "epoch": 0.15225334957369063, + "grad_norm": 0.5611053109169006, + "learning_rate": 0.0001998016176171026, + "loss": 0.9577, + "mean_token_accuracy": 0.7626092001795769, + "num_tokens": 3384178.0, + "step": 250 + }, + { + "epoch": 0.15834348355663824, + "grad_norm": 0.54055255651474, + "learning_rate": 0.0001997604075437774, + "loss": 0.9907, + "mean_token_accuracy": 0.7575223430991173, + "num_tokens": 3517617.0, + "step": 260 + }, + { + "epoch": 0.16443361753958588, + "grad_norm": 0.558316707611084, + "learning_rate": 0.0001997153169434864, + "loss": 0.944, + "mean_token_accuracy": 0.7664194419980049, + "num_tokens": 3662878.0, + "step": 270 + }, + { + "epoch": 0.1705237515225335, + "grad_norm": 0.49766939878463745, + "learning_rate": 0.0001996663477655889, + "loss": 0.9106, + 
"mean_token_accuracy": 0.7760038167238236, + "num_tokens": 3807411.0, + "step": 280 + }, + { + "epoch": 0.17661388550548113, + "grad_norm": 0.4953667223453522, + "learning_rate": 0.0001996135021271232, + "loss": 0.9687, + "mean_token_accuracy": 0.7605679705739021, + "num_tokens": 3936840.0, + "step": 290 + }, + { + "epoch": 0.18270401948842874, + "grad_norm": 0.5447947978973389, + "learning_rate": 0.00019955678231271484, + "loss": 0.9625, + "mean_token_accuracy": 0.7603292793035508, + "num_tokens": 4067826.0, + "step": 300 + }, + { + "epoch": 0.18879415347137637, + "grad_norm": 0.4665842056274414, + "learning_rate": 0.00019949619077447807, + "loss": 0.9372, + "mean_token_accuracy": 0.7676101759076118, + "num_tokens": 4205887.0, + "step": 310 + }, + { + "epoch": 0.19488428745432398, + "grad_norm": 0.515690267086029, + "learning_rate": 0.00019943173013190965, + "loss": 0.923, + "mean_token_accuracy": 0.7708473294973374, + "num_tokens": 4342894.0, + "step": 320 + }, + { + "epoch": 0.20097442143727162, + "grad_norm": 0.5831382274627686, + "learning_rate": 0.00019936340317177565, + "loss": 0.9203, + "mean_token_accuracy": 0.7708552837371826, + "num_tokens": 4477651.0, + "step": 330 + }, + { + "epoch": 0.20706455542021923, + "grad_norm": 0.6162773966789246, + "learning_rate": 0.0001992912128479911, + "loss": 0.916, + "mean_token_accuracy": 0.7702088996767997, + "num_tokens": 4610746.0, + "step": 340 + }, + { + "epoch": 0.21315468940316687, + "grad_norm": 0.5172462463378906, + "learning_rate": 0.00019921516228149207, + "loss": 0.8942, + "mean_token_accuracy": 0.7741821393370628, + "num_tokens": 4751175.0, + "step": 350 + }, + { + "epoch": 0.2192448233861145, + "grad_norm": 0.5890468955039978, + "learning_rate": 0.0001991352547601009, + "loss": 0.9229, + "mean_token_accuracy": 0.7691043332219124, + "num_tokens": 4882328.0, + "step": 360 + }, + { + "epoch": 0.22533495736906212, + "grad_norm": 0.5522404909133911, + "learning_rate": 0.00019905149373838408, + "loss": 0.9294, + 
"mean_token_accuracy": 0.7646071568131447, + "num_tokens": 5012181.0, + "step": 370 + }, + { + "epoch": 0.23142509135200975, + "grad_norm": 0.5349445939064026, + "learning_rate": 0.0001989638828375028, + "loss": 0.8797, + "mean_token_accuracy": 0.7771721839904785, + "num_tokens": 5151133.0, + "step": 380 + }, + { + "epoch": 0.23751522533495736, + "grad_norm": 0.531052827835083, + "learning_rate": 0.00019887242584505635, + "loss": 0.9221, + "mean_token_accuracy": 0.7678465083241462, + "num_tokens": 5279790.0, + "step": 390 + }, + { + "epoch": 0.243605359317905, + "grad_norm": 0.5126324892044067, + "learning_rate": 0.00019877712671491864, + "loss": 0.8862, + "mean_token_accuracy": 0.7739894777536392, + "num_tokens": 5412390.0, + "step": 400 + }, + { + "epoch": 0.2496954933008526, + "grad_norm": 0.5111438632011414, + "learning_rate": 0.00019867798956706693, + "loss": 0.9005, + "mean_token_accuracy": 0.7721902653574944, + "num_tokens": 5545801.0, + "step": 410 + }, + { + "epoch": 0.2557856272838002, + "grad_norm": 0.5488138794898987, + "learning_rate": 0.00019857501868740402, + "loss": 0.8988, + "mean_token_accuracy": 0.7690282896161079, + "num_tokens": 5673758.0, + "step": 420 + }, + { + "epoch": 0.2618757612667479, + "grad_norm": 0.5497994422912598, + "learning_rate": 0.0001984682185275727, + "loss": 0.8802, + "mean_token_accuracy": 0.7780183687806129, + "num_tokens": 5813158.0, + "step": 430 + }, + { + "epoch": 0.2679658952496955, + "grad_norm": 0.5478431582450867, + "learning_rate": 0.0001983575937047635, + "loss": 0.865, + "mean_token_accuracy": 0.7785944610834121, + "num_tokens": 5947367.0, + "step": 440 + }, + { + "epoch": 0.2740560292326431, + "grad_norm": 0.5188766717910767, + "learning_rate": 0.00019824314900151487, + "loss": 0.8798, + "mean_token_accuracy": 0.7752803862094879, + "num_tokens": 6081060.0, + "step": 450 + }, + { + "epoch": 0.2801461632155907, + "grad_norm": 0.530222475528717, + "learning_rate": 0.00019812488936550666, + "loss": 0.8628, + 
"mean_token_accuracy": 0.7801630645990372, + "num_tokens": 6217834.0, + "step": 460 + }, + { + "epoch": 0.2862362971985384, + "grad_norm": 0.5987964868545532, + "learning_rate": 0.00019800281990934614, + "loss": 0.8775, + "mean_token_accuracy": 0.7760324433445931, + "num_tokens": 6350451.0, + "step": 470 + }, + { + "epoch": 0.292326431181486, + "grad_norm": 0.5468559265136719, + "learning_rate": 0.0001978769459103468, + "loss": 0.8721, + "mean_token_accuracy": 0.7794204503297806, + "num_tokens": 6484738.0, + "step": 480 + }, + { + "epoch": 0.2984165651644336, + "grad_norm": 0.5541098117828369, + "learning_rate": 0.0001977472728103005, + "loss": 0.8785, + "mean_token_accuracy": 0.7767582029104233, + "num_tokens": 6619313.0, + "step": 490 + }, + { + "epoch": 0.30450669914738127, + "grad_norm": 0.5134281516075134, + "learning_rate": 0.0001976138062152419, + "loss": 0.8717, + "mean_token_accuracy": 0.7752724394202233, + "num_tokens": 6753195.0, + "step": 500 + }, + { + "epoch": 0.3105968331303289, + "grad_norm": 0.49164435267448425, + "learning_rate": 0.00019747655189520633, + "loss": 0.8757, + "mean_token_accuracy": 0.7768464118242264, + "num_tokens": 6890448.0, + "step": 510 + }, + { + "epoch": 0.3166869671132765, + "grad_norm": 0.5899345278739929, + "learning_rate": 0.00019733551578398023, + "loss": 0.8322, + "mean_token_accuracy": 0.7859320402145386, + "num_tokens": 7027488.0, + "step": 520 + }, + { + "epoch": 0.3227771010962241, + "grad_norm": 0.6552841663360596, + "learning_rate": 0.0001971907039788447, + "loss": 0.861, + "mean_token_accuracy": 0.7770532324910164, + "num_tokens": 7161184.0, + "step": 530 + }, + { + "epoch": 0.32886723507917176, + "grad_norm": 0.5038822889328003, + "learning_rate": 0.0001970421227403117, + "loss": 0.8825, + "mean_token_accuracy": 0.775890800356865, + "num_tokens": 7294399.0, + "step": 540 + }, + { + "epoch": 0.33495736906211937, + "grad_norm": 0.5094267129898071, + "learning_rate": 0.00019688977849185378, + "loss": 0.8598, + 
"mean_token_accuracy": 0.7817838475108146, + "num_tokens": 7427183.0, + "step": 550 + }, + { + "epoch": 0.341047503045067, + "grad_norm": 0.5282809138298035, + "learning_rate": 0.00019673367781962594, + "loss": 0.8463, + "mean_token_accuracy": 0.7812959104776382, + "num_tokens": 7561734.0, + "step": 560 + }, + { + "epoch": 0.3471376370280146, + "grad_norm": 0.45355409383773804, + "learning_rate": 0.00019657382747218123, + "loss": 0.8207, + "mean_token_accuracy": 0.7888262197375298, + "num_tokens": 7706228.0, + "step": 570 + }, + { + "epoch": 0.35322777101096225, + "grad_norm": 0.5162333846092224, + "learning_rate": 0.00019641023436017883, + "loss": 0.8235, + "mean_token_accuracy": 0.7868947923183441, + "num_tokens": 7846684.0, + "step": 580 + }, + { + "epoch": 0.35931790499390986, + "grad_norm": 0.5194632411003113, + "learning_rate": 0.00019624290555608526, + "loss": 0.8129, + "mean_token_accuracy": 0.7884069249033928, + "num_tokens": 7986811.0, + "step": 590 + }, + { + "epoch": 0.3654080389768575, + "grad_norm": 0.5494846701622009, + "learning_rate": 0.00019607184829386882, + "loss": 0.8084, + "mean_token_accuracy": 0.7874000474810601, + "num_tokens": 8124538.0, + "step": 600 + }, + { + "epoch": 0.37149817295980514, + "grad_norm": 0.5368776917457581, + "learning_rate": 0.0001958970699686866, + "loss": 0.8225, + "mean_token_accuracy": 0.783010233938694, + "num_tokens": 8260529.0, + "step": 610 + }, + { + "epoch": 0.37758830694275275, + "grad_norm": 0.6229024529457092, + "learning_rate": 0.00019571857813656496, + "loss": 0.8786, + "mean_token_accuracy": 0.7753148928284646, + "num_tokens": 8389042.0, + "step": 620 + }, + { + "epoch": 0.38367844092570036, + "grad_norm": 0.5601000785827637, + "learning_rate": 0.00019553638051407279, + "loss": 0.8909, + "mean_token_accuracy": 0.7745720192790031, + "num_tokens": 8513603.0, + "step": 630 + }, + { + "epoch": 0.38976857490864797, + "grad_norm": 0.438970685005188, + "learning_rate": 0.0001953504849779879, + "loss": 0.8085, + 
"mean_token_accuracy": 0.7871840804815292, + "num_tokens": 8652970.0, + "step": 640 + }, + { + "epoch": 0.39585870889159563, + "grad_norm": 0.5505132079124451, + "learning_rate": 0.00019516089956495648, + "loss": 0.8102, + "mean_token_accuracy": 0.7869585514068603, + "num_tokens": 8792103.0, + "step": 650 + }, + { + "epoch": 0.40194884287454324, + "grad_norm": 0.5447221398353577, + "learning_rate": 0.00019496763247114581, + "loss": 0.8336, + "mean_token_accuracy": 0.7816034242510795, + "num_tokens": 8926853.0, + "step": 660 + }, + { + "epoch": 0.40803897685749085, + "grad_norm": 0.4652746915817261, + "learning_rate": 0.00019477069205188965, + "loss": 0.8383, + "mean_token_accuracy": 0.7826304718852043, + "num_tokens": 9059592.0, + "step": 670 + }, + { + "epoch": 0.41412911084043846, + "grad_norm": 0.42363590002059937, + "learning_rate": 0.00019457008682132726, + "loss": 0.847, + "mean_token_accuracy": 0.7810002073645592, + "num_tokens": 9193062.0, + "step": 680 + }, + { + "epoch": 0.42021924482338613, + "grad_norm": 0.5209478735923767, + "learning_rate": 0.00019436582545203518, + "loss": 0.8766, + "mean_token_accuracy": 0.7733785718679428, + "num_tokens": 9315805.0, + "step": 690 + }, + { + "epoch": 0.42630937880633374, + "grad_norm": 0.5176642537117004, + "learning_rate": 0.00019415791677465237, + "loss": 0.8155, + "mean_token_accuracy": 0.7869213685393334, + "num_tokens": 9448863.0, + "step": 700 + }, + { + "epoch": 0.43239951278928135, + "grad_norm": 0.4531058371067047, + "learning_rate": 0.00019394636977749843, + "loss": 0.8096, + "mean_token_accuracy": 0.7903949975967407, + "num_tokens": 9589382.0, + "step": 710 + }, + { + "epoch": 0.438489646772229, + "grad_norm": 0.5651549100875854, + "learning_rate": 0.000193731193606185, + "loss": 0.8263, + "mean_token_accuracy": 0.7823062822222709, + "num_tokens": 9723562.0, + "step": 720 + }, + { + "epoch": 0.4445797807551766, + "grad_norm": 0.5377989411354065, + "learning_rate": 0.00019351239756322031, + "loss": 0.7993, 
+ "mean_token_accuracy": 0.7908329650759697, + "num_tokens": 9859255.0, + "step": 730 + }, + { + "epoch": 0.45066991473812423, + "grad_norm": 0.5420868396759033, + "learning_rate": 0.00019328999110760722, + "loss": 0.8461, + "mean_token_accuracy": 0.7780480548739434, + "num_tokens": 9981578.0, + "step": 740 + }, + { + "epoch": 0.45676004872107184, + "grad_norm": 0.4889216125011444, + "learning_rate": 0.000193063983854434, + "loss": 0.7652, + "mean_token_accuracy": 0.7959530428051949, + "num_tokens": 10122922.0, + "step": 750 + }, + { + "epoch": 0.4628501827040195, + "grad_norm": 0.5044087767601013, + "learning_rate": 0.00019283438557445893, + "loss": 0.824, + "mean_token_accuracy": 0.7845935523509979, + "num_tokens": 10252854.0, + "step": 760 + }, + { + "epoch": 0.4689403166869671, + "grad_norm": 0.5286466479301453, + "learning_rate": 0.00019260120619368773, + "loss": 0.815, + "mean_token_accuracy": 0.7850656941533088, + "num_tokens": 10385075.0, + "step": 770 + }, + { + "epoch": 0.47503045066991473, + "grad_norm": 0.5441628694534302, + "learning_rate": 0.00019236445579294437, + "loss": 0.8048, + "mean_token_accuracy": 0.7876680314540863, + "num_tokens": 10520011.0, + "step": 780 + }, + { + "epoch": 0.48112058465286234, + "grad_norm": 0.49002447724342346, + "learning_rate": 0.0001921241446074355, + "loss": 0.8059, + "mean_token_accuracy": 0.7898563235998154, + "num_tokens": 10652488.0, + "step": 790 + }, + { + "epoch": 0.48721071863581, + "grad_norm": 0.4479144811630249, + "learning_rate": 0.0001918802830263077, + "loss": 0.7913, + "mean_token_accuracy": 0.7928732186555862, + "num_tokens": 10785974.0, + "step": 800 + }, + { + "epoch": 0.4933008526187576, + "grad_norm": 0.5007497668266296, + "learning_rate": 0.00019163288159219853, + "loss": 0.8083, + "mean_token_accuracy": 0.7893043681979179, + "num_tokens": 10920950.0, + "step": 810 + }, + { + "epoch": 0.4993909866017052, + "grad_norm": 0.5289483070373535, + "learning_rate": 0.00019138195100078064, + "loss": 
0.8033, + "mean_token_accuracy": 0.7864485770463944, + "num_tokens": 11056380.0, + "step": 820 + }, + { + "epoch": 0.5054811205846529, + "grad_norm": 0.5604159832000732, + "learning_rate": 0.0001911275021002994, + "loss": 0.7652, + "mean_token_accuracy": 0.7946401730179786, + "num_tokens": 11196074.0, + "step": 830 + }, + { + "epoch": 0.5115712545676004, + "grad_norm": 0.43645399808883667, + "learning_rate": 0.00019086954589110397, + "loss": 0.7724, + "mean_token_accuracy": 0.7990294560790062, + "num_tokens": 11337990.0, + "step": 840 + }, + { + "epoch": 0.5176613885505481, + "grad_norm": 0.43992146849632263, + "learning_rate": 0.0001906080935251716, + "loss": 0.7612, + "mean_token_accuracy": 0.7999786615371705, + "num_tokens": 11481565.0, + "step": 850 + }, + { + "epoch": 0.5237515225334958, + "grad_norm": 0.5595120191574097, + "learning_rate": 0.0001903431563056256, + "loss": 0.8266, + "mean_token_accuracy": 0.7859750911593437, + "num_tokens": 11611714.0, + "step": 860 + }, + { + "epoch": 0.5298416565164433, + "grad_norm": 0.5001987218856812, + "learning_rate": 0.0001900747456862467, + "loss": 0.8506, + "mean_token_accuracy": 0.779585388302803, + "num_tokens": 11736573.0, + "step": 870 + }, + { + "epoch": 0.535931790499391, + "grad_norm": 0.430147647857666, + "learning_rate": 0.00018980287327097784, + "loss": 0.7707, + "mean_token_accuracy": 0.795211361348629, + "num_tokens": 11876859.0, + "step": 880 + }, + { + "epoch": 0.5420219244823387, + "grad_norm": 0.5346289873123169, + "learning_rate": 0.00018952755081342245, + "loss": 0.8057, + "mean_token_accuracy": 0.7871127843856811, + "num_tokens": 12007654.0, + "step": 890 + }, + { + "epoch": 0.5481120584652862, + "grad_norm": 0.46072253584861755, + "learning_rate": 0.00018924879021633653, + "loss": 0.7924, + "mean_token_accuracy": 0.7913773030042648, + "num_tokens": 12140520.0, + "step": 900 + }, + { + "epoch": 0.5542021924482339, + "grad_norm": 0.4803653955459595, + "learning_rate": 0.00018896660353111375, + 
"loss": 0.8398, + "mean_token_accuracy": 0.7807079553604126, + "num_tokens": 12267219.0, + "step": 910 + }, + { + "epoch": 0.5602923264311814, + "grad_norm": 0.5219636559486389, + "learning_rate": 0.0001886810029572647, + "loss": 0.7612, + "mean_token_accuracy": 0.7993015512824059, + "num_tokens": 12404646.0, + "step": 920 + }, + { + "epoch": 0.5663824604141291, + "grad_norm": 0.501483142375946, + "learning_rate": 0.00018839200084188936, + "loss": 0.7953, + "mean_token_accuracy": 0.787814213335514, + "num_tokens": 12538219.0, + "step": 930 + }, + { + "epoch": 0.5724725943970768, + "grad_norm": 0.47334522008895874, + "learning_rate": 0.00018809960967914346, + "loss": 0.789, + "mean_token_accuracy": 0.7928574904799461, + "num_tokens": 12673805.0, + "step": 940 + }, + { + "epoch": 0.5785627283800243, + "grad_norm": 0.5057492852210999, + "learning_rate": 0.00018780384210969806, + "loss": 0.7746, + "mean_token_accuracy": 0.7947553545236588, + "num_tokens": 12811727.0, + "step": 950 + }, + { + "epoch": 0.584652862362972, + "grad_norm": 0.5179910659790039, + "learning_rate": 0.00018750471092019325, + "loss": 0.7962, + "mean_token_accuracy": 0.7905686929821968, + "num_tokens": 12947641.0, + "step": 960 + }, + { + "epoch": 0.5907429963459196, + "grad_norm": 0.45797088742256165, + "learning_rate": 0.00018720222904268543, + "loss": 0.7678, + "mean_token_accuracy": 0.7969774708151818, + "num_tokens": 13083869.0, + "step": 970 + }, + { + "epoch": 0.5968331303288672, + "grad_norm": 0.48360612988471985, + "learning_rate": 0.00018689640955408803, + "loss": 0.7996, + "mean_token_accuracy": 0.7885591968894005, + "num_tokens": 13211807.0, + "step": 980 + }, + { + "epoch": 0.6029232643118149, + "grad_norm": 0.4378497004508972, + "learning_rate": 0.00018658726567560635, + "loss": 0.7652, + "mean_token_accuracy": 0.7969291344285011, + "num_tokens": 13351856.0, + "step": 990 + }, + { + "epoch": 0.6090133982947625, + "grad_norm": 0.4857536852359772, + "learning_rate": 
0.00018627481077216577, + "loss": 0.7786, + "mean_token_accuracy": 0.7914443418383599, + "num_tokens": 13486443.0, + "step": 1000 + }, + { + "epoch": 0.6151035322777101, + "grad_norm": 0.5233064293861389, + "learning_rate": 0.0001859590583518343, + "loss": 0.8241, + "mean_token_accuracy": 0.7811850637197495, + "num_tokens": 13612035.0, + "step": 1010 + }, + { + "epoch": 0.6211936662606578, + "grad_norm": 0.5328738689422607, + "learning_rate": 0.00018564002206523816, + "loss": 0.7502, + "mean_token_accuracy": 0.7993430674076081, + "num_tokens": 13756509.0, + "step": 1020 + }, + { + "epoch": 0.6272838002436053, + "grad_norm": 0.47962310910224915, + "learning_rate": 0.000185317715704972, + "loss": 0.7984, + "mean_token_accuracy": 0.7864531084895134, + "num_tokens": 13883033.0, + "step": 1030 + }, + { + "epoch": 0.633373934226553, + "grad_norm": 0.5685893893241882, + "learning_rate": 0.0001849921532050024, + "loss": 0.7869, + "mean_token_accuracy": 0.7909937381744385, + "num_tokens": 14015234.0, + "step": 1040 + }, + { + "epoch": 0.6394640682095006, + "grad_norm": 0.49146631360054016, + "learning_rate": 0.00018466334864006566, + "loss": 0.7952, + "mean_token_accuracy": 0.7878949210047722, + "num_tokens": 14149319.0, + "step": 1050 + }, + { + "epoch": 0.6455542021924482, + "grad_norm": 0.5556225776672363, + "learning_rate": 0.0001843313162250591, + "loss": 0.7524, + "mean_token_accuracy": 0.7994373366236687, + "num_tokens": 14286868.0, + "step": 1060 + }, + { + "epoch": 0.6516443361753959, + "grad_norm": 0.511379063129425, + "learning_rate": 0.00018399607031442666, + "loss": 0.7929, + "mean_token_accuracy": 0.7921562284231186, + "num_tokens": 14418354.0, + "step": 1070 + }, + { + "epoch": 0.6577344701583435, + "grad_norm": 0.5019840598106384, + "learning_rate": 0.00018365762540153836, + "loss": 0.758, + "mean_token_accuracy": 0.7989353060722351, + "num_tokens": 14553174.0, + "step": 1080 + }, + { + "epoch": 0.6638246041412911, + "grad_norm": 0.6032467484474182, + 
"learning_rate": 0.00018331599611806366, + "loss": 0.7888, + "mean_token_accuracy": 0.7903819754719734, + "num_tokens": 14681393.0, + "step": 1090 + }, + { + "epoch": 0.6699147381242387, + "grad_norm": 0.5369830131530762, + "learning_rate": 0.00018297119723333877, + "loss": 0.765, + "mean_token_accuracy": 0.7950262635946274, + "num_tokens": 14814565.0, + "step": 1100 + }, + { + "epoch": 0.6760048721071864, + "grad_norm": 0.5289803743362427, + "learning_rate": 0.00018262324365372846, + "loss": 0.7496, + "mean_token_accuracy": 0.8032818242907525, + "num_tokens": 14954351.0, + "step": 1110 + }, + { + "epoch": 0.682095006090134, + "grad_norm": 0.5440439581871033, + "learning_rate": 0.0001822721504219814, + "loss": 0.7432, + "mean_token_accuracy": 0.799126236140728, + "num_tokens": 15094879.0, + "step": 1120 + }, + { + "epoch": 0.6881851400730816, + "grad_norm": 0.46225935220718384, + "learning_rate": 0.00018191793271657978, + "loss": 0.7513, + "mean_token_accuracy": 0.8022688791155815, + "num_tokens": 15234906.0, + "step": 1130 + }, + { + "epoch": 0.6942752740560292, + "grad_norm": 0.5592020750045776, + "learning_rate": 0.0001815606058510833, + "loss": 0.7583, + "mean_token_accuracy": 0.7984497547149658, + "num_tokens": 15373526.0, + "step": 1140 + }, + { + "epoch": 0.7003654080389768, + "grad_norm": 0.525090217590332, + "learning_rate": 0.00018120018527346702, + "loss": 0.7254, + "mean_token_accuracy": 0.8070619881153107, + "num_tokens": 15516264.0, + "step": 1150 + }, + { + "epoch": 0.7064555420219245, + "grad_norm": 0.5380759239196777, + "learning_rate": 0.00018083668656545355, + "loss": 0.8041, + "mean_token_accuracy": 0.7866759791970253, + "num_tokens": 15640444.0, + "step": 1160 + }, + { + "epoch": 0.7125456760048721, + "grad_norm": 0.47815701365470886, + "learning_rate": 0.00018047012544183938, + "loss": 0.7604, + "mean_token_accuracy": 0.796156468987465, + "num_tokens": 15778070.0, + "step": 1170 + }, + { + "epoch": 0.7186358099878197, + "grad_norm": 
0.5380450487136841, + "learning_rate": 0.00018010051774981553, + "loss": 0.8135, + "mean_token_accuracy": 0.7842124432325364, + "num_tokens": 15899739.0, + "step": 1180 + }, + { + "epoch": 0.7247259439707674, + "grad_norm": 0.5047502517700195, + "learning_rate": 0.00017972787946828246, + "loss": 0.7642, + "mean_token_accuracy": 0.7989341139793396, + "num_tokens": 16035805.0, + "step": 1190 + }, + { + "epoch": 0.730816077953715, + "grad_norm": 0.5440967679023743, + "learning_rate": 0.00017935222670715918, + "loss": 0.735, + "mean_token_accuracy": 0.8048294603824615, + "num_tokens": 16172541.0, + "step": 1200 + }, + { + "epoch": 0.7369062119366626, + "grad_norm": 0.4766077399253845, + "learning_rate": 0.000178973575706687, + "loss": 0.805, + "mean_token_accuracy": 0.7871790423989296, + "num_tokens": 16296988.0, + "step": 1210 + }, + { + "epoch": 0.7429963459196103, + "grad_norm": 0.4153214991092682, + "learning_rate": 0.00017859194283672704, + "loss": 0.7635, + "mean_token_accuracy": 0.7964595645666123, + "num_tokens": 16432022.0, + "step": 1220 + }, + { + "epoch": 0.7490864799025578, + "grad_norm": 0.4698518216609955, + "learning_rate": 0.00017820734459605302, + "loss": 0.7397, + "mean_token_accuracy": 0.8046972885727882, + "num_tokens": 16572880.0, + "step": 1230 + }, + { + "epoch": 0.7551766138855055, + "grad_norm": 0.46101540327072144, + "learning_rate": 0.00017781979761163756, + "loss": 0.7174, + "mean_token_accuracy": 0.8066875368356705, + "num_tokens": 16714419.0, + "step": 1240 + }, + { + "epoch": 0.761266747868453, + "grad_norm": 0.5313341021537781, + "learning_rate": 0.00017742931863793358, + "loss": 0.7797, + "mean_token_accuracy": 0.7911526098847389, + "num_tokens": 16838285.0, + "step": 1250 + }, + { + "epoch": 0.7673568818514007, + "grad_norm": 0.4627362787723541, + "learning_rate": 0.00017703592455614998, + "loss": 0.7626, + "mean_token_accuracy": 0.7970306649804115, + "num_tokens": 16976065.0, + "step": 1260 + }, + { + "epoch": 0.7734470158343484, + 
"grad_norm": 0.5429073572158813, + "learning_rate": 0.00017663963237352177, + "loss": 0.7398, + "mean_token_accuracy": 0.8005403786897659, + "num_tokens": 17112901.0, + "step": 1270 + }, + { + "epoch": 0.7795371498172959, + "grad_norm": 0.6781270503997803, + "learning_rate": 0.00017624045922257471, + "loss": 0.7607, + "mean_token_accuracy": 0.7946217939257622, + "num_tokens": 17245480.0, + "step": 1280 + }, + { + "epoch": 0.7856272838002436, + "grad_norm": 0.5227305293083191, + "learning_rate": 0.00017583842236038483, + "loss": 0.7217, + "mean_token_accuracy": 0.8064659267663956, + "num_tokens": 17387171.0, + "step": 1290 + }, + { + "epoch": 0.7917174177831913, + "grad_norm": 0.49253156781196594, + "learning_rate": 0.0001754335391678323, + "loss": 0.7652, + "mean_token_accuracy": 0.7960015773773194, + "num_tokens": 17521164.0, + "step": 1300 + }, + { + "epoch": 0.7978075517661388, + "grad_norm": 0.5103631615638733, + "learning_rate": 0.00017502582714884997, + "loss": 0.7435, + "mean_token_accuracy": 0.7995276898145676, + "num_tokens": 17657818.0, + "step": 1310 + }, + { + "epoch": 0.8038976857490865, + "grad_norm": 0.5531247854232788, + "learning_rate": 0.00017461530392966665, + "loss": 0.7986, + "mean_token_accuracy": 0.7892467245459557, + "num_tokens": 17784361.0, + "step": 1320 + }, + { + "epoch": 0.8099878197320342, + "grad_norm": 0.4574586749076843, + "learning_rate": 0.00017420198725804517, + "loss": 0.6889, + "mean_token_accuracy": 0.8135112956166267, + "num_tokens": 17929664.0, + "step": 1330 + }, + { + "epoch": 0.8160779537149817, + "grad_norm": 0.4734383225440979, + "learning_rate": 0.00017378589500251498, + "loss": 0.7308, + "mean_token_accuracy": 0.8029947131872177, + "num_tokens": 18071182.0, + "step": 1340 + }, + { + "epoch": 0.8221680876979294, + "grad_norm": 0.5192279815673828, + "learning_rate": 0.00017336704515159986, + "loss": 0.7444, + "mean_token_accuracy": 0.8012512847781181, + "num_tokens": 18211136.0, + "step": 1350 + }, + { + "epoch": 
0.8282582216808769, + "grad_norm": 0.5378620624542236, + "learning_rate": 0.00017294545581303996, + "loss": 0.7459, + "mean_token_accuracy": 0.7981989249587059, + "num_tokens": 18340645.0, + "step": 1360 + }, + { + "epoch": 0.8343483556638246, + "grad_norm": 0.4879571497440338, + "learning_rate": 0.00017252114521300918, + "loss": 0.7877, + "mean_token_accuracy": 0.7891893342137337, + "num_tokens": 18465733.0, + "step": 1370 + }, + { + "epoch": 0.8404384896467723, + "grad_norm": 0.5297388434410095, + "learning_rate": 0.00017209413169532717, + "loss": 0.7586, + "mean_token_accuracy": 0.797142505645752, + "num_tokens": 18598979.0, + "step": 1380 + }, + { + "epoch": 0.8465286236297198, + "grad_norm": 0.5308396220207214, + "learning_rate": 0.00017166443372066618, + "loss": 0.7387, + "mean_token_accuracy": 0.80123979896307, + "num_tokens": 18735919.0, + "step": 1390 + }, + { + "epoch": 0.8526187576126675, + "grad_norm": 0.49988579750061035, + "learning_rate": 0.0001712320698657532, + "loss": 0.7425, + "mean_token_accuracy": 0.7996803268790245, + "num_tokens": 18870877.0, + "step": 1400 + }, + { + "epoch": 0.8587088915956151, + "grad_norm": 0.5971361994743347, + "learning_rate": 0.0001707970588225665, + "loss": 0.7691, + "mean_token_accuracy": 0.7922965154051781, + "num_tokens": 19000943.0, + "step": 1410 + }, + { + "epoch": 0.8647990255785627, + "grad_norm": 0.5141698718070984, + "learning_rate": 0.00017035941939752802, + "loss": 0.7203, + "mean_token_accuracy": 0.8036229625344277, + "num_tokens": 19135039.0, + "step": 1420 + }, + { + "epoch": 0.8708891595615104, + "grad_norm": 0.4647749066352844, + "learning_rate": 0.0001699191705106898, + "loss": 0.7136, + "mean_token_accuracy": 0.8064323276281357, + "num_tokens": 19274069.0, + "step": 1430 + }, + { + "epoch": 0.876979293544458, + "grad_norm": 0.5511934161186218, + "learning_rate": 0.00016947633119491633, + "loss": 0.7455, + "mean_token_accuracy": 0.7985599264502525, + "num_tokens": 19409679.0, + "step": 1440 + }, + { 
+ "epoch": 0.8830694275274056, + "grad_norm": 0.4936945140361786, + "learning_rate": 0.00016903092059506182, + "loss": 0.7087, + "mean_token_accuracy": 0.806523185968399, + "num_tokens": 19547419.0, + "step": 1450 + }, + { + "epoch": 0.8891595615103532, + "grad_norm": 0.5227787494659424, + "learning_rate": 0.00016858295796714213, + "loss": 0.7739, + "mean_token_accuracy": 0.7941467314958572, + "num_tokens": 19674455.0, + "step": 1460 + }, + { + "epoch": 0.8952496954933008, + "grad_norm": 0.5046219825744629, + "learning_rate": 0.00016813246267750282, + "loss": 0.7361, + "mean_token_accuracy": 0.8008369222283364, + "num_tokens": 19809861.0, + "step": 1470 + }, + { + "epoch": 0.9013398294762485, + "grad_norm": 0.4827081263065338, + "learning_rate": 0.00016767945420198142, + "loss": 0.7464, + "mean_token_accuracy": 0.7986427888274192, + "num_tokens": 19940696.0, + "step": 1480 + }, + { + "epoch": 0.9074299634591961, + "grad_norm": 0.4970889687538147, + "learning_rate": 0.00016722395212506567, + "loss": 0.7528, + "mean_token_accuracy": 0.7965970665216446, + "num_tokens": 20070686.0, + "step": 1490 + }, + { + "epoch": 0.9135200974421437, + "grad_norm": 0.44478070735931396, + "learning_rate": 0.00016676597613904693, + "loss": 0.7185, + "mean_token_accuracy": 0.8081388726830483, + "num_tokens": 20210260.0, + "step": 1500 + }, + { + "epoch": 0.9196102314250914, + "grad_norm": 0.506136417388916, + "learning_rate": 0.00016630554604316866, + "loss": 0.7395, + "mean_token_accuracy": 0.8003876298666001, + "num_tokens": 20346235.0, + "step": 1510 + }, + { + "epoch": 0.925700365408039, + "grad_norm": 0.500946044921875, + "learning_rate": 0.00016584268174277053, + "loss": 0.6889, + "mean_token_accuracy": 0.8124501362442971, + "num_tokens": 20481248.0, + "step": 1520 + }, + { + "epoch": 0.9317904993909866, + "grad_norm": 0.48528990149497986, + "learning_rate": 0.00016537740324842795, + "loss": 0.7227, + "mean_token_accuracy": 0.8041250064969063, + "num_tokens": 20613531.0, + "step": 
1530 + }, + { + "epoch": 0.9378806333739342, + "grad_norm": 0.5070951581001282, + "learning_rate": 0.00016490973067508674, + "loss": 0.7091, + "mean_token_accuracy": 0.8082544595003128, + "num_tokens": 20750784.0, + "step": 1540 + }, + { + "epoch": 0.9439707673568819, + "grad_norm": 0.5583120584487915, + "learning_rate": 0.0001644396842411939, + "loss": 0.7405, + "mean_token_accuracy": 0.7992320343852043, + "num_tokens": 20883646.0, + "step": 1550 + }, + { + "epoch": 0.9500609013398295, + "grad_norm": 0.5099635124206543, + "learning_rate": 0.00016396728426782312, + "loss": 0.7103, + "mean_token_accuracy": 0.8091216519474983, + "num_tokens": 21025143.0, + "step": 1560 + }, + { + "epoch": 0.9561510353227771, + "grad_norm": 0.5777808427810669, + "learning_rate": 0.00016349255117779652, + "loss": 0.7245, + "mean_token_accuracy": 0.8023119494318962, + "num_tokens": 21160014.0, + "step": 1570 + }, + { + "epoch": 0.9622411693057247, + "grad_norm": 0.5206162333488464, + "learning_rate": 0.0001630155054948016, + "loss": 0.7185, + "mean_token_accuracy": 0.8069521963596344, + "num_tokens": 21299094.0, + "step": 1580 + }, + { + "epoch": 0.9683313032886723, + "grad_norm": 0.5763202905654907, + "learning_rate": 0.00016253616784250415, + "loss": 0.7677, + "mean_token_accuracy": 0.7927820891141891, + "num_tokens": 21429252.0, + "step": 1590 + }, + { + "epoch": 0.97442143727162, + "grad_norm": 0.5068426728248596, + "learning_rate": 0.00016205455894365627, + "loss": 0.7673, + "mean_token_accuracy": 0.794715291261673, + "num_tokens": 21556200.0, + "step": 1600 + }, + { + "epoch": 0.9805115712545676, + "grad_norm": 0.46094459295272827, + "learning_rate": 0.0001615706996192009, + "loss": 0.771, + "mean_token_accuracy": 0.7921045809984207, + "num_tokens": 21681524.0, + "step": 1610 + }, + { + "epoch": 0.9866017052375152, + "grad_norm": 0.5063546299934387, + "learning_rate": 0.00016108461078737148, + "loss": 0.7383, + "mean_token_accuracy": 0.800596435368061, + "num_tokens": 21814109.0, 
+ "step": 1620 + }, + { + "epoch": 0.9926918392204629, + "grad_norm": 0.5418652296066284, + "learning_rate": 0.0001605963134627876, + "loss": 0.7431, + "mean_token_accuracy": 0.7994748756289483, + "num_tokens": 21947346.0, + "step": 1630 + }, + { + "epoch": 0.9987819732034104, + "grad_norm": 0.6195595264434814, + "learning_rate": 0.0001601058287555465, + "loss": 0.7294, + "mean_token_accuracy": 0.8030684441328049, + "num_tokens": 22081340.0, + "step": 1640 + }, + { + "epoch": 1.004872107186358, + "grad_norm": 0.5930359363555908, + "learning_rate": 0.00015961317787031054, + "loss": 0.7387, + "mean_token_accuracy": 0.8013696864247322, + "num_tokens": 22206441.0, + "step": 1650 + }, + { + "epoch": 1.0109622411693058, + "grad_norm": 0.4926474094390869, + "learning_rate": 0.00015911838210539038, + "loss": 0.6743, + "mean_token_accuracy": 0.8141208037734031, + "num_tokens": 22344898.0, + "step": 1660 + }, + { + "epoch": 1.0170523751522533, + "grad_norm": 0.5331000685691833, + "learning_rate": 0.0001586214628518242, + "loss": 0.7033, + "mean_token_accuracy": 0.807385990023613, + "num_tokens": 22483135.0, + "step": 1670 + }, + { + "epoch": 1.0231425091352009, + "grad_norm": 0.5267267227172852, + "learning_rate": 0.0001581224415924531, + "loss": 0.6717, + "mean_token_accuracy": 0.8178876608610153, + "num_tokens": 22617934.0, + "step": 1680 + }, + { + "epoch": 1.0292326431181487, + "grad_norm": 0.5864041447639465, + "learning_rate": 0.00015762133990099205, + "loss": 0.7421, + "mean_token_accuracy": 0.7981289237737655, + "num_tokens": 22745190.0, + "step": 1690 + }, + { + "epoch": 1.0353227771010962, + "grad_norm": 0.45681944489479065, + "learning_rate": 0.00015711817944109738, + "loss": 0.6646, + "mean_token_accuracy": 0.8146520599722862, + "num_tokens": 22887536.0, + "step": 1700 + }, + { + "epoch": 1.0414129110840438, + "grad_norm": 0.5522484183311462, + "learning_rate": 0.00015661298196543042, + "loss": 0.6889, + "mean_token_accuracy": 0.8100781336426734, + "num_tokens": 
23017586.0, + "step": 1710 + }, + { + "epoch": 1.0475030450669915, + "grad_norm": 0.5221629738807678, + "learning_rate": 0.00015610576931471658, + "loss": 0.6939, + "mean_token_accuracy": 0.8114214852452278, + "num_tokens": 23151737.0, + "step": 1720 + }, + { + "epoch": 1.053593179049939, + "grad_norm": 0.5104020833969116, + "learning_rate": 0.00015559656341680164, + "loss": 0.716, + "mean_token_accuracy": 0.8063826873898506, + "num_tokens": 23280778.0, + "step": 1730 + }, + { + "epoch": 1.0596833130328867, + "grad_norm": 0.5163984298706055, + "learning_rate": 0.00015508538628570352, + "loss": 0.7188, + "mean_token_accuracy": 0.802527217566967, + "num_tokens": 23410327.0, + "step": 1740 + }, + { + "epoch": 1.0657734470158344, + "grad_norm": 0.5188373327255249, + "learning_rate": 0.00015457226002066058, + "loss": 0.6791, + "mean_token_accuracy": 0.8127639785408973, + "num_tokens": 23548616.0, + "step": 1750 + }, + { + "epoch": 1.071863580998782, + "grad_norm": 0.5983869433403015, + "learning_rate": 0.00015405720680517618, + "loss": 0.6869, + "mean_token_accuracy": 0.8110290810465812, + "num_tokens": 23682446.0, + "step": 1760 + }, + { + "epoch": 1.0779537149817295, + "grad_norm": 0.5919123291969299, + "learning_rate": 0.00015354024890605985, + "loss": 0.7419, + "mean_token_accuracy": 0.7984233900904656, + "num_tokens": 23806352.0, + "step": 1770 + }, + { + "epoch": 1.0840438489646773, + "grad_norm": 0.4900698661804199, + "learning_rate": 0.0001530214086724644, + "loss": 0.6781, + "mean_token_accuracy": 0.8152358055114746, + "num_tokens": 23942964.0, + "step": 1780 + }, + { + "epoch": 1.0901339829476249, + "grad_norm": 0.5409672856330872, + "learning_rate": 0.00015250070853491986, + "loss": 0.7157, + "mean_token_accuracy": 0.803682966530323, + "num_tokens": 24070937.0, + "step": 1790 + }, + { + "epoch": 1.0962241169305724, + "grad_norm": 0.5581572651863098, + "learning_rate": 0.0001519781710043638, + "loss": 0.7261, + "mean_token_accuracy": 0.8027503877878189, + 
"num_tokens": 24200686.0, + "step": 1800 + }, + { + "epoch": 1.1023142509135202, + "grad_norm": 0.503963053226471, + "learning_rate": 0.0001514538186711679, + "loss": 0.7125, + "mean_token_accuracy": 0.8042754918336869, + "num_tokens": 24329983.0, + "step": 1810 + }, + { + "epoch": 1.1084043848964678, + "grad_norm": 0.6159723997116089, + "learning_rate": 0.00015092767420416168, + "loss": 0.6873, + "mean_token_accuracy": 0.8115814313292503, + "num_tokens": 24465292.0, + "step": 1820 + }, + { + "epoch": 1.1144945188794153, + "grad_norm": 0.518172562122345, + "learning_rate": 0.00015039976034965214, + "loss": 0.6805, + "mean_token_accuracy": 0.8113815248012543, + "num_tokens": 24599980.0, + "step": 1830 + }, + { + "epoch": 1.1205846528623629, + "grad_norm": 0.5381601452827454, + "learning_rate": 0.0001498700999304407, + "loss": 0.6542, + "mean_token_accuracy": 0.8188014090061188, + "num_tokens": 24746703.0, + "step": 1840 + }, + { + "epoch": 1.1266747868453106, + "grad_norm": 0.5001223683357239, + "learning_rate": 0.00014933871584483615, + "loss": 0.7255, + "mean_token_accuracy": 0.8022593036293983, + "num_tokens": 24877604.0, + "step": 1850 + }, + { + "epoch": 1.1327649208282582, + "grad_norm": 0.5812251567840576, + "learning_rate": 0.00014880563106566512, + "loss": 0.6638, + "mean_token_accuracy": 0.8161928996443748, + "num_tokens": 25023049.0, + "step": 1860 + }, + { + "epoch": 1.1388550548112057, + "grad_norm": 0.5384249091148376, + "learning_rate": 0.0001482708686392786, + "loss": 0.6623, + "mean_token_accuracy": 0.8167443484067917, + "num_tokens": 25162124.0, + "step": 1870 + }, + { + "epoch": 1.1449451887941535, + "grad_norm": 0.5310192108154297, + "learning_rate": 0.00014773445168455576, + "loss": 0.7074, + "mean_token_accuracy": 0.8042578861117363, + "num_tokens": 25293569.0, + "step": 1880 + }, + { + "epoch": 1.151035322777101, + "grad_norm": 0.6224446296691895, + "learning_rate": 0.00014719640339190443, + "loss": 0.7094, + "mean_token_accuracy": 
0.803679920732975, + "num_tokens": 25422953.0, + "step": 1890 + }, + { + "epoch": 1.1571254567600486, + "grad_norm": 0.5978189706802368, + "learning_rate": 0.00014665674702225853, + "loss": 0.6926, + "mean_token_accuracy": 0.8080565810203553, + "num_tokens": 25559091.0, + "step": 1900 + }, + { + "epoch": 1.1632155907429964, + "grad_norm": 0.6134657263755798, + "learning_rate": 0.00014611550590607245, + "loss": 0.6716, + "mean_token_accuracy": 0.8152063637971878, + "num_tokens": 25698134.0, + "step": 1910 + }, + { + "epoch": 1.169305724725944, + "grad_norm": 0.5075950026512146, + "learning_rate": 0.00014557270344231246, + "loss": 0.6613, + "mean_token_accuracy": 0.8169043198227882, + "num_tokens": 25835159.0, + "step": 1920 + }, + { + "epoch": 1.1753958587088915, + "grad_norm": 0.5035059452056885, + "learning_rate": 0.00014502836309744508, + "loss": 0.6903, + "mean_token_accuracy": 0.8096718549728393, + "num_tokens": 25970600.0, + "step": 1930 + }, + { + "epoch": 1.1814859926918393, + "grad_norm": 0.583890438079834, + "learning_rate": 0.00014448250840442254, + "loss": 0.6662, + "mean_token_accuracy": 0.8157578155398368, + "num_tokens": 26106658.0, + "step": 1940 + }, + { + "epoch": 1.1875761266747868, + "grad_norm": 0.5089572668075562, + "learning_rate": 0.00014393516296166552, + "loss": 0.7085, + "mean_token_accuracy": 0.8082539036870002, + "num_tokens": 26238847.0, + "step": 1950 + }, + { + "epoch": 1.1936662606577344, + "grad_norm": 0.4495029151439667, + "learning_rate": 0.00014338635043204288, + "loss": 0.7085, + "mean_token_accuracy": 0.8075269401073456, + "num_tokens": 26366417.0, + "step": 1960 + }, + { + "epoch": 1.1997563946406822, + "grad_norm": 0.6390108466148376, + "learning_rate": 0.00014283609454184855, + "loss": 0.6935, + "mean_token_accuracy": 0.8099950149655342, + "num_tokens": 26498101.0, + "step": 1970 + }, + { + "epoch": 1.2058465286236297, + "grad_norm": 0.5687986016273499, + "learning_rate": 0.00014228441907977607, + "loss": 0.7027, + 
"mean_token_accuracy": 0.8083449766039849, + "num_tokens": 26628513.0, + "step": 1980 + }, + { + "epoch": 1.2119366626065773, + "grad_norm": 0.487954318523407, + "learning_rate": 0.00014173134789588994, + "loss": 0.6731, + "mean_token_accuracy": 0.8129799589514732, + "num_tokens": 26761671.0, + "step": 1990 + }, + { + "epoch": 1.218026796589525, + "grad_norm": 0.5641826391220093, + "learning_rate": 0.00014117690490059447, + "loss": 0.6949, + "mean_token_accuracy": 0.8118783175945282, + "num_tokens": 26894870.0, + "step": 2000 + }, + { + "epoch": 1.2241169305724726, + "grad_norm": 0.5209829211235046, + "learning_rate": 0.00014062111406360034, + "loss": 0.6742, + "mean_token_accuracy": 0.816123254597187, + "num_tokens": 27027902.0, + "step": 2010 + }, + { + "epoch": 1.2302070645554202, + "grad_norm": 0.5218231678009033, + "learning_rate": 0.00014006399941288812, + "loss": 0.703, + "mean_token_accuracy": 0.805295330286026, + "num_tokens": 27157882.0, + "step": 2020 + }, + { + "epoch": 1.236297198538368, + "grad_norm": 0.48154470324516296, + "learning_rate": 0.00013950558503366957, + "loss": 0.6844, + "mean_token_accuracy": 0.811684039235115, + "num_tokens": 27290994.0, + "step": 2030 + }, + { + "epoch": 1.2423873325213155, + "grad_norm": 0.5417695045471191, + "learning_rate": 0.00013894589506734643, + "loss": 0.7253, + "mean_token_accuracy": 0.8018206775188446, + "num_tokens": 27420715.0, + "step": 2040 + }, + { + "epoch": 1.248477466504263, + "grad_norm": 0.5282937288284302, + "learning_rate": 0.00013838495371046671, + "loss": 0.682, + "mean_token_accuracy": 0.8128980100154877, + "num_tokens": 27552040.0, + "step": 2050 + }, + { + "epoch": 1.2545676004872108, + "grad_norm": 0.5213696360588074, + "learning_rate": 0.0001378227852136785, + "loss": 0.6728, + "mean_token_accuracy": 0.8128269612789154, + "num_tokens": 27686922.0, + "step": 2060 + }, + { + "epoch": 1.2606577344701584, + "grad_norm": 0.4823834300041199, + "learning_rate": 0.00013725941388068174, + "loss": 
0.6626, + "mean_token_accuracy": 0.8177949145436287, + "num_tokens": 27825036.0, + "step": 2070 + }, + { + "epoch": 1.266747868453106, + "grad_norm": 0.6199477314949036, + "learning_rate": 0.0001366948640671775, + "loss": 0.686, + "mean_token_accuracy": 0.8107037082314491, + "num_tokens": 27961614.0, + "step": 2080 + }, + { + "epoch": 1.2728380024360537, + "grad_norm": 0.4916837513446808, + "learning_rate": 0.00013612916017981488, + "loss": 0.6738, + "mean_token_accuracy": 0.8149923622608185, + "num_tokens": 28099524.0, + "step": 2090 + }, + { + "epoch": 1.2789281364190013, + "grad_norm": 0.6001724600791931, + "learning_rate": 0.00013556232667513607, + "loss": 0.6637, + "mean_token_accuracy": 0.8173324480652809, + "num_tokens": 28237055.0, + "step": 2100 + }, + { + "epoch": 1.2850182704019488, + "grad_norm": 0.5887413620948792, + "learning_rate": 0.00013499438805851882, + "loss": 0.6744, + "mean_token_accuracy": 0.8149967223405838, + "num_tokens": 28370538.0, + "step": 2110 + }, + { + "epoch": 1.2911084043848966, + "grad_norm": 0.6208155751228333, + "learning_rate": 0.00013442536888311733, + "loss": 0.6973, + "mean_token_accuracy": 0.8103871151804924, + "num_tokens": 28499232.0, + "step": 2120 + }, + { + "epoch": 1.2971985383678442, + "grad_norm": 0.5026904344558716, + "learning_rate": 0.0001338552937488003, + "loss": 0.6739, + "mean_token_accuracy": 0.8153023451566697, + "num_tokens": 28633993.0, + "step": 2130 + }, + { + "epoch": 1.3032886723507917, + "grad_norm": 0.5218458771705627, + "learning_rate": 0.00013328418730108795, + "loss": 0.6619, + "mean_token_accuracy": 0.8166303977370262, + "num_tokens": 28774139.0, + "step": 2140 + }, + { + "epoch": 1.3093788063337393, + "grad_norm": 0.519872784614563, + "learning_rate": 0.00013271207423008622, + "loss": 0.6804, + "mean_token_accuracy": 0.8150519266724586, + "num_tokens": 28910109.0, + "step": 2150 + }, + { + "epoch": 1.315468940316687, + "grad_norm": 0.5219667553901672, + "learning_rate": 0.00013213897926941942, 
+ "loss": 0.6682, + "mean_token_accuracy": 0.8166522830724716, + "num_tokens": 29045967.0, + "step": 2160 + }, + { + "epoch": 1.3215590742996346, + "grad_norm": 0.5744656920433044, + "learning_rate": 0.000131564927195161, + "loss": 0.6772, + "mean_token_accuracy": 0.8149690836668014, + "num_tokens": 29180769.0, + "step": 2170 + }, + { + "epoch": 1.3276492082825821, + "grad_norm": 0.5673508048057556, + "learning_rate": 0.00013098994282476236, + "loss": 0.6841, + "mean_token_accuracy": 0.812624742090702, + "num_tokens": 29313512.0, + "step": 2180 + }, + { + "epoch": 1.3337393422655297, + "grad_norm": 0.5187074542045593, + "learning_rate": 0.00013041405101598, + "loss": 0.6281, + "mean_token_accuracy": 0.8221091449260711, + "num_tokens": 29454589.0, + "step": 2190 + }, + { + "epoch": 1.3398294762484775, + "grad_norm": 0.5621201992034912, + "learning_rate": 0.00012983727666580086, + "loss": 0.6755, + "mean_token_accuracy": 0.8157430678606034, + "num_tokens": 29589968.0, + "step": 2200 + }, + { + "epoch": 1.345919610231425, + "grad_norm": 0.579699695110321, + "learning_rate": 0.00012925964470936598, + "loss": 0.6859, + "mean_token_accuracy": 0.8122102931141854, + "num_tokens": 29720188.0, + "step": 2210 + }, + { + "epoch": 1.3520097442143726, + "grad_norm": 0.6406823992729187, + "learning_rate": 0.00012868118011889236, + "loss": 0.684, + "mean_token_accuracy": 0.8107294023036957, + "num_tokens": 29848418.0, + "step": 2220 + }, + { + "epoch": 1.3580998781973204, + "grad_norm": 0.4707708954811096, + "learning_rate": 0.00012810190790259367, + "loss": 0.6607, + "mean_token_accuracy": 0.8182852879166603, + "num_tokens": 29988202.0, + "step": 2230 + }, + { + "epoch": 1.364190012180268, + "grad_norm": 0.6458183526992798, + "learning_rate": 0.00012752185310359874, + "loss": 0.6935, + "mean_token_accuracy": 0.8089477211236954, + "num_tokens": 30119777.0, + "step": 2240 + }, + { + "epoch": 1.3702801461632155, + "grad_norm": 0.4278848469257355, + "learning_rate": 
0.00012694104079886918, + "loss": 0.6565, + "mean_token_accuracy": 0.8185079246759415, + "num_tokens": 30256776.0, + "step": 2250 + }, + { + "epoch": 1.3763702801461632, + "grad_norm": 0.5647698044776917, + "learning_rate": 0.00012635949609811505, + "loss": 0.6636, + "mean_token_accuracy": 0.8155051723122597, + "num_tokens": 30395629.0, + "step": 2260 + }, + { + "epoch": 1.3824604141291108, + "grad_norm": 0.43498411774635315, + "learning_rate": 0.00012577724414270937, + "loss": 0.689, + "mean_token_accuracy": 0.8125654354691505, + "num_tokens": 30532805.0, + "step": 2270 + }, + { + "epoch": 1.3885505481120584, + "grad_norm": 0.5296844244003296, + "learning_rate": 0.00012519431010460136, + "loss": 0.6854, + "mean_token_accuracy": 0.8122918352484703, + "num_tokens": 30664642.0, + "step": 2280 + }, + { + "epoch": 1.3946406820950061, + "grad_norm": 0.44080430269241333, + "learning_rate": 0.000124610719185228, + "loss": 0.6405, + "mean_token_accuracy": 0.8192834481596947, + "num_tokens": 30805370.0, + "step": 2290 + }, + { + "epoch": 1.4007308160779537, + "grad_norm": 0.5946847796440125, + "learning_rate": 0.00012402649661442453, + "loss": 0.7025, + "mean_token_accuracy": 0.8085126876831055, + "num_tokens": 30936385.0, + "step": 2300 + }, + { + "epoch": 1.4068209500609012, + "grad_norm": 0.6572047472000122, + "learning_rate": 0.0001234416676493339, + "loss": 0.709, + "mean_token_accuracy": 0.8046677514910698, + "num_tokens": 31067615.0, + "step": 2310 + }, + { + "epoch": 1.412911084043849, + "grad_norm": 0.4797047972679138, + "learning_rate": 0.0001228562575733147, + "loss": 0.6675, + "mean_token_accuracy": 0.8157136350870132, + "num_tokens": 31200044.0, + "step": 2320 + }, + { + "epoch": 1.4190012180267966, + "grad_norm": 0.5451430082321167, + "learning_rate": 0.0001222702916948481, + "loss": 0.6746, + "mean_token_accuracy": 0.8092615008354187, + "num_tokens": 31334451.0, + "step": 2330 + }, + { + "epoch": 1.4250913520097441, + "grad_norm": 0.5049906969070435, + 
"learning_rate": 0.00012168379534644371, + "loss": 0.6515, + "mean_token_accuracy": 0.8203717589378356, + "num_tokens": 31472218.0, + "step": 2340 + }, + { + "epoch": 1.431181485992692, + "grad_norm": 0.6531693935394287, + "learning_rate": 0.00012109679388354462, + "loss": 0.6778, + "mean_token_accuracy": 0.8134923160076142, + "num_tokens": 31605853.0, + "step": 2350 + }, + { + "epoch": 1.4372716199756395, + "grad_norm": 0.5340039730072021, + "learning_rate": 0.00012050931268343089, + "loss": 0.6628, + "mean_token_accuracy": 0.8176047816872597, + "num_tokens": 31741034.0, + "step": 2360 + }, + { + "epoch": 1.443361753958587, + "grad_norm": 0.4518280625343323, + "learning_rate": 0.00011992137714412266, + "loss": 0.6407, + "mean_token_accuracy": 0.8207336485385894, + "num_tokens": 31878661.0, + "step": 2370 + }, + { + "epoch": 1.4494518879415348, + "grad_norm": 0.5232827067375183, + "learning_rate": 0.00011933301268328212, + "loss": 0.6742, + "mean_token_accuracy": 0.8158077761530876, + "num_tokens": 32016524.0, + "step": 2380 + }, + { + "epoch": 1.4555420219244823, + "grad_norm": 0.5181542634963989, + "learning_rate": 0.00011874424473711457, + "loss": 0.699, + "mean_token_accuracy": 0.8078866004943848, + "num_tokens": 32146820.0, + "step": 2390 + }, + { + "epoch": 1.46163215590743, + "grad_norm": 0.5801041126251221, + "learning_rate": 0.00011815509875926883, + "loss": 0.6572, + "mean_token_accuracy": 0.8183338135480881, + "num_tokens": 32285928.0, + "step": 2400 + }, + { + "epoch": 1.4677222898903777, + "grad_norm": 0.5347133874893188, + "learning_rate": 0.00011756560021973679, + "loss": 0.6738, + "mean_token_accuracy": 0.8143690213561058, + "num_tokens": 32416470.0, + "step": 2410 + }, + { + "epoch": 1.4738124238733252, + "grad_norm": 0.4945615231990814, + "learning_rate": 0.0001169757746037524, + "loss": 0.6505, + "mean_token_accuracy": 0.8196728631854058, + "num_tokens": 32553798.0, + "step": 2420 + }, + { + "epoch": 1.4799025578562728, + "grad_norm": 
0.5072743892669678, + "learning_rate": 0.00011638564741068965, + "loss": 0.625, + "mean_token_accuracy": 0.826240348815918, + "num_tokens": 32692511.0, + "step": 2430 + }, + { + "epoch": 1.4859926918392206, + "grad_norm": 0.5887538194656372, + "learning_rate": 0.00011579524415296043, + "loss": 0.6904, + "mean_token_accuracy": 0.8112018033862114, + "num_tokens": 32818836.0, + "step": 2440 + }, + { + "epoch": 1.4920828258221681, + "grad_norm": 0.5464449524879456, + "learning_rate": 0.00011520459035491142, + "loss": 0.6553, + "mean_token_accuracy": 0.8198345899581909, + "num_tokens": 32957967.0, + "step": 2450 + }, + { + "epoch": 1.4981729598051157, + "grad_norm": 0.5787419676780701, + "learning_rate": 0.00011461371155172071, + "loss": 0.663, + "mean_token_accuracy": 0.8155046373605728, + "num_tokens": 33094241.0, + "step": 2460 + }, + { + "epoch": 1.5042630937880634, + "grad_norm": 0.5159268975257874, + "learning_rate": 0.00011402263328829384, + "loss": 0.6792, + "mean_token_accuracy": 0.8127613604068756, + "num_tokens": 33225474.0, + "step": 2470 + }, + { + "epoch": 1.510353227771011, + "grad_norm": 0.5665333867073059, + "learning_rate": 0.00011343138111815939, + "loss": 0.6265, + "mean_token_accuracy": 0.8276977241039276, + "num_tokens": 33368246.0, + "step": 2480 + }, + { + "epoch": 1.5164433617539586, + "grad_norm": 0.6272276639938354, + "learning_rate": 0.00011283998060236421, + "loss": 0.6734, + "mean_token_accuracy": 0.816029068827629, + "num_tokens": 33503967.0, + "step": 2490 + }, + { + "epoch": 1.5225334957369063, + "grad_norm": 0.5275886654853821, + "learning_rate": 0.0001122484573083686, + "loss": 0.6457, + "mean_token_accuracy": 0.8222623988986015, + "num_tokens": 33641826.0, + "step": 2500 + }, + { + "epoch": 1.5286236297198539, + "grad_norm": 0.5526687502861023, + "learning_rate": 0.00011165683680894072, + "loss": 0.6795, + "mean_token_accuracy": 0.8127825185656548, + "num_tokens": 33774185.0, + "step": 2510 + }, + { + "epoch": 1.5347137637028014, + 
"grad_norm": 0.6226133704185486, + "learning_rate": 0.00011106514468105111, + "loss": 0.6684, + "mean_token_accuracy": 0.815614765882492, + "num_tokens": 33907116.0, + "step": 2520 + }, + { + "epoch": 1.5408038976857492, + "grad_norm": 0.612832248210907, + "learning_rate": 0.000110473406504767, + "loss": 0.6287, + "mean_token_accuracy": 0.8220825806260109, + "num_tokens": 34048267.0, + "step": 2530 + }, + { + "epoch": 1.5468940316686965, + "grad_norm": 0.6066681742668152, + "learning_rate": 0.00010988164786214639, + "loss": 0.6851, + "mean_token_accuracy": 0.8115911707282066, + "num_tokens": 34177555.0, + "step": 2540 + }, + { + "epoch": 1.5529841656516443, + "grad_norm": 0.6376360058784485, + "learning_rate": 0.00010928989433613204, + "loss": 0.6921, + "mean_token_accuracy": 0.8096534594893455, + "num_tokens": 34308932.0, + "step": 2550 + }, + { + "epoch": 1.559074299634592, + "grad_norm": 0.6083400249481201, + "learning_rate": 0.00010869817150944546, + "loss": 0.6575, + "mean_token_accuracy": 0.8187816679477692, + "num_tokens": 34443994.0, + "step": 2560 + }, + { + "epoch": 1.5651644336175394, + "grad_norm": 0.6098156571388245, + "learning_rate": 0.00010810650496348116, + "loss": 0.6092, + "mean_token_accuracy": 0.8285523638129234, + "num_tokens": 34588403.0, + "step": 2570 + }, + { + "epoch": 1.5712545676004872, + "grad_norm": 0.47795701026916504, + "learning_rate": 0.00010751492027720027, + "loss": 0.6423, + "mean_token_accuracy": 0.8211737647652626, + "num_tokens": 34730426.0, + "step": 2580 + }, + { + "epoch": 1.577344701583435, + "grad_norm": 0.560787558555603, + "learning_rate": 0.00010692344302602515, + "loss": 0.6707, + "mean_token_accuracy": 0.8134441033005715, + "num_tokens": 34861708.0, + "step": 2590 + }, + { + "epoch": 1.5834348355663823, + "grad_norm": 0.5722246766090393, + "learning_rate": 0.00010633209878073343, + "loss": 0.6533, + "mean_token_accuracy": 0.8185199156403542, + "num_tokens": 34997377.0, + "step": 2600 + }, + { + "epoch": 
1.58952496954933, + "grad_norm": 0.4941788613796234, + "learning_rate": 0.00010574091310635263, + "loss": 0.6487, + "mean_token_accuracy": 0.8205527886748314, + "num_tokens": 35133685.0, + "step": 2610 + }, + { + "epoch": 1.5956151035322779, + "grad_norm": 0.575986921787262, + "learning_rate": 0.00010514991156105493, + "loss": 0.6615, + "mean_token_accuracy": 0.8179458349943161, + "num_tokens": 35270993.0, + "step": 2620 + }, + { + "epoch": 1.6017052375152252, + "grad_norm": 0.5677866339683533, + "learning_rate": 0.00010455911969505228, + "loss": 0.6572, + "mean_token_accuracy": 0.8155815675854683, + "num_tokens": 35402062.0, + "step": 2630 + }, + { + "epoch": 1.607795371498173, + "grad_norm": 0.6232825517654419, + "learning_rate": 0.00010396856304949162, + "loss": 0.6477, + "mean_token_accuracy": 0.8209305629134178, + "num_tokens": 35537394.0, + "step": 2640 + }, + { + "epoch": 1.6138855054811205, + "grad_norm": 0.6252410411834717, + "learning_rate": 0.00010337826715535102, + "loss": 0.6819, + "mean_token_accuracy": 0.8137489795684815, + "num_tokens": 35669332.0, + "step": 2650 + }, + { + "epoch": 1.619975639464068, + "grad_norm": 0.5850580334663391, + "learning_rate": 0.0001027882575323356, + "loss": 0.6831, + "mean_token_accuracy": 0.8099577218294144, + "num_tokens": 35799095.0, + "step": 2660 + }, + { + "epoch": 1.6260657734470159, + "grad_norm": 0.5118699073791504, + "learning_rate": 0.00010219855968777442, + "loss": 0.681, + "mean_token_accuracy": 0.8123177006840706, + "num_tokens": 35928313.0, + "step": 2670 + }, + { + "epoch": 1.6321559074299634, + "grad_norm": 0.5392698645591736, + "learning_rate": 0.00010160919911551774, + "loss": 0.6536, + "mean_token_accuracy": 0.8185337752103805, + "num_tokens": 36062033.0, + "step": 2680 + }, + { + "epoch": 1.638246041412911, + "grad_norm": 0.5542203783988953, + "learning_rate": 0.00010102020129483481, + "loss": 0.6859, + "mean_token_accuracy": 0.8107540607452393, + "num_tokens": 36190194.0, + "step": 2690 + }, + { + 
"epoch": 1.6443361753958587, + "grad_norm": 0.5962918996810913, + "learning_rate": 0.0001004315916893124, + "loss": 0.64, + "mean_token_accuracy": 0.8226593688130379, + "num_tokens": 36322437.0, + "step": 2700 + }, + { + "epoch": 1.6504263093788063, + "grad_norm": 0.6391364932060242, + "learning_rate": 9.984339574575394e-05, + "loss": 0.6457, + "mean_token_accuracy": 0.8250340327620507, + "num_tokens": 36463231.0, + "step": 2710 + }, + { + "epoch": 1.6565164433617539, + "grad_norm": 0.5798075795173645, + "learning_rate": 9.92556388930794e-05, + "loss": 0.6901, + "mean_token_accuracy": 0.8104871213436127, + "num_tokens": 36588963.0, + "step": 2720 + }, + { + "epoch": 1.6626065773447016, + "grad_norm": 0.5375143885612488, + "learning_rate": 9.866834654122597e-05, + "loss": 0.6723, + "mean_token_accuracy": 0.8132491707801819, + "num_tokens": 36724295.0, + "step": 2730 + }, + { + "epoch": 1.6686967113276492, + "grad_norm": 0.5556331276893616, + "learning_rate": 9.808154408004942e-05, + "loss": 0.6316, + "mean_token_accuracy": 0.8221101492643357, + "num_tokens": 36855978.0, + "step": 2740 + }, + { + "epoch": 1.6747868453105967, + "grad_norm": 0.5330142974853516, + "learning_rate": 9.749525687822674e-05, + "loss": 0.6269, + "mean_token_accuracy": 0.8239532545208931, + "num_tokens": 36994164.0, + "step": 2750 + }, + { + "epoch": 1.6808769792935445, + "grad_norm": 0.568084716796875, + "learning_rate": 9.6909510282159e-05, + "loss": 0.6568, + "mean_token_accuracy": 0.8158794924616813, + "num_tokens": 37130680.0, + "step": 2760 + }, + { + "epoch": 1.686967113276492, + "grad_norm": 0.5072943568229675, + "learning_rate": 9.632432961487585e-05, + "loss": 0.6838, + "mean_token_accuracy": 0.8121756613254547, + "num_tokens": 37261462.0, + "step": 2770 + }, + { + "epoch": 1.6930572472594396, + "grad_norm": 0.5469337701797485, + "learning_rate": 9.573974017494069e-05, + "loss": 0.6447, + "mean_token_accuracy": 0.8220986798405647, + "num_tokens": 37395606.0, + "step": 2780 + }, + { + 
"epoch": 1.6991473812423874, + "grad_norm": 0.57918381690979, + "learning_rate": 9.515576723535689e-05, + "loss": 0.6217, + "mean_token_accuracy": 0.822702020406723, + "num_tokens": 37533585.0, + "step": 2790 + }, + { + "epoch": 1.705237515225335, + "grad_norm": 0.6425563097000122, + "learning_rate": 9.45724360424753e-05, + "loss": 0.6435, + "mean_token_accuracy": 0.8198476612567902, + "num_tokens": 37672877.0, + "step": 2800 + }, + { + "epoch": 1.7113276492082825, + "grad_norm": 0.5059729218482971, + "learning_rate": 9.398977181490274e-05, + "loss": 0.6579, + "mean_token_accuracy": 0.8166012555360794, + "num_tokens": 37809109.0, + "step": 2810 + }, + { + "epoch": 1.7174177831912303, + "grad_norm": 0.5450888276100159, + "learning_rate": 9.340779974241167e-05, + "loss": 0.6175, + "mean_token_accuracy": 0.8274259582161904, + "num_tokens": 37950597.0, + "step": 2820 + }, + { + "epoch": 1.7235079171741778, + "grad_norm": 0.6464765667915344, + "learning_rate": 9.282654498485139e-05, + "loss": 0.6636, + "mean_token_accuracy": 0.8163545817136765, + "num_tokens": 38086904.0, + "step": 2830 + }, + { + "epoch": 1.7295980511571254, + "grad_norm": 0.6118177175521851, + "learning_rate": 9.22460326710601e-05, + "loss": 0.6696, + "mean_token_accuracy": 0.8133967757225037, + "num_tokens": 38219759.0, + "step": 2840 + }, + { + "epoch": 1.7356881851400732, + "grad_norm": 0.5518969893455505, + "learning_rate": 9.16662878977786e-05, + "loss": 0.6659, + "mean_token_accuracy": 0.8180875137448311, + "num_tokens": 38349770.0, + "step": 2850 + }, + { + "epoch": 1.7417783191230207, + "grad_norm": 0.6465517282485962, + "learning_rate": 9.108733572856549e-05, + "loss": 0.6581, + "mean_token_accuracy": 0.8170399129390716, + "num_tokens": 38482303.0, + "step": 2860 + }, + { + "epoch": 1.7478684531059683, + "grad_norm": 0.5193557143211365, + "learning_rate": 9.050920119271335e-05, + "loss": 0.6543, + "mean_token_accuracy": 0.8178304255008697, + "num_tokens": 38615426.0, + "step": 2870 + }, + { + 
"epoch": 1.753958587088916, + "grad_norm": 0.611529529094696, + "learning_rate": 8.993190928416682e-05, + "loss": 0.6248, + "mean_token_accuracy": 0.8259203046560287, + "num_tokens": 38755859.0, + "step": 2880 + }, + { + "epoch": 1.7600487210718636, + "grad_norm": 0.5405944585800171, + "learning_rate": 8.935548496044198e-05, + "loss": 0.6232, + "mean_token_accuracy": 0.8281204700469971, + "num_tokens": 38893007.0, + "step": 2890 + }, + { + "epoch": 1.7661388550548112, + "grad_norm": 0.6433010697364807, + "learning_rate": 8.877995314154748e-05, + "loss": 0.6751, + "mean_token_accuracy": 0.8155393078923225, + "num_tokens": 39020285.0, + "step": 2900 + }, + { + "epoch": 1.772228989037759, + "grad_norm": 0.47974956035614014, + "learning_rate": 8.820533870890717e-05, + "loss": 0.6527, + "mean_token_accuracy": 0.8197720810770989, + "num_tokens": 39151426.0, + "step": 2910 + }, + { + "epoch": 1.7783191230207065, + "grad_norm": 0.5529680848121643, + "learning_rate": 8.763166650428436e-05, + "loss": 0.6262, + "mean_token_accuracy": 0.8256829127669334, + "num_tokens": 39294242.0, + "step": 2920 + }, + { + "epoch": 1.784409257003654, + "grad_norm": 0.6060122847557068, + "learning_rate": 8.705896132870797e-05, + "loss": 0.6563, + "mean_token_accuracy": 0.8192467406392098, + "num_tokens": 39425879.0, + "step": 2930 + }, + { + "epoch": 1.7904993909866018, + "grad_norm": 0.6099355220794678, + "learning_rate": 8.648724794140017e-05, + "loss": 0.6664, + "mean_token_accuracy": 0.8186777010560036, + "num_tokens": 39559787.0, + "step": 2940 + }, + { + "epoch": 1.7965895249695494, + "grad_norm": 0.5908733010292053, + "learning_rate": 8.591655105870615e-05, + "loss": 0.6712, + "mean_token_accuracy": 0.8136340633034707, + "num_tokens": 39689823.0, + "step": 2950 + }, + { + "epoch": 1.802679658952497, + "grad_norm": 0.5845519304275513, + "learning_rate": 8.534689535302553e-05, + "loss": 0.6608, + "mean_token_accuracy": 0.8170475289225578, + "num_tokens": 39820725.0, + "step": 2960 + }, + 
{ + "epoch": 1.8087697929354447, + "grad_norm": 0.6311175227165222, + "learning_rate": 8.47783054517457e-05, + "loss": 0.6491, + "mean_token_accuracy": 0.8193596869707107, + "num_tokens": 39947874.0, + "step": 2970 + }, + { + "epoch": 1.814859926918392, + "grad_norm": 0.5293188691139221, + "learning_rate": 8.421080593617706e-05, + "loss": 0.6105, + "mean_token_accuracy": 0.83141258507967, + "num_tokens": 40091297.0, + "step": 2980 + }, + { + "epoch": 1.8209500609013398, + "grad_norm": 0.5252617597579956, + "learning_rate": 8.364442134049049e-05, + "loss": 0.6356, + "mean_token_accuracy": 0.8237936720252037, + "num_tokens": 40229207.0, + "step": 2990 + }, + { + "epoch": 1.8270401948842876, + "grad_norm": 0.6039798855781555, + "learning_rate": 8.30791761506565e-05, + "loss": 0.6456, + "mean_token_accuracy": 0.8206364914774895, + "num_tokens": 40364863.0, + "step": 3000 + }, + { + "epoch": 1.833130328867235, + "grad_norm": 0.5508609414100647, + "learning_rate": 8.251509480338684e-05, + "loss": 0.6229, + "mean_token_accuracy": 0.8255992740392685, + "num_tokens": 40504123.0, + "step": 3010 + }, + { + "epoch": 1.8392204628501827, + "grad_norm": 0.5637634992599487, + "learning_rate": 8.195220168507789e-05, + "loss": 0.6026, + "mean_token_accuracy": 0.8290412962436676, + "num_tokens": 40646821.0, + "step": 3020 + }, + { + "epoch": 1.8453105968331305, + "grad_norm": 0.5463610291481018, + "learning_rate": 8.139052113075645e-05, + "loss": 0.6278, + "mean_token_accuracy": 0.8244929850101471, + "num_tokens": 40778989.0, + "step": 3030 + }, + { + "epoch": 1.8514007308160778, + "grad_norm": 0.5360645055770874, + "learning_rate": 8.083007742302776e-05, + "loss": 0.6336, + "mean_token_accuracy": 0.8228462666273118, + "num_tokens": 40917560.0, + "step": 3040 + }, + { + "epoch": 1.8574908647990256, + "grad_norm": 0.5185632705688477, + "learning_rate": 8.02708947910255e-05, + "loss": 0.5991, + "mean_token_accuracy": 0.830042028427124, + "num_tokens": 41059707.0, + "step": 3050 + }, + 
{ + "epoch": 1.8635809987819734, + "grad_norm": 0.6445353627204895, + "learning_rate": 7.971299740936456e-05, + "loss": 0.6555, + "mean_token_accuracy": 0.8169184163212776, + "num_tokens": 41192515.0, + "step": 3060 + }, + { + "epoch": 1.8696711327649207, + "grad_norm": 0.5360421538352966, + "learning_rate": 7.915640939709576e-05, + "loss": 0.6234, + "mean_token_accuracy": 0.8257398083806038, + "num_tokens": 41330047.0, + "step": 3070 + }, + { + "epoch": 1.8757612667478685, + "grad_norm": 0.58651202917099, + "learning_rate": 7.860115481666333e-05, + "loss": 0.6564, + "mean_token_accuracy": 0.8205534905195236, + "num_tokens": 41460379.0, + "step": 3080 + }, + { + "epoch": 1.881851400730816, + "grad_norm": 0.6842640042304993, + "learning_rate": 7.804725767286427e-05, + "loss": 0.6935, + "mean_token_accuracy": 0.8097458809614182, + "num_tokens": 41581210.0, + "step": 3090 + }, + { + "epoch": 1.8879415347137636, + "grad_norm": 0.5175514817237854, + "learning_rate": 7.749474191181096e-05, + "loss": 0.6393, + "mean_token_accuracy": 0.8219558611512184, + "num_tokens": 41714792.0, + "step": 3100 + }, + { + "epoch": 1.8940316686967114, + "grad_norm": 0.5963588356971741, + "learning_rate": 7.694363141989575e-05, + "loss": 0.658, + "mean_token_accuracy": 0.8182344615459443, + "num_tokens": 41846600.0, + "step": 3110 + }, + { + "epoch": 1.900121802679659, + "grad_norm": 0.6149535775184631, + "learning_rate": 7.639395002275827e-05, + "loss": 0.6499, + "mean_token_accuracy": 0.8208124756813049, + "num_tokens": 41977627.0, + "step": 3120 + }, + { + "epoch": 1.9062119366626065, + "grad_norm": 0.5739808678627014, + "learning_rate": 7.584572148425544e-05, + "loss": 0.6703, + "mean_token_accuracy": 0.8125967502593994, + "num_tokens": 42104510.0, + "step": 3130 + }, + { + "epoch": 1.9123020706455542, + "grad_norm": 0.5982648730278015, + "learning_rate": 7.529896950543416e-05, + "loss": 0.6513, + "mean_token_accuracy": 0.8201186507940292, + "num_tokens": 42236168.0, + "step": 3140 + }, 
+ { + "epoch": 1.9183922046285018, + "grad_norm": 0.5896486043930054, + "learning_rate": 7.475371772350658e-05, + "loss": 0.6133, + "mean_token_accuracy": 0.8260134413838387, + "num_tokens": 42375086.0, + "step": 3150 + }, + { + "epoch": 1.9244823386114494, + "grad_norm": 0.6223361492156982, + "learning_rate": 7.420998971082833e-05, + "loss": 0.6638, + "mean_token_accuracy": 0.8162963137030601, + "num_tokens": 42506457.0, + "step": 3160 + }, + { + "epoch": 1.9305724725943971, + "grad_norm": 0.709854245185852, + "learning_rate": 7.366780897387924e-05, + "loss": 0.6324, + "mean_token_accuracy": 0.8247174829244613, + "num_tokens": 42640886.0, + "step": 3170 + }, + { + "epoch": 1.9366626065773447, + "grad_norm": 0.6794169545173645, + "learning_rate": 7.312719895224736e-05, + "loss": 0.6164, + "mean_token_accuracy": 0.82676922082901, + "num_tokens": 42781318.0, + "step": 3180 + }, + { + "epoch": 1.9427527405602922, + "grad_norm": 0.49305981397628784, + "learning_rate": 7.258818301761532e-05, + "loss": 0.6216, + "mean_token_accuracy": 0.8258268669247627, + "num_tokens": 42919381.0, + "step": 3190 + }, + { + "epoch": 1.94884287454324, + "grad_norm": 0.5072576999664307, + "learning_rate": 7.205078447275031e-05, + "loss": 0.6407, + "mean_token_accuracy": 0.819316141307354, + "num_tokens": 43056494.0, + "step": 3200 + }, + { + "epoch": 1.9549330085261876, + "grad_norm": 0.6188381314277649, + "learning_rate": 7.151502655049623e-05, + "loss": 0.6022, + "mean_token_accuracy": 0.8328602254390717, + "num_tokens": 43197795.0, + "step": 3210 + }, + { + "epoch": 1.9610231425091351, + "grad_norm": 0.5626131296157837, + "learning_rate": 7.098093241276962e-05, + "loss": 0.6245, + "mean_token_accuracy": 0.8258091285824776, + "num_tokens": 43340325.0, + "step": 3220 + }, + { + "epoch": 1.967113276492083, + "grad_norm": 0.5364338755607605, + "learning_rate": 7.044852514955816e-05, + "loss": 0.6454, + "mean_token_accuracy": 0.8199462234973908, + "num_tokens": 43472226.0, + "step": 3230 + 
}, + { + "epoch": 1.9732034104750305, + "grad_norm": 0.5460382699966431, + "learning_rate": 6.991782777792244e-05, + "loss": 0.6214, + "mean_token_accuracy": 0.8251617640256882, + "num_tokens": 43609559.0, + "step": 3240 + }, + { + "epoch": 1.979293544457978, + "grad_norm": 0.6013203263282776, + "learning_rate": 6.938886324100097e-05, + "loss": 0.6422, + "mean_token_accuracy": 0.8197862133383751, + "num_tokens": 43743060.0, + "step": 3250 + }, + { + "epoch": 1.9853836784409258, + "grad_norm": 0.6688512563705444, + "learning_rate": 6.88616544070182e-05, + "loss": 0.6447, + "mean_token_accuracy": 0.820338460803032, + "num_tokens": 43876874.0, + "step": 3260 + }, + { + "epoch": 1.9914738124238733, + "grad_norm": 0.6146946549415588, + "learning_rate": 6.8336224068296e-05, + "loss": 0.6015, + "mean_token_accuracy": 0.8318811848759651, + "num_tokens": 44021963.0, + "step": 3270 + }, + { + "epoch": 1.997563946406821, + "grad_norm": 0.5972597599029541, + "learning_rate": 6.781259494026821e-05, + "loss": 0.6094, + "mean_token_accuracy": 0.8282003849744797, + "num_tokens": 44159207.0, + "step": 3280 + } + ], + "logging_steps": 10, + "max_steps": 4926, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.398768566923166e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3284/training_args.bin b/checkpoint-3284/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7aaf98c04505b149e2e8903151128d95e7ab7b98 --- /dev/null +++ b/checkpoint-3284/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d57fe99d74b7e16ba38edb5265078ef5a69a65f6b630b2ca3feaacdb770f49e +size 6161 diff --git 
a/checkpoint-4926/README.md b/checkpoint-4926/README.md new file mode 100644 index 0000000000000000000000000000000000000000..88d5a6d478d4dd6dceb04cd4688496bde58deabd --- /dev/null +++ b/checkpoint-4926/README.md @@ -0,0 +1,208 @@ +--- +base_model: openai/gpt-oss-20b +library_name: peft +tags: +- base_model:adapter:openai/gpt-oss-20b +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-4926/adapter_config.json b/checkpoint-4926/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..4f7444c0a113bd6bbbb6725b2090d1700b55b21c --- /dev/null +++ b/checkpoint-4926/adapter_config.json @@ -0,0 +1,49 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GptOssForCausalLM", + "parent_library": "transformers.models.gpt_oss.modeling_gpt_oss" + }, + "base_model_name_or_path": "openai/gpt-oss-20b", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "target_parameters": [ + "7.mlp.experts.gate_up_proj", + "7.mlp.experts.down_proj", + "15.mlp.experts.gate_up_proj", + "15.mlp.experts.down_proj", + "23.mlp.experts.gate_up_proj", + "23.mlp.experts.down_proj" + ], + "task_type": null, + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-4926/adapter_model.safetensors b/checkpoint-4926/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e7b60cdcad6eaf0526b0bf9915287ce60c75504 --- /dev/null +++ b/checkpoint-4926/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a12fc000b95c82bac6a99a48756a04e45988fdbd7cf08e8fc7d3059abbcb8677 +size 2366470368 diff --git a/checkpoint-4926/chat_template.jinja b/checkpoint-4926/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..dc7bb11927d29f653ba2740f2db2c688fd77592f --- /dev/null +++ 
b/checkpoint-4926/chat_template.jinja @@ -0,0 +1,331 @@ +{#- + In addition to the normal inputs of `messages` and `tools`, this template also accepts the + following kwargs: + - "builtin_tools": A list, can contain "browser" and/or "python". + - "model_identity": A string that optionally describes the model identity. + - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium". + #} + +{#- Tool Definition Rendering ============================================== #} +{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%} + {%- if param_spec.type == "array" -%} + {%- if param_spec['items'] -%} + {%- if param_spec['items']['type'] == "string" -%} + {{- "string[]" }} + {%- elif param_spec['items']['type'] == "number" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "integer" -%} + {{- "number[]" }} + {%- elif param_spec['items']['type'] == "boolean" -%} + {{- "boolean[]" }} + {%- else -%} + {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%} + {%- if inner_type == "object | object" or inner_type|length > 50 -%} + {{- "any[]" }} + {%- else -%} + {{- inner_type + "[]" }} + {%- endif -%} + {%- endif -%} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- else -%} + {{- "any[]" }} + {%- if param_spec.nullable -%} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%} + {#- Handle array of types like ["object", "object"] from Union[dict, list] #} + {%- if param_spec.type | length > 1 -%} + {{- param_spec.type | join(" | ") }} + {%- else -%} + {{- param_spec.type[0] }} + {%- endif -%} + {%- elif param_spec.oneOf -%} + {#- Handle oneOf schemas - check for complex unions and fallback to any #} + {%- set has_object_variants = false -%} + {%- for variant in param_spec.oneOf -%} + 
{%- if variant.type == "object" -%} + {%- set has_object_variants = true -%} + {%- endif -%} + {%- endfor -%} + {%- if has_object_variants and param_spec.oneOf|length > 1 -%} + {{- "any" }} + {%- else -%} + {%- for variant in param_spec.oneOf -%} + {{- render_typescript_type(variant, required_params) -}} + {%- if variant.description %} + {{- "// " + variant.description }} + {%- endif -%} + {%- if variant.default is defined %} + {{ "// default: " + variant.default|tojson }} + {%- endif -%} + {%- if not loop.last %} + {{- " | " }} + {% endif -%} + {%- endfor -%} + {%- endif -%} + {%- elif param_spec.type == "string" -%} + {%- if param_spec.enum -%} + {{- '"' + param_spec.enum|join('" | "') + '"' -}} + {%- else -%} + {{- "string" }} + {%- if param_spec.nullable %} + {{- " | null" }} + {%- endif -%} + {%- endif -%} + {%- elif param_spec.type == "number" -%} + {{- "number" }} + {%- elif param_spec.type == "integer" -%} + {{- "number" }} + {%- elif param_spec.type == "boolean" -%} + {{- "boolean" }} + + {%- elif param_spec.type == "object" -%} + {%- if param_spec.properties -%} + {{- "{\n" }} + {%- for prop_name, prop_spec in param_spec.properties.items() -%} + {{- prop_name -}} + {%- if prop_name not in (param_spec.required or []) -%} + {{- "?" 
}} + {%- endif -%} + {{- ": " }} + {{ render_typescript_type(prop_spec, param_spec.required or []) }} + {%- if not loop.last -%} + {{-", " }} + {%- endif -%} + {%- endfor -%} + {{- "}" }} + {%- else -%} + {{- "object" }} + {%- endif -%} + {%- else -%} + {{- "any" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro render_tool_namespace(namespace_name, tools) -%} + {{- "## " + namespace_name + "\n\n" }} + {{- "namespace " + namespace_name + " {\n\n" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- "// " + tool.description + "\n" }} + {{- "type "+ tool.name + " = " }} + {%- if tool.parameters and tool.parameters.properties %} + {{- "(_: {\n" }} + {%- for param_name, param_spec in tool.parameters.properties.items() %} + {%- if param_spec.description %} + {{- "// " + param_spec.description + "\n" }} + {%- endif %} + {{- param_name }} + {%- if param_name not in (tool.parameters.required or []) -%} + {{- "?" }} + {%- endif -%} + {{- ": " }} + {{- render_typescript_type(param_spec, tool.parameters.required or []) }} + {%- if param_spec.default is defined -%} + {%- if param_spec.enum %} + {{- ", // default: " + param_spec.default }} + {%- elif param_spec.oneOf %} + {{- "// default: " + param_spec.default }} + {%- else %} + {{- ", // default: " + param_spec.default|tojson }} + {%- endif -%} + {%- endif -%} + {%- if not loop.last %} + {{- ",\n" }} + {%- else %} + {{- ",\n" }} + {%- endif -%} + {%- endfor %} + {{- "}) => any;\n\n" }} + {%- else -%} + {{- "() => any;\n\n" }} + {%- endif -%} + {%- endfor %} + {{- "} // namespace " + namespace_name }} +{%- endmacro -%} + +{%- macro render_builtin_tools(browser_tool, python_tool) -%} + {%- if browser_tool %} + {{- "## browser\n\n" }} + {{- "// Tool for browsing.\n" }} + {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }} + {{- "// Cite information from the tool using the following format:\n" }} + {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` 
or `【8†L3】`.\n" }} + {{- "// Do not quote more than 10 words directly from the tool output.\n" }} + {{- "// sources=web (default: web)\n" }} + {{- "namespace browser {\n\n" }} + {{- "// Searches for information related to `query` and displays `topn` results.\n" }} + {{- "type search = (_: {\n" }} + {{- "query: string,\n" }} + {{- "topn?: number, // default: 10\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }} + {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }} + {{- "// If `cursor` is not provided, the most recent page is implied.\n" }} + {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }} + {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\n" }} + {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }} + {{- "type open = (_: {\n" }} + {{- "id?: number | string, // default: -1\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "loc?: number, // default: -1\n" }} + {{- "num_lines?: number, // default: -1\n" }} + {{- "view_source?: boolean, // default: false\n" }} + {{- "source?: string,\n" }} + {{- "}) => any;\n\n" }} + {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }} + {{- "type find = (_: {\n" }} + {{- "pattern: string,\n" }} + {{- "cursor?: number, // default: -1\n" }} + {{- "}) => any;\n\n" }} + {{- "} // namespace browser\n\n" }} + {%- endif -%} + + {%- if python_tool %} + {{- "## python\n\n" }} + {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. 
when creating plots, tables, or files).\n\n" }} + {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }} + {%- endif -%} +{%- endmacro -%} + +{#- System Message Construction ============================================ #} +{%- macro build_system_message() -%} + {%- if model_identity is not defined %} + {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %} + {%- endif %} + {{- model_identity + "\n" }} + {{- "Knowledge cutoff: 2024-06\n" }} + {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }} + {%- if reasoning_effort is not defined %} + {%- set reasoning_effort = "medium" %} + {%- endif %} + {{- "Reasoning: " + reasoning_effort + "\n\n" }} + {%- if builtin_tools %} + {{- "# Tools\n\n" }} + {%- set available_builtin_tools = namespace(browser=false, python=false) %} + {%- for tool in builtin_tools %} + {%- if tool == "browser" %} + {%- set available_builtin_tools.browser = true %} + {%- elif tool == "python" %} + {%- set available_builtin_tools.python = true %} + {%- endif %} + {%- endfor %} + {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }} + {%- endif -%} + {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }} + {%- if tools -%} + {{- "\nCalls to these tools must go to the commentary channel: 'functions'." 
}} + {%- endif -%} +{%- endmacro -%} + +{#- Main Template Logic ================================================= #} +{#- Set defaults #} + +{#- Render system message #} +{{- "<|start|>system<|message|>" }} +{{- build_system_message() }} +{{- "<|end|>" }} + +{#- Extract developer message #} +{%- if messages[0].role == "developer" or messages[0].role == "system" %} + {%- set developer_message = messages[0].content %} + {%- set loop_messages = messages[1:] %} +{%- else %} + {%- set developer_message = "" %} + {%- set loop_messages = messages %} +{%- endif %} + +{#- Render developer message #} +{%- if developer_message or tools %} + {{- "<|start|>developer<|message|>" }} + {%- if developer_message %} + {{- "# Instructions\n\n" }} + {{- developer_message }} + {{- "\n\n" }} + {%- endif %} + {%- if tools -%} + {{- "# Tools\n\n" }} + {{- render_tool_namespace("functions", tools) }} + {%- endif -%} + {{- "<|end|>" }} +{%- endif %} + +{#- Render messages #} +{%- set last_tool_call = namespace(name=none) %} +{%- for message in loop_messages -%} + {#- At this point only assistant/user/tool messages should remain #} + {%- if message.role == 'assistant' -%} + {#- Checks to ensure the messages are being passed in the format we expect #} + {%- if "content" in message %} + {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. 
Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "thinking" in message %} + {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %} + {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }} + {%- endif %} + {%- endif %} + {%- if "tool_calls" in message %} + {#- We need very careful handling here - we want to drop the tool call analysis message if the model #} + {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #} + {#- when we render CoT/analysis messages in inference. #} + {%- set future_final_message = namespace(found=false) %} + {%- for future_message in loop_messages[loop.index:] %} + {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %} + {%- set future_final_message.found = true %} + {%- endif %} + {%- endfor %} + {#- We assume max 1 tool call per message, and so we infer the tool call name #} + {#- in "tool" messages from the most recent assistant tool call name #} + {%- set tool_call = message.tool_calls[0] %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if message.content and message.thinking %} + {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! 
Put the analysis message in one or the other, but not both.") }} + {%- elif message.content and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }} + {%- elif message.thinking and not future_final_message.found %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {{- "<|start|>assistant to=" }} + {{- "functions." + tool_call.name + "<|channel|>commentary " }} + {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }} + {{- tool_call.arguments|tojson }} + {{- "<|call|>" }} + {%- set last_tool_call.name = tool_call.name %} + {%- elif loop.last and not add_generation_prompt %} + {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #} + {#- This is a situation that should only occur in training, never in inference. #} + {%- if "thinking" in message %} + {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }} + {%- endif %} + {#- <|return|> indicates the end of generation, but <|end|> does not #} + {#- <|return|> should never be an input to the model, but we include it as the final token #} + {#- when training, so the model learns to emit it. #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }} + {%- else %} + {#- CoT is dropped during all previous turns, so we never render it for inference #} + {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }} + {%- set last_tool_call.name = none %} + {%- endif %} + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none %} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif %} + {{- "<|start|>functions." 
+ last_tool_call.name }} + {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} + {%- elif message.role == 'user' -%} + {{- "<|start|>user<|message|>" + message.content + "<|end|>" }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt #} +{%- if add_generation_prompt -%} +<|start|>assistant +{%- endif -%} \ No newline at end of file diff --git a/checkpoint-4926/optimizer.pt b/checkpoint-4926/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fae80070fe5c5254314654ff7f1320c8d3e6043 --- /dev/null +++ b/checkpoint-4926/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c968d107bcfa844e488d2039804f39daa06485d721080be175306ae5d96751c5 +size 120495883 diff --git a/checkpoint-4926/rng_state_0.pth b/checkpoint-4926/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..af3da00aa16764dbf845c4a4a61d39a105bd52cd --- /dev/null +++ b/checkpoint-4926/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b093dfe59b41efeb45cc3d628d3360abaa2303bbaa489081411faf431e52941d +size 16389 diff --git a/checkpoint-4926/rng_state_1.pth b/checkpoint-4926/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d70d4a22269f28aa4f3a4d45e4befc35d4c68a65 --- /dev/null +++ b/checkpoint-4926/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450a0ac1645503c0b14fe9c37d77060cc76b1c9942dcfdd0e779cd526b2e98d9 +size 16389 diff --git a/checkpoint-4926/rng_state_2.pth b/checkpoint-4926/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ac8ed70379825f64e91649a83c9280147449c79 --- /dev/null +++ b/checkpoint-4926/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:938b37918eac9a4cbef3805f7d2abdcef094a334f848e73ac19fcdc39d38663a +size 16389 diff --git a/checkpoint-4926/rng_state_3.pth 
b/checkpoint-4926/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fd4b119eb525f3ac85498e848098d18fc09acaf --- /dev/null +++ b/checkpoint-4926/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b27a54988f134299ab296b95e8c1e63d476dffdba7c6f120f2076e8688f355 +size 16389 diff --git a/checkpoint-4926/rng_state_4.pth b/checkpoint-4926/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..19762840f27c8ff5ea7678d5a7177c49c9a65470 --- /dev/null +++ b/checkpoint-4926/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95f73d920296d5d9558e47894c5a2c0d649d7cb10a3b07a013d6bfbd3b8cf90 +size 16389 diff --git a/checkpoint-4926/rng_state_5.pth b/checkpoint-4926/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..a187a68cd34afdb087dd7521b05533b11c855c4b --- /dev/null +++ b/checkpoint-4926/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b945bb634c9daf4a00433296ecc5245b34a2b5f09017993b5f5f03b84dabea +size 16389 diff --git a/checkpoint-4926/rng_state_6.pth b/checkpoint-4926/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ff592443db8155f2ff178732e952661dc06685d --- /dev/null +++ b/checkpoint-4926/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfdd1fca0dace16a59c8592c531a70661218184bb0249c5862bbfb5ab0844fc9 +size 16389 diff --git a/checkpoint-4926/rng_state_7.pth b/checkpoint-4926/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4396c8f58ea97f008f78588654f8667bc2a5d16f --- /dev/null +++ b/checkpoint-4926/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d106363f9f1b0ff898c86d083a097bf22fd84de35e5670aa299504abcc99752a +size 16389 diff --git a/checkpoint-4926/scheduler.pt b/checkpoint-4926/scheduler.pt new file mode 100644 
index 0000000000000000000000000000000000000000..0824cdf1b264665228bf9558c4b75f4a77f20a27 --- /dev/null +++ b/checkpoint-4926/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f477cf9e309af5f9b9168900cba260ec686699831e08565c0d200ead5d63a7f7 +size 1465 diff --git a/checkpoint-4926/special_tokens_map.json b/checkpoint-4926/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a463562962fc1910d6c169b0a7e9c95872abc92e --- /dev/null +++ b/checkpoint-4926/special_tokens_map.json @@ -0,0 +1,1817 @@ +{ + "additional_special_tokens": [ + { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": 
"AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CV", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + }, + { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FuSa", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { 
+ "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LBIST", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + }, + { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCIE", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": 
"QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SoC", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + 
"content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + 
"content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-4926/tokenizer.json b/checkpoint-4926/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e0ace705cfa5bd60a370b9daafe3e1513770b9 --- /dev/null +++ b/checkpoint-4926/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec3af79eb37b392bb5382bfe3f4eeab633498c220a804f4fd5d7d102a000f1a +size 27914312 diff --git a/checkpoint-4926/tokenizer_config.json b/checkpoint-4926/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..923c92874790920d6eaf5c317c2d63cd3b569e62 --- /dev/null +++ b/checkpoint-4926/tokenizer_config.json @@ -0,0 +1,2489 @@ +{ + "added_tokens_decoder": { + "1529": { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2022": { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2416": { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3360": { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4478": { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5173": { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6258": { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6781": { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7726": { + "content": "FO", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9375": { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9760": { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10227": { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11720": { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12235": { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12515": { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13874": { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13905": { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15409": { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17183": { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19862": { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20174": { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22723": { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27968": { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "29829": { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29864": { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34134": { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34435": { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39749": { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43230": { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46966": { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47787": { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47994": { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50719": { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52907": { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54793": { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58530": { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66773": { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "68495": { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70684": { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72089": { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74923": { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77411": { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81990": { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82244": { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84526": { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86154": { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86297": { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93660": { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94432": { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95202": { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100413": { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"104755": { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104805": { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106979": { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110871": { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117565": { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118754": { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126731": { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126978": { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130911": { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147130": { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152076": { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152095": { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152119": { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155474": { 
+ "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158359": { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162121": { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166996": { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171893": { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172873": { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175772": { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177970": { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178261": { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178974": { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186075": { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187697": { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199998": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199999": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, 
+ "200000": { + "content": "<|reserved_200000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200001": { + "content": "<|reserved_200001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200002": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200003": { + "content": "<|constrain|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200004": { + "content": "<|reserved_200004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200005": { + "content": "<|channel|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200006": { + "content": "<|start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200007": { + "content": "<|end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200008": { + "content": "<|message|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200009": { + "content": "<|reserved_200009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200010": { + "content": "<|reserved_200010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200011": { + "content": "<|reserved_200011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200012": { + "content": "<|call|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200013": { + "content": "<|reserved_200013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200014": { + "content": "<|reserved_200014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200015": { + "content": "<|reserved_200015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200016": { + "content": "<|reserved_200016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200017": { + "content": "<|reserved_200017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200018": { + "content": "<|endofprompt|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200019": { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200020": { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200021": { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200022": { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200023": { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200024": { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200025": { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200026": { + "content": "AoU", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200027": { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200028": { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200029": { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200030": { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200031": { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200032": { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200033": { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200034": { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200035": { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200036": { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200037": { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200038": { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200039": { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200040": { + "content": "CBR", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200041": { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200042": { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200043": { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200044": { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200045": { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200046": { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200047": { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200048": { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200049": { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200050": { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200051": { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200052": { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200053": { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200054": { + "content": 
"DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200055": { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200056": { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200057": { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200058": { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200059": { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200060": { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200061": { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200062": { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200063": { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200064": { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200065": { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200066": { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200067": { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200068": { + "content": 
"FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200069": { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200070": { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200071": { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200072": { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200073": { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200074": { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200075": { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200076": { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200077": { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200078": { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200079": { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200080": { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200081": { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200082": { + "content": 
"HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200083": { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200084": { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200085": { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200086": { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200087": { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200088": { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200089": { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200090": { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200091": { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200092": { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200093": { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200094": { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200095": { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200096": { + "content": "IMEM", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200097": { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200098": { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200099": { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200100": { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200101": { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200102": { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200103": { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200104": { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200105": { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200106": { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200107": { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200108": { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200109": { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200110": { + "content": "LBIST", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200111": { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200112": { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200113": { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200114": { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200115": { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200116": { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200117": { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200118": { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200119": { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200120": { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200121": { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200122": { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200123": { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200124": { + "content": "MiTM", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200125": { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200126": { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200127": { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200128": { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200129": { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200130": { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200131": { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200132": { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200133": { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200134": { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200135": { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200136": { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200137": { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200138": { + "content": "PCT", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200139": { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200140": { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200141": { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200142": { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200143": { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200144": { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200145": { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200146": { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200147": { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200148": { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200149": { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200150": { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200151": { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200152": { + "content": "RBR", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200153": { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200154": { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200155": { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200156": { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200157": { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200158": { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200159": { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200160": { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200161": { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200162": { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200163": { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200164": { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200165": { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200166": { + "content": "SHA512", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200167": { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200168": { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200169": { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200170": { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200171": { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200172": { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200173": { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200174": { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200175": { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200176": { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200177": { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200178": { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200179": { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200180": { + "content": "VCPU", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200181": { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200182": { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200183": { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200184": { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200185": { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200186": { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200187": { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200188": { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200189": { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200190": { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200191": { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200192": { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200193": { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200194": { + "content": "ipc_t", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200195": { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200196": { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200197": { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "AAD", + "AArch64", + "ACL", + "AD", + "AES", + "AES256GCM", + "AESCBC", + "AKE", + "AON", + "ASID", + "AXI", + "Acronym", + "AoU", + "AutoSar", + "BAM", + "BCH", + "BIST", + "BOM", + "BPMP", + "BPS", + "BPU", + "BRBCT", + "BW", + "C2C", + "CA", + "CAN", + "CANFD", + "CAR", + "CAVP", + "CBB", + "CBC", + "CBR", + "CCM", + "CCPLEX", + "CCPLEX_L2", + "CCPLEX_MISC", + "CCPLEX_SCF", + "CDD", + "CIF", + "CMAC", + "CPE", + "CPU", + "CRC", + "CSI", + "CSP", + "CTR", + "CTXT", + "CV", + "DBB", + "DEP", + "DEV", + "DFA", + "DFT", + "DIP", + "DISPLAY", + "DLA", + "DMA", + "DMEM", + "DPA", + "DSC", + "DT", + "DU", + "DVMU", + "EC", + "ECB", + "ECC", + "ECDHE", + "ECDSA", + "ECID", + "EDR", + "EOF", + "EOTTI", + "EQoS", + "FCL", + "FHTI", + "FIPS", + "FMEA", + "FMON", + "FO", + "FP", + "FPS", + "FW", + "FuSa", + "GCM", + "GFD", + "GIC", + "GMAC", + "GMSL", + "GOP", + "GP", + "GPCDMA", + "GPU", + "GR", + "Gpps", + "HBR", + "HBR2", + "HBR3", + "HDS", + "HIS", + "HMAC", + "HPSE", + "HSI", + "HSM", + "HSP", + "HW", + "IAS", + "IC", + "ICD", + "IDR", + "IDT", + "IEP", + "IEU", + "IFU", + "IID", + "ILD", + "IMEM", + "IOC", + "IOFA", + "IOMMU", + "IPC", + "IPI", + "IRF", + "IST", + "IV", + "IoT", + "JSR", + "KAT", + "KCV", + "KDF", + "KPI", + "L1PT", + "L2C", + "L2mDIR", + "L2vDIR", + "LAB", + "LBIST", + "LDC", + "LFT", + "LIC", + "LIP", + "LSB", + "MAC", + "MAQ", + "MB", + "MBIST", + "MCAL", + "MCE", + "MCU", + "MSB", + 
"MSS", + "MST", + "MTS", + "MiTM", + "NIP", + "NIST", + "NITO", + "NOC", + "NOOP", + "NT", + "NVDEC", + "NVENC", + "NVJPG", + "NVM", + "NVVSE", + "OEM", + "OFA", + "OS", + "OSP", + "OTP", + "PB", + "PCIE", + "PCPU", + "PCR", + "PCT", + "PDK", + "PII", + "PIP", + "PKC", + "PKCS", + "PKI", + "PL", + "PLA", + "POR", + "PPC", + "PSC", + "PTXT", + "PVA", + "QNX", + "QOS", + "QSPI", + "RBG", + "RBR", + "RC", + "RDEV", + "REE", + "RMA", + "RMW", + "RSA", + "RSB", + "RTS", + "RoT", + "SAE", + "SBK", + "SCH", + "SDK", + "SE", + "SEL0", + "SEL1", + "SEooC", + "SGM", + "SHA", + "SHA256", + "SHA512", + "SHE", + "SKU", + "SNOC", + "SO", + "SPA", + "SQ", + "SSR", + "SST", + "SW", + "SWAT", + "SoC", + "TA", + "TCF", + "TEE", + "THI", + "TNR", + "TOS", + "TRC", + "TRL", + "TSEC", + "TZ", + "UFS", + "VBR", + "VCPU", + "VI", + "VIC", + "VMEM", + "VMID", + "VPU", + "VRC", + "VUI", + "WARB", + "XIP", + "bpp", + "eMMC", + "hfPLA", + "iGPU", + "ipc", + "ipc_fg", + "ipc_t", + "sbPLA", + "xBTV", + "xps" + ], + "bos_token": "<|startoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|return|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|endoftext|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-4926/trainer_state.json b/checkpoint-4926/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4e5f2ada988486ae52444498f4db24e44cf76d1b --- /dev/null +++ b/checkpoint-4926/trainer_state.json @@ -0,0 +1,4462 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 4926, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0060901339829476245, + "grad_norm": 23.5319766998291, + "learning_rate": 1.2162162162162164e-05, + "loss": 
4.5905, + "mean_token_accuracy": 0.3401473943144083, + "num_tokens": 132681.0, + "step": 10 + }, + { + "epoch": 0.012180267965895249, + "grad_norm": 6.916630744934082, + "learning_rate": 2.5675675675675675e-05, + "loss": 3.957, + "mean_token_accuracy": 0.3799716055393219, + "num_tokens": 264238.0, + "step": 20 + }, + { + "epoch": 0.018270401948842874, + "grad_norm": 2.717982292175293, + "learning_rate": 3.918918918918919e-05, + "loss": 3.047, + "mean_token_accuracy": 0.46811963245272636, + "num_tokens": 401308.0, + "step": 30 + }, + { + "epoch": 0.024360535931790498, + "grad_norm": 2.407865524291992, + "learning_rate": 5.27027027027027e-05, + "loss": 2.5322, + "mean_token_accuracy": 0.5190828196704388, + "num_tokens": 532444.0, + "step": 40 + }, + { + "epoch": 0.030450669914738125, + "grad_norm": 1.018301248550415, + "learning_rate": 6.621621621621621e-05, + "loss": 2.1325, + "mean_token_accuracy": 0.5782605841755867, + "num_tokens": 660406.0, + "step": 50 + }, + { + "epoch": 0.03654080389768575, + "grad_norm": 0.7315741181373596, + "learning_rate": 7.972972972972974e-05, + "loss": 1.9044, + "mean_token_accuracy": 0.6264914631843567, + "num_tokens": 795304.0, + "step": 60 + }, + { + "epoch": 0.04263093788063337, + "grad_norm": 0.6652920246124268, + "learning_rate": 9.324324324324324e-05, + "loss": 1.633, + "mean_token_accuracy": 0.6683938711881637, + "num_tokens": 934543.0, + "step": 70 + }, + { + "epoch": 0.048721071863580996, + "grad_norm": 0.6119660139083862, + "learning_rate": 0.00010675675675675677, + "loss": 1.543, + "mean_token_accuracy": 0.6834091022610664, + "num_tokens": 1070669.0, + "step": 80 + }, + { + "epoch": 0.05481120584652863, + "grad_norm": 0.591424286365509, + "learning_rate": 0.00012027027027027027, + "loss": 1.4154, + "mean_token_accuracy": 0.6991497233510018, + "num_tokens": 1211114.0, + "step": 90 + }, + { + "epoch": 0.06090133982947625, + "grad_norm": 0.5663530230522156, + "learning_rate": 0.0001337837837837838, + "loss": 1.3176, + 
"mean_token_accuracy": 0.7089206710457802, + "num_tokens": 1349584.0, + "step": 100 + }, + { + "epoch": 0.06699147381242387, + "grad_norm": 0.5881878137588501, + "learning_rate": 0.0001472972972972973, + "loss": 1.2293, + "mean_token_accuracy": 0.7254403859376908, + "num_tokens": 1487515.0, + "step": 110 + }, + { + "epoch": 0.0730816077953715, + "grad_norm": 0.7664394974708557, + "learning_rate": 0.00016081081081081083, + "loss": 1.1814, + "mean_token_accuracy": 0.7306812778115273, + "num_tokens": 1618603.0, + "step": 120 + }, + { + "epoch": 0.07917174177831912, + "grad_norm": 0.6155670881271362, + "learning_rate": 0.00017432432432432432, + "loss": 1.1967, + "mean_token_accuracy": 0.7284250959753991, + "num_tokens": 1750466.0, + "step": 130 + }, + { + "epoch": 0.08526187576126674, + "grad_norm": 0.5296258330345154, + "learning_rate": 0.00018783783783783784, + "loss": 1.0955, + "mean_token_accuracy": 0.7472824215888977, + "num_tokens": 1887913.0, + "step": 140 + }, + { + "epoch": 0.09135200974421437, + "grad_norm": 0.5564976334571838, + "learning_rate": 0.00019999998054550544, + "loss": 1.118, + "mean_token_accuracy": 0.7397311359643937, + "num_tokens": 2018579.0, + "step": 150 + }, + { + "epoch": 0.09744214372716199, + "grad_norm": 0.5301142930984497, + "learning_rate": 0.00019999764601633156, + "loss": 1.045, + "mean_token_accuracy": 0.7519380420446395, + "num_tokens": 2158851.0, + "step": 160 + }, + { + "epoch": 0.10353227771010962, + "grad_norm": 0.5949111580848694, + "learning_rate": 0.00019999142070388495, + "loss": 1.0497, + "mean_token_accuracy": 0.7520910769701004, + "num_tokens": 2296715.0, + "step": 170 + }, + { + "epoch": 0.10962241169305725, + "grad_norm": 0.6169262528419495, + "learning_rate": 0.0001999813048772986, + "loss": 1.0821, + "mean_token_accuracy": 0.7406247839331627, + "num_tokens": 2424756.0, + "step": 180 + }, + { + "epoch": 0.11571254567600488, + "grad_norm": 0.58912593126297, + "learning_rate": 0.00019996729897390057, + "loss": 1.0286, + 
"mean_token_accuracy": 0.7527454376220704, + "num_tokens": 2559362.0, + "step": 190 + }, + { + "epoch": 0.1218026796589525, + "grad_norm": 0.5084304213523865, + "learning_rate": 0.00019994940359919483, + "loss": 0.992, + "mean_token_accuracy": 0.7640391126275062, + "num_tokens": 2700231.0, + "step": 200 + }, + { + "epoch": 0.1278928136419001, + "grad_norm": 0.5790796279907227, + "learning_rate": 0.00019992761952683516, + "loss": 1.0146, + "mean_token_accuracy": 0.7554366230964661, + "num_tokens": 2831324.0, + "step": 210 + }, + { + "epoch": 0.13398294762484775, + "grad_norm": 0.5852051377296448, + "learning_rate": 0.00019990194769859188, + "loss": 0.978, + "mean_token_accuracy": 0.7612502485513687, + "num_tokens": 2967346.0, + "step": 220 + }, + { + "epoch": 0.14007308160779536, + "grad_norm": 0.5102785229682922, + "learning_rate": 0.00019987238922431088, + "loss": 0.9616, + "mean_token_accuracy": 0.7677591517567635, + "num_tokens": 3110936.0, + "step": 230 + }, + { + "epoch": 0.146163215590743, + "grad_norm": 0.5472669005393982, + "learning_rate": 0.00019983894538186576, + "loss": 0.9535, + "mean_token_accuracy": 0.76737689524889, + "num_tokens": 3247496.0, + "step": 240 + }, + { + "epoch": 0.15225334957369063, + "grad_norm": 0.5611053109169006, + "learning_rate": 0.0001998016176171026, + "loss": 0.9577, + "mean_token_accuracy": 0.7626092001795769, + "num_tokens": 3384178.0, + "step": 250 + }, + { + "epoch": 0.15834348355663824, + "grad_norm": 0.54055255651474, + "learning_rate": 0.0001997604075437774, + "loss": 0.9907, + "mean_token_accuracy": 0.7575223430991173, + "num_tokens": 3517617.0, + "step": 260 + }, + { + "epoch": 0.16443361753958588, + "grad_norm": 0.558316707611084, + "learning_rate": 0.0001997153169434864, + "loss": 0.944, + "mean_token_accuracy": 0.7664194419980049, + "num_tokens": 3662878.0, + "step": 270 + }, + { + "epoch": 0.1705237515225335, + "grad_norm": 0.49766939878463745, + "learning_rate": 0.0001996663477655889, + "loss": 0.9106, + 
"mean_token_accuracy": 0.7760038167238236, + "num_tokens": 3807411.0, + "step": 280 + }, + { + "epoch": 0.17661388550548113, + "grad_norm": 0.4953667223453522, + "learning_rate": 0.0001996135021271232, + "loss": 0.9687, + "mean_token_accuracy": 0.7605679705739021, + "num_tokens": 3936840.0, + "step": 290 + }, + { + "epoch": 0.18270401948842874, + "grad_norm": 0.5447947978973389, + "learning_rate": 0.00019955678231271484, + "loss": 0.9625, + "mean_token_accuracy": 0.7603292793035508, + "num_tokens": 4067826.0, + "step": 300 + }, + { + "epoch": 0.18879415347137637, + "grad_norm": 0.4665842056274414, + "learning_rate": 0.00019949619077447807, + "loss": 0.9372, + "mean_token_accuracy": 0.7676101759076118, + "num_tokens": 4205887.0, + "step": 310 + }, + { + "epoch": 0.19488428745432398, + "grad_norm": 0.515690267086029, + "learning_rate": 0.00019943173013190965, + "loss": 0.923, + "mean_token_accuracy": 0.7708473294973374, + "num_tokens": 4342894.0, + "step": 320 + }, + { + "epoch": 0.20097442143727162, + "grad_norm": 0.5831382274627686, + "learning_rate": 0.00019936340317177565, + "loss": 0.9203, + "mean_token_accuracy": 0.7708552837371826, + "num_tokens": 4477651.0, + "step": 330 + }, + { + "epoch": 0.20706455542021923, + "grad_norm": 0.6162773966789246, + "learning_rate": 0.0001992912128479911, + "loss": 0.916, + "mean_token_accuracy": 0.7702088996767997, + "num_tokens": 4610746.0, + "step": 340 + }, + { + "epoch": 0.21315468940316687, + "grad_norm": 0.5172462463378906, + "learning_rate": 0.00019921516228149207, + "loss": 0.8942, + "mean_token_accuracy": 0.7741821393370628, + "num_tokens": 4751175.0, + "step": 350 + }, + { + "epoch": 0.2192448233861145, + "grad_norm": 0.5890468955039978, + "learning_rate": 0.0001991352547601009, + "loss": 0.9229, + "mean_token_accuracy": 0.7691043332219124, + "num_tokens": 4882328.0, + "step": 360 + }, + { + "epoch": 0.22533495736906212, + "grad_norm": 0.5522404909133911, + "learning_rate": 0.00019905149373838408, + "loss": 0.9294, + 
"mean_token_accuracy": 0.7646071568131447, + "num_tokens": 5012181.0, + "step": 370 + }, + { + "epoch": 0.23142509135200975, + "grad_norm": 0.5349445939064026, + "learning_rate": 0.0001989638828375028, + "loss": 0.8797, + "mean_token_accuracy": 0.7771721839904785, + "num_tokens": 5151133.0, + "step": 380 + }, + { + "epoch": 0.23751522533495736, + "grad_norm": 0.531052827835083, + "learning_rate": 0.00019887242584505635, + "loss": 0.9221, + "mean_token_accuracy": 0.7678465083241462, + "num_tokens": 5279790.0, + "step": 390 + }, + { + "epoch": 0.243605359317905, + "grad_norm": 0.5126324892044067, + "learning_rate": 0.00019877712671491864, + "loss": 0.8862, + "mean_token_accuracy": 0.7739894777536392, + "num_tokens": 5412390.0, + "step": 400 + }, + { + "epoch": 0.2496954933008526, + "grad_norm": 0.5111438632011414, + "learning_rate": 0.00019867798956706693, + "loss": 0.9005, + "mean_token_accuracy": 0.7721902653574944, + "num_tokens": 5545801.0, + "step": 410 + }, + { + "epoch": 0.2557856272838002, + "grad_norm": 0.5488138794898987, + "learning_rate": 0.00019857501868740402, + "loss": 0.8988, + "mean_token_accuracy": 0.7690282896161079, + "num_tokens": 5673758.0, + "step": 420 + }, + { + "epoch": 0.2618757612667479, + "grad_norm": 0.5497994422912598, + "learning_rate": 0.0001984682185275727, + "loss": 0.8802, + "mean_token_accuracy": 0.7780183687806129, + "num_tokens": 5813158.0, + "step": 430 + }, + { + "epoch": 0.2679658952496955, + "grad_norm": 0.5478431582450867, + "learning_rate": 0.0001983575937047635, + "loss": 0.865, + "mean_token_accuracy": 0.7785944610834121, + "num_tokens": 5947367.0, + "step": 440 + }, + { + "epoch": 0.2740560292326431, + "grad_norm": 0.5188766717910767, + "learning_rate": 0.00019824314900151487, + "loss": 0.8798, + "mean_token_accuracy": 0.7752803862094879, + "num_tokens": 6081060.0, + "step": 450 + }, + { + "epoch": 0.2801461632155907, + "grad_norm": 0.530222475528717, + "learning_rate": 0.00019812488936550666, + "loss": 0.8628, + 
"mean_token_accuracy": 0.7801630645990372, + "num_tokens": 6217834.0, + "step": 460 + }, + { + "epoch": 0.2862362971985384, + "grad_norm": 0.5987964868545532, + "learning_rate": 0.00019800281990934614, + "loss": 0.8775, + "mean_token_accuracy": 0.7760324433445931, + "num_tokens": 6350451.0, + "step": 470 + }, + { + "epoch": 0.292326431181486, + "grad_norm": 0.5468559265136719, + "learning_rate": 0.0001978769459103468, + "loss": 0.8721, + "mean_token_accuracy": 0.7794204503297806, + "num_tokens": 6484738.0, + "step": 480 + }, + { + "epoch": 0.2984165651644336, + "grad_norm": 0.5541098117828369, + "learning_rate": 0.0001977472728103005, + "loss": 0.8785, + "mean_token_accuracy": 0.7767582029104233, + "num_tokens": 6619313.0, + "step": 490 + }, + { + "epoch": 0.30450669914738127, + "grad_norm": 0.5134281516075134, + "learning_rate": 0.0001976138062152419, + "loss": 0.8717, + "mean_token_accuracy": 0.7752724394202233, + "num_tokens": 6753195.0, + "step": 500 + }, + { + "epoch": 0.3105968331303289, + "grad_norm": 0.49164435267448425, + "learning_rate": 0.00019747655189520633, + "loss": 0.8757, + "mean_token_accuracy": 0.7768464118242264, + "num_tokens": 6890448.0, + "step": 510 + }, + { + "epoch": 0.3166869671132765, + "grad_norm": 0.5899345278739929, + "learning_rate": 0.00019733551578398023, + "loss": 0.8322, + "mean_token_accuracy": 0.7859320402145386, + "num_tokens": 7027488.0, + "step": 520 + }, + { + "epoch": 0.3227771010962241, + "grad_norm": 0.6552841663360596, + "learning_rate": 0.0001971907039788447, + "loss": 0.861, + "mean_token_accuracy": 0.7770532324910164, + "num_tokens": 7161184.0, + "step": 530 + }, + { + "epoch": 0.32886723507917176, + "grad_norm": 0.5038822889328003, + "learning_rate": 0.0001970421227403117, + "loss": 0.8825, + "mean_token_accuracy": 0.775890800356865, + "num_tokens": 7294399.0, + "step": 540 + }, + { + "epoch": 0.33495736906211937, + "grad_norm": 0.5094267129898071, + "learning_rate": 0.00019688977849185378, + "loss": 0.8598, + 
"mean_token_accuracy": 0.7817838475108146, + "num_tokens": 7427183.0, + "step": 550 + }, + { + "epoch": 0.341047503045067, + "grad_norm": 0.5282809138298035, + "learning_rate": 0.00019673367781962594, + "loss": 0.8463, + "mean_token_accuracy": 0.7812959104776382, + "num_tokens": 7561734.0, + "step": 560 + }, + { + "epoch": 0.3471376370280146, + "grad_norm": 0.45355409383773804, + "learning_rate": 0.00019657382747218123, + "loss": 0.8207, + "mean_token_accuracy": 0.7888262197375298, + "num_tokens": 7706228.0, + "step": 570 + }, + { + "epoch": 0.35322777101096225, + "grad_norm": 0.5162333846092224, + "learning_rate": 0.00019641023436017883, + "loss": 0.8235, + "mean_token_accuracy": 0.7868947923183441, + "num_tokens": 7846684.0, + "step": 580 + }, + { + "epoch": 0.35931790499390986, + "grad_norm": 0.5194632411003113, + "learning_rate": 0.00019624290555608526, + "loss": 0.8129, + "mean_token_accuracy": 0.7884069249033928, + "num_tokens": 7986811.0, + "step": 590 + }, + { + "epoch": 0.3654080389768575, + "grad_norm": 0.5494846701622009, + "learning_rate": 0.00019607184829386882, + "loss": 0.8084, + "mean_token_accuracy": 0.7874000474810601, + "num_tokens": 8124538.0, + "step": 600 + }, + { + "epoch": 0.37149817295980514, + "grad_norm": 0.5368776917457581, + "learning_rate": 0.0001958970699686866, + "loss": 0.8225, + "mean_token_accuracy": 0.783010233938694, + "num_tokens": 8260529.0, + "step": 610 + }, + { + "epoch": 0.37758830694275275, + "grad_norm": 0.6229024529457092, + "learning_rate": 0.00019571857813656496, + "loss": 0.8786, + "mean_token_accuracy": 0.7753148928284646, + "num_tokens": 8389042.0, + "step": 620 + }, + { + "epoch": 0.38367844092570036, + "grad_norm": 0.5601000785827637, + "learning_rate": 0.00019553638051407279, + "loss": 0.8909, + "mean_token_accuracy": 0.7745720192790031, + "num_tokens": 8513603.0, + "step": 630 + }, + { + "epoch": 0.38976857490864797, + "grad_norm": 0.438970685005188, + "learning_rate": 0.0001953504849779879, + "loss": 0.8085, + 
"mean_token_accuracy": 0.7871840804815292, + "num_tokens": 8652970.0, + "step": 640 + }, + { + "epoch": 0.39585870889159563, + "grad_norm": 0.5505132079124451, + "learning_rate": 0.00019516089956495648, + "loss": 0.8102, + "mean_token_accuracy": 0.7869585514068603, + "num_tokens": 8792103.0, + "step": 650 + }, + { + "epoch": 0.40194884287454324, + "grad_norm": 0.5447221398353577, + "learning_rate": 0.00019496763247114581, + "loss": 0.8336, + "mean_token_accuracy": 0.7816034242510795, + "num_tokens": 8926853.0, + "step": 660 + }, + { + "epoch": 0.40803897685749085, + "grad_norm": 0.4652746915817261, + "learning_rate": 0.00019477069205188965, + "loss": 0.8383, + "mean_token_accuracy": 0.7826304718852043, + "num_tokens": 9059592.0, + "step": 670 + }, + { + "epoch": 0.41412911084043846, + "grad_norm": 0.42363590002059937, + "learning_rate": 0.00019457008682132726, + "loss": 0.847, + "mean_token_accuracy": 0.7810002073645592, + "num_tokens": 9193062.0, + "step": 680 + }, + { + "epoch": 0.42021924482338613, + "grad_norm": 0.5209478735923767, + "learning_rate": 0.00019436582545203518, + "loss": 0.8766, + "mean_token_accuracy": 0.7733785718679428, + "num_tokens": 9315805.0, + "step": 690 + }, + { + "epoch": 0.42630937880633374, + "grad_norm": 0.5176642537117004, + "learning_rate": 0.00019415791677465237, + "loss": 0.8155, + "mean_token_accuracy": 0.7869213685393334, + "num_tokens": 9448863.0, + "step": 700 + }, + { + "epoch": 0.43239951278928135, + "grad_norm": 0.4531058371067047, + "learning_rate": 0.00019394636977749843, + "loss": 0.8096, + "mean_token_accuracy": 0.7903949975967407, + "num_tokens": 9589382.0, + "step": 710 + }, + { + "epoch": 0.438489646772229, + "grad_norm": 0.5651549100875854, + "learning_rate": 0.000193731193606185, + "loss": 0.8263, + "mean_token_accuracy": 0.7823062822222709, + "num_tokens": 9723562.0, + "step": 720 + }, + { + "epoch": 0.4445797807551766, + "grad_norm": 0.5377989411354065, + "learning_rate": 0.00019351239756322031, + "loss": 0.7993, 
+ "mean_token_accuracy": 0.7908329650759697, + "num_tokens": 9859255.0, + "step": 730 + }, + { + "epoch": 0.45066991473812423, + "grad_norm": 0.5420868396759033, + "learning_rate": 0.00019328999110760722, + "loss": 0.8461, + "mean_token_accuracy": 0.7780480548739434, + "num_tokens": 9981578.0, + "step": 740 + }, + { + "epoch": 0.45676004872107184, + "grad_norm": 0.4889216125011444, + "learning_rate": 0.000193063983854434, + "loss": 0.7652, + "mean_token_accuracy": 0.7959530428051949, + "num_tokens": 10122922.0, + "step": 750 + }, + { + "epoch": 0.4628501827040195, + "grad_norm": 0.5044087767601013, + "learning_rate": 0.00019283438557445893, + "loss": 0.824, + "mean_token_accuracy": 0.7845935523509979, + "num_tokens": 10252854.0, + "step": 760 + }, + { + "epoch": 0.4689403166869671, + "grad_norm": 0.5286466479301453, + "learning_rate": 0.00019260120619368773, + "loss": 0.815, + "mean_token_accuracy": 0.7850656941533088, + "num_tokens": 10385075.0, + "step": 770 + }, + { + "epoch": 0.47503045066991473, + "grad_norm": 0.5441628694534302, + "learning_rate": 0.00019236445579294437, + "loss": 0.8048, + "mean_token_accuracy": 0.7876680314540863, + "num_tokens": 10520011.0, + "step": 780 + }, + { + "epoch": 0.48112058465286234, + "grad_norm": 0.49002447724342346, + "learning_rate": 0.0001921241446074355, + "loss": 0.8059, + "mean_token_accuracy": 0.7898563235998154, + "num_tokens": 10652488.0, + "step": 790 + }, + { + "epoch": 0.48721071863581, + "grad_norm": 0.4479144811630249, + "learning_rate": 0.0001918802830263077, + "loss": 0.7913, + "mean_token_accuracy": 0.7928732186555862, + "num_tokens": 10785974.0, + "step": 800 + }, + { + "epoch": 0.4933008526187576, + "grad_norm": 0.5007497668266296, + "learning_rate": 0.00019163288159219853, + "loss": 0.8083, + "mean_token_accuracy": 0.7893043681979179, + "num_tokens": 10920950.0, + "step": 810 + }, + { + "epoch": 0.4993909866017052, + "grad_norm": 0.5289483070373535, + "learning_rate": 0.00019138195100078064, + "loss": 
0.8033, + "mean_token_accuracy": 0.7864485770463944, + "num_tokens": 11056380.0, + "step": 820 + }, + { + "epoch": 0.5054811205846529, + "grad_norm": 0.5604159832000732, + "learning_rate": 0.0001911275021002994, + "loss": 0.7652, + "mean_token_accuracy": 0.7946401730179786, + "num_tokens": 11196074.0, + "step": 830 + }, + { + "epoch": 0.5115712545676004, + "grad_norm": 0.43645399808883667, + "learning_rate": 0.00019086954589110397, + "loss": 0.7724, + "mean_token_accuracy": 0.7990294560790062, + "num_tokens": 11337990.0, + "step": 840 + }, + { + "epoch": 0.5176613885505481, + "grad_norm": 0.43992146849632263, + "learning_rate": 0.0001906080935251716, + "loss": 0.7612, + "mean_token_accuracy": 0.7999786615371705, + "num_tokens": 11481565.0, + "step": 850 + }, + { + "epoch": 0.5237515225334958, + "grad_norm": 0.5595120191574097, + "learning_rate": 0.0001903431563056256, + "loss": 0.8266, + "mean_token_accuracy": 0.7859750911593437, + "num_tokens": 11611714.0, + "step": 860 + }, + { + "epoch": 0.5298416565164433, + "grad_norm": 0.5001987218856812, + "learning_rate": 0.0001900747456862467, + "loss": 0.8506, + "mean_token_accuracy": 0.779585388302803, + "num_tokens": 11736573.0, + "step": 870 + }, + { + "epoch": 0.535931790499391, + "grad_norm": 0.430147647857666, + "learning_rate": 0.00018980287327097784, + "loss": 0.7707, + "mean_token_accuracy": 0.795211361348629, + "num_tokens": 11876859.0, + "step": 880 + }, + { + "epoch": 0.5420219244823387, + "grad_norm": 0.5346289873123169, + "learning_rate": 0.00018952755081342245, + "loss": 0.8057, + "mean_token_accuracy": 0.7871127843856811, + "num_tokens": 12007654.0, + "step": 890 + }, + { + "epoch": 0.5481120584652862, + "grad_norm": 0.46072253584861755, + "learning_rate": 0.00018924879021633653, + "loss": 0.7924, + "mean_token_accuracy": 0.7913773030042648, + "num_tokens": 12140520.0, + "step": 900 + }, + { + "epoch": 0.5542021924482339, + "grad_norm": 0.4803653955459595, + "learning_rate": 0.00018896660353111375, + 
"loss": 0.8398, + "mean_token_accuracy": 0.7807079553604126, + "num_tokens": 12267219.0, + "step": 910 + }, + { + "epoch": 0.5602923264311814, + "grad_norm": 0.5219636559486389, + "learning_rate": 0.0001886810029572647, + "loss": 0.7612, + "mean_token_accuracy": 0.7993015512824059, + "num_tokens": 12404646.0, + "step": 920 + }, + { + "epoch": 0.5663824604141291, + "grad_norm": 0.501483142375946, + "learning_rate": 0.00018839200084188936, + "loss": 0.7953, + "mean_token_accuracy": 0.787814213335514, + "num_tokens": 12538219.0, + "step": 930 + }, + { + "epoch": 0.5724725943970768, + "grad_norm": 0.47334522008895874, + "learning_rate": 0.00018809960967914346, + "loss": 0.789, + "mean_token_accuracy": 0.7928574904799461, + "num_tokens": 12673805.0, + "step": 940 + }, + { + "epoch": 0.5785627283800243, + "grad_norm": 0.5057492852210999, + "learning_rate": 0.00018780384210969806, + "loss": 0.7746, + "mean_token_accuracy": 0.7947553545236588, + "num_tokens": 12811727.0, + "step": 950 + }, + { + "epoch": 0.584652862362972, + "grad_norm": 0.5179910659790039, + "learning_rate": 0.00018750471092019325, + "loss": 0.7962, + "mean_token_accuracy": 0.7905686929821968, + "num_tokens": 12947641.0, + "step": 960 + }, + { + "epoch": 0.5907429963459196, + "grad_norm": 0.45797088742256165, + "learning_rate": 0.00018720222904268543, + "loss": 0.7678, + "mean_token_accuracy": 0.7969774708151818, + "num_tokens": 13083869.0, + "step": 970 + }, + { + "epoch": 0.5968331303288672, + "grad_norm": 0.48360612988471985, + "learning_rate": 0.00018689640955408803, + "loss": 0.7996, + "mean_token_accuracy": 0.7885591968894005, + "num_tokens": 13211807.0, + "step": 980 + }, + { + "epoch": 0.6029232643118149, + "grad_norm": 0.4378497004508972, + "learning_rate": 0.00018658726567560635, + "loss": 0.7652, + "mean_token_accuracy": 0.7969291344285011, + "num_tokens": 13351856.0, + "step": 990 + }, + { + "epoch": 0.6090133982947625, + "grad_norm": 0.4857536852359772, + "learning_rate": 
0.00018627481077216577, + "loss": 0.7786, + "mean_token_accuracy": 0.7914443418383599, + "num_tokens": 13486443.0, + "step": 1000 + }, + { + "epoch": 0.6151035322777101, + "grad_norm": 0.5233064293861389, + "learning_rate": 0.0001859590583518343, + "loss": 0.8241, + "mean_token_accuracy": 0.7811850637197495, + "num_tokens": 13612035.0, + "step": 1010 + }, + { + "epoch": 0.6211936662606578, + "grad_norm": 0.5328738689422607, + "learning_rate": 0.00018564002206523816, + "loss": 0.7502, + "mean_token_accuracy": 0.7993430674076081, + "num_tokens": 13756509.0, + "step": 1020 + }, + { + "epoch": 0.6272838002436053, + "grad_norm": 0.47962310910224915, + "learning_rate": 0.000185317715704972, + "loss": 0.7984, + "mean_token_accuracy": 0.7864531084895134, + "num_tokens": 13883033.0, + "step": 1030 + }, + { + "epoch": 0.633373934226553, + "grad_norm": 0.5685893893241882, + "learning_rate": 0.0001849921532050024, + "loss": 0.7869, + "mean_token_accuracy": 0.7909937381744385, + "num_tokens": 14015234.0, + "step": 1040 + }, + { + "epoch": 0.6394640682095006, + "grad_norm": 0.49146631360054016, + "learning_rate": 0.00018466334864006566, + "loss": 0.7952, + "mean_token_accuracy": 0.7878949210047722, + "num_tokens": 14149319.0, + "step": 1050 + }, + { + "epoch": 0.6455542021924482, + "grad_norm": 0.5556225776672363, + "learning_rate": 0.0001843313162250591, + "loss": 0.7524, + "mean_token_accuracy": 0.7994373366236687, + "num_tokens": 14286868.0, + "step": 1060 + }, + { + "epoch": 0.6516443361753959, + "grad_norm": 0.511379063129425, + "learning_rate": 0.00018399607031442666, + "loss": 0.7929, + "mean_token_accuracy": 0.7921562284231186, + "num_tokens": 14418354.0, + "step": 1070 + }, + { + "epoch": 0.6577344701583435, + "grad_norm": 0.5019840598106384, + "learning_rate": 0.00018365762540153836, + "loss": 0.758, + "mean_token_accuracy": 0.7989353060722351, + "num_tokens": 14553174.0, + "step": 1080 + }, + { + "epoch": 0.6638246041412911, + "grad_norm": 0.6032467484474182, + 
"learning_rate": 0.00018331599611806366, + "loss": 0.7888, + "mean_token_accuracy": 0.7903819754719734, + "num_tokens": 14681393.0, + "step": 1090 + }, + { + "epoch": 0.6699147381242387, + "grad_norm": 0.5369830131530762, + "learning_rate": 0.00018297119723333877, + "loss": 0.765, + "mean_token_accuracy": 0.7950262635946274, + "num_tokens": 14814565.0, + "step": 1100 + }, + { + "epoch": 0.6760048721071864, + "grad_norm": 0.5289803743362427, + "learning_rate": 0.00018262324365372846, + "loss": 0.7496, + "mean_token_accuracy": 0.8032818242907525, + "num_tokens": 14954351.0, + "step": 1110 + }, + { + "epoch": 0.682095006090134, + "grad_norm": 0.5440439581871033, + "learning_rate": 0.0001822721504219814, + "loss": 0.7432, + "mean_token_accuracy": 0.799126236140728, + "num_tokens": 15094879.0, + "step": 1120 + }, + { + "epoch": 0.6881851400730816, + "grad_norm": 0.46225935220718384, + "learning_rate": 0.00018191793271657978, + "loss": 0.7513, + "mean_token_accuracy": 0.8022688791155815, + "num_tokens": 15234906.0, + "step": 1130 + }, + { + "epoch": 0.6942752740560292, + "grad_norm": 0.5592020750045776, + "learning_rate": 0.0001815606058510833, + "loss": 0.7583, + "mean_token_accuracy": 0.7984497547149658, + "num_tokens": 15373526.0, + "step": 1140 + }, + { + "epoch": 0.7003654080389768, + "grad_norm": 0.525090217590332, + "learning_rate": 0.00018120018527346702, + "loss": 0.7254, + "mean_token_accuracy": 0.8070619881153107, + "num_tokens": 15516264.0, + "step": 1150 + }, + { + "epoch": 0.7064555420219245, + "grad_norm": 0.5380759239196777, + "learning_rate": 0.00018083668656545355, + "loss": 0.8041, + "mean_token_accuracy": 0.7866759791970253, + "num_tokens": 15640444.0, + "step": 1160 + }, + { + "epoch": 0.7125456760048721, + "grad_norm": 0.47815701365470886, + "learning_rate": 0.00018047012544183938, + "loss": 0.7604, + "mean_token_accuracy": 0.796156468987465, + "num_tokens": 15778070.0, + "step": 1170 + }, + { + "epoch": 0.7186358099878197, + "grad_norm": 
0.5380450487136841, + "learning_rate": 0.00018010051774981553, + "loss": 0.8135, + "mean_token_accuracy": 0.7842124432325364, + "num_tokens": 15899739.0, + "step": 1180 + }, + { + "epoch": 0.7247259439707674, + "grad_norm": 0.5047502517700195, + "learning_rate": 0.00017972787946828246, + "loss": 0.7642, + "mean_token_accuracy": 0.7989341139793396, + "num_tokens": 16035805.0, + "step": 1190 + }, + { + "epoch": 0.730816077953715, + "grad_norm": 0.5440967679023743, + "learning_rate": 0.00017935222670715918, + "loss": 0.735, + "mean_token_accuracy": 0.8048294603824615, + "num_tokens": 16172541.0, + "step": 1200 + }, + { + "epoch": 0.7369062119366626, + "grad_norm": 0.4766077399253845, + "learning_rate": 0.000178973575706687, + "loss": 0.805, + "mean_token_accuracy": 0.7871790423989296, + "num_tokens": 16296988.0, + "step": 1210 + }, + { + "epoch": 0.7429963459196103, + "grad_norm": 0.4153214991092682, + "learning_rate": 0.00017859194283672704, + "loss": 0.7635, + "mean_token_accuracy": 0.7964595645666123, + "num_tokens": 16432022.0, + "step": 1220 + }, + { + "epoch": 0.7490864799025578, + "grad_norm": 0.4698518216609955, + "learning_rate": 0.00017820734459605302, + "loss": 0.7397, + "mean_token_accuracy": 0.8046972885727882, + "num_tokens": 16572880.0, + "step": 1230 + }, + { + "epoch": 0.7551766138855055, + "grad_norm": 0.46101540327072144, + "learning_rate": 0.00017781979761163756, + "loss": 0.7174, + "mean_token_accuracy": 0.8066875368356705, + "num_tokens": 16714419.0, + "step": 1240 + }, + { + "epoch": 0.761266747868453, + "grad_norm": 0.5313341021537781, + "learning_rate": 0.00017742931863793358, + "loss": 0.7797, + "mean_token_accuracy": 0.7911526098847389, + "num_tokens": 16838285.0, + "step": 1250 + }, + { + "epoch": 0.7673568818514007, + "grad_norm": 0.4627362787723541, + "learning_rate": 0.00017703592455614998, + "loss": 0.7626, + "mean_token_accuracy": 0.7970306649804115, + "num_tokens": 16976065.0, + "step": 1260 + }, + { + "epoch": 0.7734470158343484, + 
"grad_norm": 0.5429073572158813, + "learning_rate": 0.00017663963237352177, + "loss": 0.7398, + "mean_token_accuracy": 0.8005403786897659, + "num_tokens": 17112901.0, + "step": 1270 + }, + { + "epoch": 0.7795371498172959, + "grad_norm": 0.6781270503997803, + "learning_rate": 0.00017624045922257471, + "loss": 0.7607, + "mean_token_accuracy": 0.7946217939257622, + "num_tokens": 17245480.0, + "step": 1280 + }, + { + "epoch": 0.7856272838002436, + "grad_norm": 0.5227305293083191, + "learning_rate": 0.00017583842236038483, + "loss": 0.7217, + "mean_token_accuracy": 0.8064659267663956, + "num_tokens": 17387171.0, + "step": 1290 + }, + { + "epoch": 0.7917174177831913, + "grad_norm": 0.49253156781196594, + "learning_rate": 0.0001754335391678323, + "loss": 0.7652, + "mean_token_accuracy": 0.7960015773773194, + "num_tokens": 17521164.0, + "step": 1300 + }, + { + "epoch": 0.7978075517661388, + "grad_norm": 0.5103631615638733, + "learning_rate": 0.00017502582714884997, + "loss": 0.7435, + "mean_token_accuracy": 0.7995276898145676, + "num_tokens": 17657818.0, + "step": 1310 + }, + { + "epoch": 0.8038976857490865, + "grad_norm": 0.5531247854232788, + "learning_rate": 0.00017461530392966665, + "loss": 0.7986, + "mean_token_accuracy": 0.7892467245459557, + "num_tokens": 17784361.0, + "step": 1320 + }, + { + "epoch": 0.8099878197320342, + "grad_norm": 0.4574586749076843, + "learning_rate": 0.00017420198725804517, + "loss": 0.6889, + "mean_token_accuracy": 0.8135112956166267, + "num_tokens": 17929664.0, + "step": 1330 + }, + { + "epoch": 0.8160779537149817, + "grad_norm": 0.4734383225440979, + "learning_rate": 0.00017378589500251498, + "loss": 0.7308, + "mean_token_accuracy": 0.8029947131872177, + "num_tokens": 18071182.0, + "step": 1340 + }, + { + "epoch": 0.8221680876979294, + "grad_norm": 0.5192279815673828, + "learning_rate": 0.00017336704515159986, + "loss": 0.7444, + "mean_token_accuracy": 0.8012512847781181, + "num_tokens": 18211136.0, + "step": 1350 + }, + { + "epoch": 
0.8282582216808769, + "grad_norm": 0.5378620624542236, + "learning_rate": 0.00017294545581303996, + "loss": 0.7459, + "mean_token_accuracy": 0.7981989249587059, + "num_tokens": 18340645.0, + "step": 1360 + }, + { + "epoch": 0.8343483556638246, + "grad_norm": 0.4879571497440338, + "learning_rate": 0.00017252114521300918, + "loss": 0.7877, + "mean_token_accuracy": 0.7891893342137337, + "num_tokens": 18465733.0, + "step": 1370 + }, + { + "epoch": 0.8404384896467723, + "grad_norm": 0.5297388434410095, + "learning_rate": 0.00017209413169532717, + "loss": 0.7586, + "mean_token_accuracy": 0.797142505645752, + "num_tokens": 18598979.0, + "step": 1380 + }, + { + "epoch": 0.8465286236297198, + "grad_norm": 0.5308396220207214, + "learning_rate": 0.00017166443372066618, + "loss": 0.7387, + "mean_token_accuracy": 0.80123979896307, + "num_tokens": 18735919.0, + "step": 1390 + }, + { + "epoch": 0.8526187576126675, + "grad_norm": 0.49988579750061035, + "learning_rate": 0.0001712320698657532, + "loss": 0.7425, + "mean_token_accuracy": 0.7996803268790245, + "num_tokens": 18870877.0, + "step": 1400 + }, + { + "epoch": 0.8587088915956151, + "grad_norm": 0.5971361994743347, + "learning_rate": 0.0001707970588225665, + "loss": 0.7691, + "mean_token_accuracy": 0.7922965154051781, + "num_tokens": 19000943.0, + "step": 1410 + }, + { + "epoch": 0.8647990255785627, + "grad_norm": 0.5141698718070984, + "learning_rate": 0.00017035941939752802, + "loss": 0.7203, + "mean_token_accuracy": 0.8036229625344277, + "num_tokens": 19135039.0, + "step": 1420 + }, + { + "epoch": 0.8708891595615104, + "grad_norm": 0.4647749066352844, + "learning_rate": 0.0001699191705106898, + "loss": 0.7136, + "mean_token_accuracy": 0.8064323276281357, + "num_tokens": 19274069.0, + "step": 1430 + }, + { + "epoch": 0.876979293544458, + "grad_norm": 0.5511934161186218, + "learning_rate": 0.00016947633119491633, + "loss": 0.7455, + "mean_token_accuracy": 0.7985599264502525, + "num_tokens": 19409679.0, + "step": 1440 + }, + { 
+ "epoch": 0.8830694275274056, + "grad_norm": 0.4936945140361786, + "learning_rate": 0.00016903092059506182, + "loss": 0.7087, + "mean_token_accuracy": 0.806523185968399, + "num_tokens": 19547419.0, + "step": 1450 + }, + { + "epoch": 0.8891595615103532, + "grad_norm": 0.5227787494659424, + "learning_rate": 0.00016858295796714213, + "loss": 0.7739, + "mean_token_accuracy": 0.7941467314958572, + "num_tokens": 19674455.0, + "step": 1460 + }, + { + "epoch": 0.8952496954933008, + "grad_norm": 0.5046219825744629, + "learning_rate": 0.00016813246267750282, + "loss": 0.7361, + "mean_token_accuracy": 0.8008369222283364, + "num_tokens": 19809861.0, + "step": 1470 + }, + { + "epoch": 0.9013398294762485, + "grad_norm": 0.4827081263065338, + "learning_rate": 0.00016767945420198142, + "loss": 0.7464, + "mean_token_accuracy": 0.7986427888274192, + "num_tokens": 19940696.0, + "step": 1480 + }, + { + "epoch": 0.9074299634591961, + "grad_norm": 0.4970889687538147, + "learning_rate": 0.00016722395212506567, + "loss": 0.7528, + "mean_token_accuracy": 0.7965970665216446, + "num_tokens": 20070686.0, + "step": 1490 + }, + { + "epoch": 0.9135200974421437, + "grad_norm": 0.44478070735931396, + "learning_rate": 0.00016676597613904693, + "loss": 0.7185, + "mean_token_accuracy": 0.8081388726830483, + "num_tokens": 20210260.0, + "step": 1500 + }, + { + "epoch": 0.9196102314250914, + "grad_norm": 0.506136417388916, + "learning_rate": 0.00016630554604316866, + "loss": 0.7395, + "mean_token_accuracy": 0.8003876298666001, + "num_tokens": 20346235.0, + "step": 1510 + }, + { + "epoch": 0.925700365408039, + "grad_norm": 0.500946044921875, + "learning_rate": 0.00016584268174277053, + "loss": 0.6889, + "mean_token_accuracy": 0.8124501362442971, + "num_tokens": 20481248.0, + "step": 1520 + }, + { + "epoch": 0.9317904993909866, + "grad_norm": 0.48528990149497986, + "learning_rate": 0.00016537740324842795, + "loss": 0.7227, + "mean_token_accuracy": 0.8041250064969063, + "num_tokens": 20613531.0, + "step": 
1530 + }, + { + "epoch": 0.9378806333739342, + "grad_norm": 0.5070951581001282, + "learning_rate": 0.00016490973067508674, + "loss": 0.7091, + "mean_token_accuracy": 0.8082544595003128, + "num_tokens": 20750784.0, + "step": 1540 + }, + { + "epoch": 0.9439707673568819, + "grad_norm": 0.5583120584487915, + "learning_rate": 0.0001644396842411939, + "loss": 0.7405, + "mean_token_accuracy": 0.7992320343852043, + "num_tokens": 20883646.0, + "step": 1550 + }, + { + "epoch": 0.9500609013398295, + "grad_norm": 0.5099635124206543, + "learning_rate": 0.00016396728426782312, + "loss": 0.7103, + "mean_token_accuracy": 0.8091216519474983, + "num_tokens": 21025143.0, + "step": 1560 + }, + { + "epoch": 0.9561510353227771, + "grad_norm": 0.5777808427810669, + "learning_rate": 0.00016349255117779652, + "loss": 0.7245, + "mean_token_accuracy": 0.8023119494318962, + "num_tokens": 21160014.0, + "step": 1570 + }, + { + "epoch": 0.9622411693057247, + "grad_norm": 0.5206162333488464, + "learning_rate": 0.0001630155054948016, + "loss": 0.7185, + "mean_token_accuracy": 0.8069521963596344, + "num_tokens": 21299094.0, + "step": 1580 + }, + { + "epoch": 0.9683313032886723, + "grad_norm": 0.5763202905654907, + "learning_rate": 0.00016253616784250415, + "loss": 0.7677, + "mean_token_accuracy": 0.7927820891141891, + "num_tokens": 21429252.0, + "step": 1590 + }, + { + "epoch": 0.97442143727162, + "grad_norm": 0.5068426728248596, + "learning_rate": 0.00016205455894365627, + "loss": 0.7673, + "mean_token_accuracy": 0.794715291261673, + "num_tokens": 21556200.0, + "step": 1600 + }, + { + "epoch": 0.9805115712545676, + "grad_norm": 0.46094459295272827, + "learning_rate": 0.0001615706996192009, + "loss": 0.771, + "mean_token_accuracy": 0.7921045809984207, + "num_tokens": 21681524.0, + "step": 1610 + }, + { + "epoch": 0.9866017052375152, + "grad_norm": 0.5063546299934387, + "learning_rate": 0.00016108461078737148, + "loss": 0.7383, + "mean_token_accuracy": 0.800596435368061, + "num_tokens": 21814109.0, 
+ "step": 1620 + }, + { + "epoch": 0.9926918392204629, + "grad_norm": 0.5418652296066284, + "learning_rate": 0.0001605963134627876, + "loss": 0.7431, + "mean_token_accuracy": 0.7994748756289483, + "num_tokens": 21947346.0, + "step": 1630 + }, + { + "epoch": 0.9987819732034104, + "grad_norm": 0.6195595264434814, + "learning_rate": 0.0001601058287555465, + "loss": 0.7294, + "mean_token_accuracy": 0.8030684441328049, + "num_tokens": 22081340.0, + "step": 1640 + }, + { + "epoch": 1.004872107186358, + "grad_norm": 0.5930359363555908, + "learning_rate": 0.00015961317787031054, + "loss": 0.7387, + "mean_token_accuracy": 0.8013696864247322, + "num_tokens": 22206441.0, + "step": 1650 + }, + { + "epoch": 1.0109622411693058, + "grad_norm": 0.4926474094390869, + "learning_rate": 0.00015911838210539038, + "loss": 0.6743, + "mean_token_accuracy": 0.8141208037734031, + "num_tokens": 22344898.0, + "step": 1660 + }, + { + "epoch": 1.0170523751522533, + "grad_norm": 0.5331000685691833, + "learning_rate": 0.0001586214628518242, + "loss": 0.7033, + "mean_token_accuracy": 0.807385990023613, + "num_tokens": 22483135.0, + "step": 1670 + }, + { + "epoch": 1.0231425091352009, + "grad_norm": 0.5267267227172852, + "learning_rate": 0.0001581224415924531, + "loss": 0.6717, + "mean_token_accuracy": 0.8178876608610153, + "num_tokens": 22617934.0, + "step": 1680 + }, + { + "epoch": 1.0292326431181487, + "grad_norm": 0.5864041447639465, + "learning_rate": 0.00015762133990099205, + "loss": 0.7421, + "mean_token_accuracy": 0.7981289237737655, + "num_tokens": 22745190.0, + "step": 1690 + }, + { + "epoch": 1.0353227771010962, + "grad_norm": 0.45681944489479065, + "learning_rate": 0.00015711817944109738, + "loss": 0.6646, + "mean_token_accuracy": 0.8146520599722862, + "num_tokens": 22887536.0, + "step": 1700 + }, + { + "epoch": 1.0414129110840438, + "grad_norm": 0.5522484183311462, + "learning_rate": 0.00015661298196543042, + "loss": 0.6889, + "mean_token_accuracy": 0.8100781336426734, + "num_tokens": 
23017586.0, + "step": 1710 + }, + { + "epoch": 1.0475030450669915, + "grad_norm": 0.5221629738807678, + "learning_rate": 0.00015610576931471658, + "loss": 0.6939, + "mean_token_accuracy": 0.8114214852452278, + "num_tokens": 23151737.0, + "step": 1720 + }, + { + "epoch": 1.053593179049939, + "grad_norm": 0.5104020833969116, + "learning_rate": 0.00015559656341680164, + "loss": 0.716, + "mean_token_accuracy": 0.8063826873898506, + "num_tokens": 23280778.0, + "step": 1730 + }, + { + "epoch": 1.0596833130328867, + "grad_norm": 0.5163984298706055, + "learning_rate": 0.00015508538628570352, + "loss": 0.7188, + "mean_token_accuracy": 0.802527217566967, + "num_tokens": 23410327.0, + "step": 1740 + }, + { + "epoch": 1.0657734470158344, + "grad_norm": 0.5188373327255249, + "learning_rate": 0.00015457226002066058, + "loss": 0.6791, + "mean_token_accuracy": 0.8127639785408973, + "num_tokens": 23548616.0, + "step": 1750 + }, + { + "epoch": 1.071863580998782, + "grad_norm": 0.5983869433403015, + "learning_rate": 0.00015405720680517618, + "loss": 0.6869, + "mean_token_accuracy": 0.8110290810465812, + "num_tokens": 23682446.0, + "step": 1760 + }, + { + "epoch": 1.0779537149817295, + "grad_norm": 0.5919123291969299, + "learning_rate": 0.00015354024890605985, + "loss": 0.7419, + "mean_token_accuracy": 0.7984233900904656, + "num_tokens": 23806352.0, + "step": 1770 + }, + { + "epoch": 1.0840438489646773, + "grad_norm": 0.4900698661804199, + "learning_rate": 0.0001530214086724644, + "loss": 0.6781, + "mean_token_accuracy": 0.8152358055114746, + "num_tokens": 23942964.0, + "step": 1780 + }, + { + "epoch": 1.0901339829476249, + "grad_norm": 0.5409672856330872, + "learning_rate": 0.00015250070853491986, + "loss": 0.7157, + "mean_token_accuracy": 0.803682966530323, + "num_tokens": 24070937.0, + "step": 1790 + }, + { + "epoch": 1.0962241169305724, + "grad_norm": 0.5581572651863098, + "learning_rate": 0.0001519781710043638, + "loss": 0.7261, + "mean_token_accuracy": 0.8027503877878189, + 
"num_tokens": 24200686.0, + "step": 1800 + }, + { + "epoch": 1.1023142509135202, + "grad_norm": 0.503963053226471, + "learning_rate": 0.0001514538186711679, + "loss": 0.7125, + "mean_token_accuracy": 0.8042754918336869, + "num_tokens": 24329983.0, + "step": 1810 + }, + { + "epoch": 1.1084043848964678, + "grad_norm": 0.6159723997116089, + "learning_rate": 0.00015092767420416168, + "loss": 0.6873, + "mean_token_accuracy": 0.8115814313292503, + "num_tokens": 24465292.0, + "step": 1820 + }, + { + "epoch": 1.1144945188794153, + "grad_norm": 0.518172562122345, + "learning_rate": 0.00015039976034965214, + "loss": 0.6805, + "mean_token_accuracy": 0.8113815248012543, + "num_tokens": 24599980.0, + "step": 1830 + }, + { + "epoch": 1.1205846528623629, + "grad_norm": 0.5381601452827454, + "learning_rate": 0.0001498700999304407, + "loss": 0.6542, + "mean_token_accuracy": 0.8188014090061188, + "num_tokens": 24746703.0, + "step": 1840 + }, + { + "epoch": 1.1266747868453106, + "grad_norm": 0.5001223683357239, + "learning_rate": 0.00014933871584483615, + "loss": 0.7255, + "mean_token_accuracy": 0.8022593036293983, + "num_tokens": 24877604.0, + "step": 1850 + }, + { + "epoch": 1.1327649208282582, + "grad_norm": 0.5812251567840576, + "learning_rate": 0.00014880563106566512, + "loss": 0.6638, + "mean_token_accuracy": 0.8161928996443748, + "num_tokens": 25023049.0, + "step": 1860 + }, + { + "epoch": 1.1388550548112057, + "grad_norm": 0.5384249091148376, + "learning_rate": 0.0001482708686392786, + "loss": 0.6623, + "mean_token_accuracy": 0.8167443484067917, + "num_tokens": 25162124.0, + "step": 1870 + }, + { + "epoch": 1.1449451887941535, + "grad_norm": 0.5310192108154297, + "learning_rate": 0.00014773445168455576, + "loss": 0.7074, + "mean_token_accuracy": 0.8042578861117363, + "num_tokens": 25293569.0, + "step": 1880 + }, + { + "epoch": 1.151035322777101, + "grad_norm": 0.6224446296691895, + "learning_rate": 0.00014719640339190443, + "loss": 0.7094, + "mean_token_accuracy": 
0.803679920732975, + "num_tokens": 25422953.0, + "step": 1890 + }, + { + "epoch": 1.1571254567600486, + "grad_norm": 0.5978189706802368, + "learning_rate": 0.00014665674702225853, + "loss": 0.6926, + "mean_token_accuracy": 0.8080565810203553, + "num_tokens": 25559091.0, + "step": 1900 + }, + { + "epoch": 1.1632155907429964, + "grad_norm": 0.6134657263755798, + "learning_rate": 0.00014611550590607245, + "loss": 0.6716, + "mean_token_accuracy": 0.8152063637971878, + "num_tokens": 25698134.0, + "step": 1910 + }, + { + "epoch": 1.169305724725944, + "grad_norm": 0.5075950026512146, + "learning_rate": 0.00014557270344231246, + "loss": 0.6613, + "mean_token_accuracy": 0.8169043198227882, + "num_tokens": 25835159.0, + "step": 1920 + }, + { + "epoch": 1.1753958587088915, + "grad_norm": 0.5035059452056885, + "learning_rate": 0.00014502836309744508, + "loss": 0.6903, + "mean_token_accuracy": 0.8096718549728393, + "num_tokens": 25970600.0, + "step": 1930 + }, + { + "epoch": 1.1814859926918393, + "grad_norm": 0.583890438079834, + "learning_rate": 0.00014448250840442254, + "loss": 0.6662, + "mean_token_accuracy": 0.8157578155398368, + "num_tokens": 26106658.0, + "step": 1940 + }, + { + "epoch": 1.1875761266747868, + "grad_norm": 0.5089572668075562, + "learning_rate": 0.00014393516296166552, + "loss": 0.7085, + "mean_token_accuracy": 0.8082539036870002, + "num_tokens": 26238847.0, + "step": 1950 + }, + { + "epoch": 1.1936662606577344, + "grad_norm": 0.4495029151439667, + "learning_rate": 0.00014338635043204288, + "loss": 0.7085, + "mean_token_accuracy": 0.8075269401073456, + "num_tokens": 26366417.0, + "step": 1960 + }, + { + "epoch": 1.1997563946406822, + "grad_norm": 0.6390108466148376, + "learning_rate": 0.00014283609454184855, + "loss": 0.6935, + "mean_token_accuracy": 0.8099950149655342, + "num_tokens": 26498101.0, + "step": 1970 + }, + { + "epoch": 1.2058465286236297, + "grad_norm": 0.5687986016273499, + "learning_rate": 0.00014228441907977607, + "loss": 0.7027, + 
"mean_token_accuracy": 0.8083449766039849, + "num_tokens": 26628513.0, + "step": 1980 + }, + { + "epoch": 1.2119366626065773, + "grad_norm": 0.487954318523407, + "learning_rate": 0.00014173134789588994, + "loss": 0.6731, + "mean_token_accuracy": 0.8129799589514732, + "num_tokens": 26761671.0, + "step": 1990 + }, + { + "epoch": 1.218026796589525, + "grad_norm": 0.5641826391220093, + "learning_rate": 0.00014117690490059447, + "loss": 0.6949, + "mean_token_accuracy": 0.8118783175945282, + "num_tokens": 26894870.0, + "step": 2000 + }, + { + "epoch": 1.2241169305724726, + "grad_norm": 0.5209829211235046, + "learning_rate": 0.00014062111406360034, + "loss": 0.6742, + "mean_token_accuracy": 0.816123254597187, + "num_tokens": 27027902.0, + "step": 2010 + }, + { + "epoch": 1.2302070645554202, + "grad_norm": 0.5218231678009033, + "learning_rate": 0.00014006399941288812, + "loss": 0.703, + "mean_token_accuracy": 0.805295330286026, + "num_tokens": 27157882.0, + "step": 2020 + }, + { + "epoch": 1.236297198538368, + "grad_norm": 0.48154470324516296, + "learning_rate": 0.00013950558503366957, + "loss": 0.6844, + "mean_token_accuracy": 0.811684039235115, + "num_tokens": 27290994.0, + "step": 2030 + }, + { + "epoch": 1.2423873325213155, + "grad_norm": 0.5417695045471191, + "learning_rate": 0.00013894589506734643, + "loss": 0.7253, + "mean_token_accuracy": 0.8018206775188446, + "num_tokens": 27420715.0, + "step": 2040 + }, + { + "epoch": 1.248477466504263, + "grad_norm": 0.5282937288284302, + "learning_rate": 0.00013838495371046671, + "loss": 0.682, + "mean_token_accuracy": 0.8128980100154877, + "num_tokens": 27552040.0, + "step": 2050 + }, + { + "epoch": 1.2545676004872108, + "grad_norm": 0.5213696360588074, + "learning_rate": 0.0001378227852136785, + "loss": 0.6728, + "mean_token_accuracy": 0.8128269612789154, + "num_tokens": 27686922.0, + "step": 2060 + }, + { + "epoch": 1.2606577344701584, + "grad_norm": 0.4823834300041199, + "learning_rate": 0.00013725941388068174, + "loss": 
0.6626, + "mean_token_accuracy": 0.8177949145436287, + "num_tokens": 27825036.0, + "step": 2070 + }, + { + "epoch": 1.266747868453106, + "grad_norm": 0.6199477314949036, + "learning_rate": 0.0001366948640671775, + "loss": 0.686, + "mean_token_accuracy": 0.8107037082314491, + "num_tokens": 27961614.0, + "step": 2080 + }, + { + "epoch": 1.2728380024360537, + "grad_norm": 0.4916837513446808, + "learning_rate": 0.00013612916017981488, + "loss": 0.6738, + "mean_token_accuracy": 0.8149923622608185, + "num_tokens": 28099524.0, + "step": 2090 + }, + { + "epoch": 1.2789281364190013, + "grad_norm": 0.6001724600791931, + "learning_rate": 0.00013556232667513607, + "loss": 0.6637, + "mean_token_accuracy": 0.8173324480652809, + "num_tokens": 28237055.0, + "step": 2100 + }, + { + "epoch": 1.2850182704019488, + "grad_norm": 0.5887413620948792, + "learning_rate": 0.00013499438805851882, + "loss": 0.6744, + "mean_token_accuracy": 0.8149967223405838, + "num_tokens": 28370538.0, + "step": 2110 + }, + { + "epoch": 1.2911084043848966, + "grad_norm": 0.6208155751228333, + "learning_rate": 0.00013442536888311733, + "loss": 0.6973, + "mean_token_accuracy": 0.8103871151804924, + "num_tokens": 28499232.0, + "step": 2120 + }, + { + "epoch": 1.2971985383678442, + "grad_norm": 0.5026904344558716, + "learning_rate": 0.0001338552937488003, + "loss": 0.6739, + "mean_token_accuracy": 0.8153023451566697, + "num_tokens": 28633993.0, + "step": 2130 + }, + { + "epoch": 1.3032886723507917, + "grad_norm": 0.5218458771705627, + "learning_rate": 0.00013328418730108795, + "loss": 0.6619, + "mean_token_accuracy": 0.8166303977370262, + "num_tokens": 28774139.0, + "step": 2140 + }, + { + "epoch": 1.3093788063337393, + "grad_norm": 0.519872784614563, + "learning_rate": 0.00013271207423008622, + "loss": 0.6804, + "mean_token_accuracy": 0.8150519266724586, + "num_tokens": 28910109.0, + "step": 2150 + }, + { + "epoch": 1.315468940316687, + "grad_norm": 0.5219667553901672, + "learning_rate": 0.00013213897926941942, 
+ "loss": 0.6682, + "mean_token_accuracy": 0.8166522830724716, + "num_tokens": 29045967.0, + "step": 2160 + }, + { + "epoch": 1.3215590742996346, + "grad_norm": 0.5744656920433044, + "learning_rate": 0.000131564927195161, + "loss": 0.6772, + "mean_token_accuracy": 0.8149690836668014, + "num_tokens": 29180769.0, + "step": 2170 + }, + { + "epoch": 1.3276492082825821, + "grad_norm": 0.5673508048057556, + "learning_rate": 0.00013098994282476236, + "loss": 0.6841, + "mean_token_accuracy": 0.812624742090702, + "num_tokens": 29313512.0, + "step": 2180 + }, + { + "epoch": 1.3337393422655297, + "grad_norm": 0.5187074542045593, + "learning_rate": 0.00013041405101598, + "loss": 0.6281, + "mean_token_accuracy": 0.8221091449260711, + "num_tokens": 29454589.0, + "step": 2190 + }, + { + "epoch": 1.3398294762484775, + "grad_norm": 0.5621201992034912, + "learning_rate": 0.00012983727666580086, + "loss": 0.6755, + "mean_token_accuracy": 0.8157430678606034, + "num_tokens": 29589968.0, + "step": 2200 + }, + { + "epoch": 1.345919610231425, + "grad_norm": 0.579699695110321, + "learning_rate": 0.00012925964470936598, + "loss": 0.6859, + "mean_token_accuracy": 0.8122102931141854, + "num_tokens": 29720188.0, + "step": 2210 + }, + { + "epoch": 1.3520097442143726, + "grad_norm": 0.6406823992729187, + "learning_rate": 0.00012868118011889236, + "loss": 0.684, + "mean_token_accuracy": 0.8107294023036957, + "num_tokens": 29848418.0, + "step": 2220 + }, + { + "epoch": 1.3580998781973204, + "grad_norm": 0.4707708954811096, + "learning_rate": 0.00012810190790259367, + "loss": 0.6607, + "mean_token_accuracy": 0.8182852879166603, + "num_tokens": 29988202.0, + "step": 2230 + }, + { + "epoch": 1.364190012180268, + "grad_norm": 0.6458183526992798, + "learning_rate": 0.00012752185310359874, + "loss": 0.6935, + "mean_token_accuracy": 0.8089477211236954, + "num_tokens": 30119777.0, + "step": 2240 + }, + { + "epoch": 1.3702801461632155, + "grad_norm": 0.4278848469257355, + "learning_rate": 
0.00012694104079886918, + "loss": 0.6565, + "mean_token_accuracy": 0.8185079246759415, + "num_tokens": 30256776.0, + "step": 2250 + }, + { + "epoch": 1.3763702801461632, + "grad_norm": 0.5647698044776917, + "learning_rate": 0.00012635949609811505, + "loss": 0.6636, + "mean_token_accuracy": 0.8155051723122597, + "num_tokens": 30395629.0, + "step": 2260 + }, + { + "epoch": 1.3824604141291108, + "grad_norm": 0.43498411774635315, + "learning_rate": 0.00012577724414270937, + "loss": 0.689, + "mean_token_accuracy": 0.8125654354691505, + "num_tokens": 30532805.0, + "step": 2270 + }, + { + "epoch": 1.3885505481120584, + "grad_norm": 0.5296844244003296, + "learning_rate": 0.00012519431010460136, + "loss": 0.6854, + "mean_token_accuracy": 0.8122918352484703, + "num_tokens": 30664642.0, + "step": 2280 + }, + { + "epoch": 1.3946406820950061, + "grad_norm": 0.44080430269241333, + "learning_rate": 0.000124610719185228, + "loss": 0.6405, + "mean_token_accuracy": 0.8192834481596947, + "num_tokens": 30805370.0, + "step": 2290 + }, + { + "epoch": 1.4007308160779537, + "grad_norm": 0.5946847796440125, + "learning_rate": 0.00012402649661442453, + "loss": 0.7025, + "mean_token_accuracy": 0.8085126876831055, + "num_tokens": 30936385.0, + "step": 2300 + }, + { + "epoch": 1.4068209500609012, + "grad_norm": 0.6572047472000122, + "learning_rate": 0.0001234416676493339, + "loss": 0.709, + "mean_token_accuracy": 0.8046677514910698, + "num_tokens": 31067615.0, + "step": 2310 + }, + { + "epoch": 1.412911084043849, + "grad_norm": 0.4797047972679138, + "learning_rate": 0.0001228562575733147, + "loss": 0.6675, + "mean_token_accuracy": 0.8157136350870132, + "num_tokens": 31200044.0, + "step": 2320 + }, + { + "epoch": 1.4190012180267966, + "grad_norm": 0.5451430082321167, + "learning_rate": 0.0001222702916948481, + "loss": 0.6746, + "mean_token_accuracy": 0.8092615008354187, + "num_tokens": 31334451.0, + "step": 2330 + }, + { + "epoch": 1.4250913520097441, + "grad_norm": 0.5049906969070435, + 
"learning_rate": 0.00012168379534644371, + "loss": 0.6515, + "mean_token_accuracy": 0.8203717589378356, + "num_tokens": 31472218.0, + "step": 2340 + }, + { + "epoch": 1.431181485992692, + "grad_norm": 0.6531693935394287, + "learning_rate": 0.00012109679388354462, + "loss": 0.6778, + "mean_token_accuracy": 0.8134923160076142, + "num_tokens": 31605853.0, + "step": 2350 + }, + { + "epoch": 1.4372716199756395, + "grad_norm": 0.5340039730072021, + "learning_rate": 0.00012050931268343089, + "loss": 0.6628, + "mean_token_accuracy": 0.8176047816872597, + "num_tokens": 31741034.0, + "step": 2360 + }, + { + "epoch": 1.443361753958587, + "grad_norm": 0.4518280625343323, + "learning_rate": 0.00011992137714412266, + "loss": 0.6407, + "mean_token_accuracy": 0.8207336485385894, + "num_tokens": 31878661.0, + "step": 2370 + }, + { + "epoch": 1.4494518879415348, + "grad_norm": 0.5232827067375183, + "learning_rate": 0.00011933301268328212, + "loss": 0.6742, + "mean_token_accuracy": 0.8158077761530876, + "num_tokens": 32016524.0, + "step": 2380 + }, + { + "epoch": 1.4555420219244823, + "grad_norm": 0.5181542634963989, + "learning_rate": 0.00011874424473711457, + "loss": 0.699, + "mean_token_accuracy": 0.8078866004943848, + "num_tokens": 32146820.0, + "step": 2390 + }, + { + "epoch": 1.46163215590743, + "grad_norm": 0.5801041126251221, + "learning_rate": 0.00011815509875926883, + "loss": 0.6572, + "mean_token_accuracy": 0.8183338135480881, + "num_tokens": 32285928.0, + "step": 2400 + }, + { + "epoch": 1.4677222898903777, + "grad_norm": 0.5347133874893188, + "learning_rate": 0.00011756560021973679, + "loss": 0.6738, + "mean_token_accuracy": 0.8143690213561058, + "num_tokens": 32416470.0, + "step": 2410 + }, + { + "epoch": 1.4738124238733252, + "grad_norm": 0.4945615231990814, + "learning_rate": 0.0001169757746037524, + "loss": 0.6505, + "mean_token_accuracy": 0.8196728631854058, + "num_tokens": 32553798.0, + "step": 2420 + }, + { + "epoch": 1.4799025578562728, + "grad_norm": 
0.5072743892669678, + "learning_rate": 0.00011638564741068965, + "loss": 0.625, + "mean_token_accuracy": 0.826240348815918, + "num_tokens": 32692511.0, + "step": 2430 + }, + { + "epoch": 1.4859926918392206, + "grad_norm": 0.5887538194656372, + "learning_rate": 0.00011579524415296043, + "loss": 0.6904, + "mean_token_accuracy": 0.8112018033862114, + "num_tokens": 32818836.0, + "step": 2440 + }, + { + "epoch": 1.4920828258221681, + "grad_norm": 0.5464449524879456, + "learning_rate": 0.00011520459035491142, + "loss": 0.6553, + "mean_token_accuracy": 0.8198345899581909, + "num_tokens": 32957967.0, + "step": 2450 + }, + { + "epoch": 1.4981729598051157, + "grad_norm": 0.5787419676780701, + "learning_rate": 0.00011461371155172071, + "loss": 0.663, + "mean_token_accuracy": 0.8155046373605728, + "num_tokens": 33094241.0, + "step": 2460 + }, + { + "epoch": 1.5042630937880634, + "grad_norm": 0.5159268975257874, + "learning_rate": 0.00011402263328829384, + "loss": 0.6792, + "mean_token_accuracy": 0.8127613604068756, + "num_tokens": 33225474.0, + "step": 2470 + }, + { + "epoch": 1.510353227771011, + "grad_norm": 0.5665333867073059, + "learning_rate": 0.00011343138111815939, + "loss": 0.6265, + "mean_token_accuracy": 0.8276977241039276, + "num_tokens": 33368246.0, + "step": 2480 + }, + { + "epoch": 1.5164433617539586, + "grad_norm": 0.6272276639938354, + "learning_rate": 0.00011283998060236421, + "loss": 0.6734, + "mean_token_accuracy": 0.816029068827629, + "num_tokens": 33503967.0, + "step": 2490 + }, + { + "epoch": 1.5225334957369063, + "grad_norm": 0.5275886654853821, + "learning_rate": 0.0001122484573083686, + "loss": 0.6457, + "mean_token_accuracy": 0.8222623988986015, + "num_tokens": 33641826.0, + "step": 2500 + }, + { + "epoch": 1.5286236297198539, + "grad_norm": 0.5526687502861023, + "learning_rate": 0.00011165683680894072, + "loss": 0.6795, + "mean_token_accuracy": 0.8127825185656548, + "num_tokens": 33774185.0, + "step": 2510 + }, + { + "epoch": 1.5347137637028014, + 
"grad_norm": 0.6226133704185486, + "learning_rate": 0.00011106514468105111, + "loss": 0.6684, + "mean_token_accuracy": 0.815614765882492, + "num_tokens": 33907116.0, + "step": 2520 + }, + { + "epoch": 1.5408038976857492, + "grad_norm": 0.612832248210907, + "learning_rate": 0.000110473406504767, + "loss": 0.6287, + "mean_token_accuracy": 0.8220825806260109, + "num_tokens": 34048267.0, + "step": 2530 + }, + { + "epoch": 1.5468940316686965, + "grad_norm": 0.6066681742668152, + "learning_rate": 0.00010988164786214639, + "loss": 0.6851, + "mean_token_accuracy": 0.8115911707282066, + "num_tokens": 34177555.0, + "step": 2540 + }, + { + "epoch": 1.5529841656516443, + "grad_norm": 0.6376360058784485, + "learning_rate": 0.00010928989433613204, + "loss": 0.6921, + "mean_token_accuracy": 0.8096534594893455, + "num_tokens": 34308932.0, + "step": 2550 + }, + { + "epoch": 1.559074299634592, + "grad_norm": 0.6083400249481201, + "learning_rate": 0.00010869817150944546, + "loss": 0.6575, + "mean_token_accuracy": 0.8187816679477692, + "num_tokens": 34443994.0, + "step": 2560 + }, + { + "epoch": 1.5651644336175394, + "grad_norm": 0.6098156571388245, + "learning_rate": 0.00010810650496348116, + "loss": 0.6092, + "mean_token_accuracy": 0.8285523638129234, + "num_tokens": 34588403.0, + "step": 2570 + }, + { + "epoch": 1.5712545676004872, + "grad_norm": 0.47795701026916504, + "learning_rate": 0.00010751492027720027, + "loss": 0.6423, + "mean_token_accuracy": 0.8211737647652626, + "num_tokens": 34730426.0, + "step": 2580 + }, + { + "epoch": 1.577344701583435, + "grad_norm": 0.560787558555603, + "learning_rate": 0.00010692344302602515, + "loss": 0.6707, + "mean_token_accuracy": 0.8134441033005715, + "num_tokens": 34861708.0, + "step": 2590 + }, + { + "epoch": 1.5834348355663823, + "grad_norm": 0.5722246766090393, + "learning_rate": 0.00010633209878073343, + "loss": 0.6533, + "mean_token_accuracy": 0.8185199156403542, + "num_tokens": 34997377.0, + "step": 2600 + }, + { + "epoch": 
1.58952496954933, + "grad_norm": 0.4941788613796234, + "learning_rate": 0.00010574091310635263, + "loss": 0.6487, + "mean_token_accuracy": 0.8205527886748314, + "num_tokens": 35133685.0, + "step": 2610 + }, + { + "epoch": 1.5956151035322779, + "grad_norm": 0.575986921787262, + "learning_rate": 0.00010514991156105493, + "loss": 0.6615, + "mean_token_accuracy": 0.8179458349943161, + "num_tokens": 35270993.0, + "step": 2620 + }, + { + "epoch": 1.6017052375152252, + "grad_norm": 0.5677866339683533, + "learning_rate": 0.00010455911969505228, + "loss": 0.6572, + "mean_token_accuracy": 0.8155815675854683, + "num_tokens": 35402062.0, + "step": 2630 + }, + { + "epoch": 1.607795371498173, + "grad_norm": 0.6232825517654419, + "learning_rate": 0.00010396856304949162, + "loss": 0.6477, + "mean_token_accuracy": 0.8209305629134178, + "num_tokens": 35537394.0, + "step": 2640 + }, + { + "epoch": 1.6138855054811205, + "grad_norm": 0.6252410411834717, + "learning_rate": 0.00010337826715535102, + "loss": 0.6819, + "mean_token_accuracy": 0.8137489795684815, + "num_tokens": 35669332.0, + "step": 2650 + }, + { + "epoch": 1.619975639464068, + "grad_norm": 0.5850580334663391, + "learning_rate": 0.0001027882575323356, + "loss": 0.6831, + "mean_token_accuracy": 0.8099577218294144, + "num_tokens": 35799095.0, + "step": 2660 + }, + { + "epoch": 1.6260657734470159, + "grad_norm": 0.5118699073791504, + "learning_rate": 0.00010219855968777442, + "loss": 0.681, + "mean_token_accuracy": 0.8123177006840706, + "num_tokens": 35928313.0, + "step": 2670 + }, + { + "epoch": 1.6321559074299634, + "grad_norm": 0.5392698645591736, + "learning_rate": 0.00010160919911551774, + "loss": 0.6536, + "mean_token_accuracy": 0.8185337752103805, + "num_tokens": 36062033.0, + "step": 2680 + }, + { + "epoch": 1.638246041412911, + "grad_norm": 0.5542203783988953, + "learning_rate": 0.00010102020129483481, + "loss": 0.6859, + "mean_token_accuracy": 0.8107540607452393, + "num_tokens": 36190194.0, + "step": 2690 + }, + { + 
"epoch": 1.6443361753958587, + "grad_norm": 0.5962918996810913, + "learning_rate": 0.0001004315916893124, + "loss": 0.64, + "mean_token_accuracy": 0.8226593688130379, + "num_tokens": 36322437.0, + "step": 2700 + }, + { + "epoch": 1.6504263093788063, + "grad_norm": 0.6391364932060242, + "learning_rate": 9.984339574575394e-05, + "loss": 0.6457, + "mean_token_accuracy": 0.8250340327620507, + "num_tokens": 36463231.0, + "step": 2710 + }, + { + "epoch": 1.6565164433617539, + "grad_norm": 0.5798075795173645, + "learning_rate": 9.92556388930794e-05, + "loss": 0.6901, + "mean_token_accuracy": 0.8104871213436127, + "num_tokens": 36588963.0, + "step": 2720 + }, + { + "epoch": 1.6626065773447016, + "grad_norm": 0.5375143885612488, + "learning_rate": 9.866834654122597e-05, + "loss": 0.6723, + "mean_token_accuracy": 0.8132491707801819, + "num_tokens": 36724295.0, + "step": 2730 + }, + { + "epoch": 1.6686967113276492, + "grad_norm": 0.5556331276893616, + "learning_rate": 9.808154408004942e-05, + "loss": 0.6316, + "mean_token_accuracy": 0.8221101492643357, + "num_tokens": 36855978.0, + "step": 2740 + }, + { + "epoch": 1.6747868453105967, + "grad_norm": 0.5330142974853516, + "learning_rate": 9.749525687822674e-05, + "loss": 0.6269, + "mean_token_accuracy": 0.8239532545208931, + "num_tokens": 36994164.0, + "step": 2750 + }, + { + "epoch": 1.6808769792935445, + "grad_norm": 0.568084716796875, + "learning_rate": 9.6909510282159e-05, + "loss": 0.6568, + "mean_token_accuracy": 0.8158794924616813, + "num_tokens": 37130680.0, + "step": 2760 + }, + { + "epoch": 1.686967113276492, + "grad_norm": 0.5072943568229675, + "learning_rate": 9.632432961487585e-05, + "loss": 0.6838, + "mean_token_accuracy": 0.8121756613254547, + "num_tokens": 37261462.0, + "step": 2770 + }, + { + "epoch": 1.6930572472594396, + "grad_norm": 0.5469337701797485, + "learning_rate": 9.573974017494069e-05, + "loss": 0.6447, + "mean_token_accuracy": 0.8220986798405647, + "num_tokens": 37395606.0, + "step": 2780 + }, + { + 
"epoch": 1.6991473812423874, + "grad_norm": 0.57918381690979, + "learning_rate": 9.515576723535689e-05, + "loss": 0.6217, + "mean_token_accuracy": 0.822702020406723, + "num_tokens": 37533585.0, + "step": 2790 + }, + { + "epoch": 1.705237515225335, + "grad_norm": 0.6425563097000122, + "learning_rate": 9.45724360424753e-05, + "loss": 0.6435, + "mean_token_accuracy": 0.8198476612567902, + "num_tokens": 37672877.0, + "step": 2800 + }, + { + "epoch": 1.7113276492082825, + "grad_norm": 0.5059729218482971, + "learning_rate": 9.398977181490274e-05, + "loss": 0.6579, + "mean_token_accuracy": 0.8166012555360794, + "num_tokens": 37809109.0, + "step": 2810 + }, + { + "epoch": 1.7174177831912303, + "grad_norm": 0.5450888276100159, + "learning_rate": 9.340779974241167e-05, + "loss": 0.6175, + "mean_token_accuracy": 0.8274259582161904, + "num_tokens": 37950597.0, + "step": 2820 + }, + { + "epoch": 1.7235079171741778, + "grad_norm": 0.6464765667915344, + "learning_rate": 9.282654498485139e-05, + "loss": 0.6636, + "mean_token_accuracy": 0.8163545817136765, + "num_tokens": 38086904.0, + "step": 2830 + }, + { + "epoch": 1.7295980511571254, + "grad_norm": 0.6118177175521851, + "learning_rate": 9.22460326710601e-05, + "loss": 0.6696, + "mean_token_accuracy": 0.8133967757225037, + "num_tokens": 38219759.0, + "step": 2840 + }, + { + "epoch": 1.7356881851400732, + "grad_norm": 0.5518969893455505, + "learning_rate": 9.16662878977786e-05, + "loss": 0.6659, + "mean_token_accuracy": 0.8180875137448311, + "num_tokens": 38349770.0, + "step": 2850 + }, + { + "epoch": 1.7417783191230207, + "grad_norm": 0.6465517282485962, + "learning_rate": 9.108733572856549e-05, + "loss": 0.6581, + "mean_token_accuracy": 0.8170399129390716, + "num_tokens": 38482303.0, + "step": 2860 + }, + { + "epoch": 1.7478684531059683, + "grad_norm": 0.5193557143211365, + "learning_rate": 9.050920119271335e-05, + "loss": 0.6543, + "mean_token_accuracy": 0.8178304255008697, + "num_tokens": 38615426.0, + "step": 2870 + }, + { + 
"epoch": 1.753958587088916, + "grad_norm": 0.611529529094696, + "learning_rate": 8.993190928416682e-05, + "loss": 0.6248, + "mean_token_accuracy": 0.8259203046560287, + "num_tokens": 38755859.0, + "step": 2880 + }, + { + "epoch": 1.7600487210718636, + "grad_norm": 0.5405944585800171, + "learning_rate": 8.935548496044198e-05, + "loss": 0.6232, + "mean_token_accuracy": 0.8281204700469971, + "num_tokens": 38893007.0, + "step": 2890 + }, + { + "epoch": 1.7661388550548112, + "grad_norm": 0.6433010697364807, + "learning_rate": 8.877995314154748e-05, + "loss": 0.6751, + "mean_token_accuracy": 0.8155393078923225, + "num_tokens": 39020285.0, + "step": 2900 + }, + { + "epoch": 1.772228989037759, + "grad_norm": 0.47974956035614014, + "learning_rate": 8.820533870890717e-05, + "loss": 0.6527, + "mean_token_accuracy": 0.8197720810770989, + "num_tokens": 39151426.0, + "step": 2910 + }, + { + "epoch": 1.7783191230207065, + "grad_norm": 0.5529680848121643, + "learning_rate": 8.763166650428436e-05, + "loss": 0.6262, + "mean_token_accuracy": 0.8256829127669334, + "num_tokens": 39294242.0, + "step": 2920 + }, + { + "epoch": 1.784409257003654, + "grad_norm": 0.6060122847557068, + "learning_rate": 8.705896132870797e-05, + "loss": 0.6563, + "mean_token_accuracy": 0.8192467406392098, + "num_tokens": 39425879.0, + "step": 2930 + }, + { + "epoch": 1.7904993909866018, + "grad_norm": 0.6099355220794678, + "learning_rate": 8.648724794140017e-05, + "loss": 0.6664, + "mean_token_accuracy": 0.8186777010560036, + "num_tokens": 39559787.0, + "step": 2940 + }, + { + "epoch": 1.7965895249695494, + "grad_norm": 0.5908733010292053, + "learning_rate": 8.591655105870615e-05, + "loss": 0.6712, + "mean_token_accuracy": 0.8136340633034707, + "num_tokens": 39689823.0, + "step": 2950 + }, + { + "epoch": 1.802679658952497, + "grad_norm": 0.5845519304275513, + "learning_rate": 8.534689535302553e-05, + "loss": 0.6608, + "mean_token_accuracy": 0.8170475289225578, + "num_tokens": 39820725.0, + "step": 2960 + }, + 
{ + "epoch": 1.8087697929354447, + "grad_norm": 0.6311175227165222, + "learning_rate": 8.47783054517457e-05, + "loss": 0.6491, + "mean_token_accuracy": 0.8193596869707107, + "num_tokens": 39947874.0, + "step": 2970 + }, + { + "epoch": 1.814859926918392, + "grad_norm": 0.5293188691139221, + "learning_rate": 8.421080593617706e-05, + "loss": 0.6105, + "mean_token_accuracy": 0.83141258507967, + "num_tokens": 40091297.0, + "step": 2980 + }, + { + "epoch": 1.8209500609013398, + "grad_norm": 0.5252617597579956, + "learning_rate": 8.364442134049049e-05, + "loss": 0.6356, + "mean_token_accuracy": 0.8237936720252037, + "num_tokens": 40229207.0, + "step": 2990 + }, + { + "epoch": 1.8270401948842876, + "grad_norm": 0.6039798855781555, + "learning_rate": 8.30791761506565e-05, + "loss": 0.6456, + "mean_token_accuracy": 0.8206364914774895, + "num_tokens": 40364863.0, + "step": 3000 + }, + { + "epoch": 1.833130328867235, + "grad_norm": 0.5508609414100647, + "learning_rate": 8.251509480338684e-05, + "loss": 0.6229, + "mean_token_accuracy": 0.8255992740392685, + "num_tokens": 40504123.0, + "step": 3010 + }, + { + "epoch": 1.8392204628501827, + "grad_norm": 0.5637634992599487, + "learning_rate": 8.195220168507789e-05, + "loss": 0.6026, + "mean_token_accuracy": 0.8290412962436676, + "num_tokens": 40646821.0, + "step": 3020 + }, + { + "epoch": 1.8453105968331305, + "grad_norm": 0.5463610291481018, + "learning_rate": 8.139052113075645e-05, + "loss": 0.6278, + "mean_token_accuracy": 0.8244929850101471, + "num_tokens": 40778989.0, + "step": 3030 + }, + { + "epoch": 1.8514007308160778, + "grad_norm": 0.5360645055770874, + "learning_rate": 8.083007742302776e-05, + "loss": 0.6336, + "mean_token_accuracy": 0.8228462666273118, + "num_tokens": 40917560.0, + "step": 3040 + }, + { + "epoch": 1.8574908647990256, + "grad_norm": 0.5185632705688477, + "learning_rate": 8.02708947910255e-05, + "loss": 0.5991, + "mean_token_accuracy": 0.830042028427124, + "num_tokens": 41059707.0, + "step": 3050 + }, + 
{ + "epoch": 1.8635809987819734, + "grad_norm": 0.6445353627204895, + "learning_rate": 7.971299740936456e-05, + "loss": 0.6555, + "mean_token_accuracy": 0.8169184163212776, + "num_tokens": 41192515.0, + "step": 3060 + }, + { + "epoch": 1.8696711327649207, + "grad_norm": 0.5360421538352966, + "learning_rate": 7.915640939709576e-05, + "loss": 0.6234, + "mean_token_accuracy": 0.8257398083806038, + "num_tokens": 41330047.0, + "step": 3070 + }, + { + "epoch": 1.8757612667478685, + "grad_norm": 0.58651202917099, + "learning_rate": 7.860115481666333e-05, + "loss": 0.6564, + "mean_token_accuracy": 0.8205534905195236, + "num_tokens": 41460379.0, + "step": 3080 + }, + { + "epoch": 1.881851400730816, + "grad_norm": 0.6842640042304993, + "learning_rate": 7.804725767286427e-05, + "loss": 0.6935, + "mean_token_accuracy": 0.8097458809614182, + "num_tokens": 41581210.0, + "step": 3090 + }, + { + "epoch": 1.8879415347137636, + "grad_norm": 0.5175514817237854, + "learning_rate": 7.749474191181096e-05, + "loss": 0.6393, + "mean_token_accuracy": 0.8219558611512184, + "num_tokens": 41714792.0, + "step": 3100 + }, + { + "epoch": 1.8940316686967114, + "grad_norm": 0.5963588356971741, + "learning_rate": 7.694363141989575e-05, + "loss": 0.658, + "mean_token_accuracy": 0.8182344615459443, + "num_tokens": 41846600.0, + "step": 3110 + }, + { + "epoch": 1.900121802679659, + "grad_norm": 0.6149535775184631, + "learning_rate": 7.639395002275827e-05, + "loss": 0.6499, + "mean_token_accuracy": 0.8208124756813049, + "num_tokens": 41977627.0, + "step": 3120 + }, + { + "epoch": 1.9062119366626065, + "grad_norm": 0.5739808678627014, + "learning_rate": 7.584572148425544e-05, + "loss": 0.6703, + "mean_token_accuracy": 0.8125967502593994, + "num_tokens": 42104510.0, + "step": 3130 + }, + { + "epoch": 1.9123020706455542, + "grad_norm": 0.5982648730278015, + "learning_rate": 7.529896950543416e-05, + "loss": 0.6513, + "mean_token_accuracy": 0.8201186507940292, + "num_tokens": 42236168.0, + "step": 3140 + }, 
+ { + "epoch": 1.9183922046285018, + "grad_norm": 0.5896486043930054, + "learning_rate": 7.475371772350658e-05, + "loss": 0.6133, + "mean_token_accuracy": 0.8260134413838387, + "num_tokens": 42375086.0, + "step": 3150 + }, + { + "epoch": 1.9244823386114494, + "grad_norm": 0.6223361492156982, + "learning_rate": 7.420998971082833e-05, + "loss": 0.6638, + "mean_token_accuracy": 0.8162963137030601, + "num_tokens": 42506457.0, + "step": 3160 + }, + { + "epoch": 1.9305724725943971, + "grad_norm": 0.709854245185852, + "learning_rate": 7.366780897387924e-05, + "loss": 0.6324, + "mean_token_accuracy": 0.8247174829244613, + "num_tokens": 42640886.0, + "step": 3170 + }, + { + "epoch": 1.9366626065773447, + "grad_norm": 0.6794169545173645, + "learning_rate": 7.312719895224736e-05, + "loss": 0.6164, + "mean_token_accuracy": 0.82676922082901, + "num_tokens": 42781318.0, + "step": 3180 + }, + { + "epoch": 1.9427527405602922, + "grad_norm": 0.49305981397628784, + "learning_rate": 7.258818301761532e-05, + "loss": 0.6216, + "mean_token_accuracy": 0.8258268669247627, + "num_tokens": 42919381.0, + "step": 3190 + }, + { + "epoch": 1.94884287454324, + "grad_norm": 0.5072576999664307, + "learning_rate": 7.205078447275031e-05, + "loss": 0.6407, + "mean_token_accuracy": 0.819316141307354, + "num_tokens": 43056494.0, + "step": 3200 + }, + { + "epoch": 1.9549330085261876, + "grad_norm": 0.6188381314277649, + "learning_rate": 7.151502655049623e-05, + "loss": 0.6022, + "mean_token_accuracy": 0.8328602254390717, + "num_tokens": 43197795.0, + "step": 3210 + }, + { + "epoch": 1.9610231425091351, + "grad_norm": 0.5626131296157837, + "learning_rate": 7.098093241276962e-05, + "loss": 0.6245, + "mean_token_accuracy": 0.8258091285824776, + "num_tokens": 43340325.0, + "step": 3220 + }, + { + "epoch": 1.967113276492083, + "grad_norm": 0.5364338755607605, + "learning_rate": 7.044852514955816e-05, + "loss": 0.6454, + "mean_token_accuracy": 0.8199462234973908, + "num_tokens": 43472226.0, + "step": 3230 + 
}, + { + "epoch": 1.9732034104750305, + "grad_norm": 0.5460382699966431, + "learning_rate": 6.991782777792244e-05, + "loss": 0.6214, + "mean_token_accuracy": 0.8251617640256882, + "num_tokens": 43609559.0, + "step": 3240 + }, + { + "epoch": 1.979293544457978, + "grad_norm": 0.6013203263282776, + "learning_rate": 6.938886324100097e-05, + "loss": 0.6422, + "mean_token_accuracy": 0.8197862133383751, + "num_tokens": 43743060.0, + "step": 3250 + }, + { + "epoch": 1.9853836784409258, + "grad_norm": 0.6688512563705444, + "learning_rate": 6.88616544070182e-05, + "loss": 0.6447, + "mean_token_accuracy": 0.820338460803032, + "num_tokens": 43876874.0, + "step": 3260 + }, + { + "epoch": 1.9914738124238733, + "grad_norm": 0.6146946549415588, + "learning_rate": 6.8336224068296e-05, + "loss": 0.6015, + "mean_token_accuracy": 0.8318811848759651, + "num_tokens": 44021963.0, + "step": 3270 + }, + { + "epoch": 1.997563946406821, + "grad_norm": 0.5972597599029541, + "learning_rate": 6.781259494026821e-05, + "loss": 0.6094, + "mean_token_accuracy": 0.8282003849744797, + "num_tokens": 44159207.0, + "step": 3280 + }, + { + "epoch": 2.0036540803897687, + "grad_norm": 0.5882354974746704, + "learning_rate": 6.729078966049863e-05, + "loss": 0.6413, + "mean_token_accuracy": 0.8182575166225433, + "num_tokens": 44286530.0, + "step": 3290 + }, + { + "epoch": 2.009744214372716, + "grad_norm": 0.6095362901687622, + "learning_rate": 6.67708307877023e-05, + "loss": 0.6004, + "mean_token_accuracy": 0.8311555400490761, + "num_tokens": 44421317.0, + "step": 3300 + }, + { + "epoch": 2.0158343483556638, + "grad_norm": 0.4933398962020874, + "learning_rate": 6.625274080077034e-05, + "loss": 0.5953, + "mean_token_accuracy": 0.8339588925242424, + "num_tokens": 44559187.0, + "step": 3310 + }, + { + "epoch": 2.0219244823386116, + "grad_norm": 0.5924236178398132, + "learning_rate": 6.573654209779808e-05, + "loss": 0.5653, + "mean_token_accuracy": 0.8377310782670975, + "num_tokens": 44698135.0, + "step": 3320 + 
}, + { + "epoch": 2.028014616321559, + "grad_norm": 0.7029783129692078, + "learning_rate": 6.522225699511671e-05, + "loss": 0.5738, + "mean_token_accuracy": 0.8355662778019906, + "num_tokens": 44832354.0, + "step": 3330 + }, + { + "epoch": 2.0341047503045067, + "grad_norm": 0.591604471206665, + "learning_rate": 6.470990772632868e-05, + "loss": 0.5682, + "mean_token_accuracy": 0.8361614629626274, + "num_tokens": 44978302.0, + "step": 3340 + }, + { + "epoch": 2.0401948842874544, + "grad_norm": 0.6186702847480774, + "learning_rate": 6.419951644134623e-05, + "loss": 0.6036, + "mean_token_accuracy": 0.8307035118341446, + "num_tokens": 45110425.0, + "step": 3350 + }, + { + "epoch": 2.0462850182704018, + "grad_norm": 0.5165773630142212, + "learning_rate": 6.369110520543397e-05, + "loss": 0.5994, + "mean_token_accuracy": 0.8298280730843544, + "num_tokens": 45249677.0, + "step": 3360 + }, + { + "epoch": 2.0523751522533495, + "grad_norm": 0.6079832315444946, + "learning_rate": 6.318469599825489e-05, + "loss": 0.5921, + "mean_token_accuracy": 0.8314823001623154, + "num_tokens": 45384176.0, + "step": 3370 + }, + { + "epoch": 2.0584652862362973, + "grad_norm": 0.6483248472213745, + "learning_rate": 6.268031071292028e-05, + "loss": 0.6059, + "mean_token_accuracy": 0.8267465397715569, + "num_tokens": 45517909.0, + "step": 3380 + }, + { + "epoch": 2.0645554202192447, + "grad_norm": 0.5445775389671326, + "learning_rate": 6.217797115504296e-05, + "loss": 0.5923, + "mean_token_accuracy": 0.8316209375858307, + "num_tokens": 45654194.0, + "step": 3390 + }, + { + "epoch": 2.0706455542021924, + "grad_norm": 0.6171122789382935, + "learning_rate": 6.16776990417949e-05, + "loss": 0.6385, + "mean_token_accuracy": 0.820763637125492, + "num_tokens": 45784344.0, + "step": 3400 + }, + { + "epoch": 2.07673568818514, + "grad_norm": 0.6944971680641174, + "learning_rate": 6.117951600096805e-05, + "loss": 0.6352, + "mean_token_accuracy": 0.8226177647709847, + "num_tokens": 45911670.0, + "step": 3410 
+ }, + { + "epoch": 2.0828258221680875, + "grad_norm": 0.6227422952651978, + "learning_rate": 6.06834435700396e-05, + "loss": 0.5697, + "mean_token_accuracy": 0.8398994401097297, + "num_tokens": 46055567.0, + "step": 3420 + }, + { + "epoch": 2.0889159561510353, + "grad_norm": 0.6456082463264465, + "learning_rate": 6.018950319524062e-05, + "loss": 0.606, + "mean_token_accuracy": 0.8293915688991547, + "num_tokens": 46190030.0, + "step": 3430 + }, + { + "epoch": 2.095006090133983, + "grad_norm": 0.6646130681037903, + "learning_rate": 5.969771623062905e-05, + "loss": 0.6088, + "mean_token_accuracy": 0.8277154579758644, + "num_tokens": 46324733.0, + "step": 3440 + }, + { + "epoch": 2.1010962241169304, + "grad_norm": 0.6068746447563171, + "learning_rate": 5.920810393716647e-05, + "loss": 0.6055, + "mean_token_accuracy": 0.8310004472732544, + "num_tokens": 46457965.0, + "step": 3450 + }, + { + "epoch": 2.107186358099878, + "grad_norm": 0.6344166398048401, + "learning_rate": 5.872068748179904e-05, + "loss": 0.6235, + "mean_token_accuracy": 0.826252605021, + "num_tokens": 46591001.0, + "step": 3460 + }, + { + "epoch": 2.113276492082826, + "grad_norm": 0.670253574848175, + "learning_rate": 5.823548793654222e-05, + "loss": 0.5922, + "mean_token_accuracy": 0.8330878585577011, + "num_tokens": 46733212.0, + "step": 3470 + }, + { + "epoch": 2.1193666260657733, + "grad_norm": 0.6969623565673828, + "learning_rate": 5.775252627756988e-05, + "loss": 0.6112, + "mean_token_accuracy": 0.8287390932440758, + "num_tokens": 46869195.0, + "step": 3480 + }, + { + "epoch": 2.125456760048721, + "grad_norm": 0.5771912932395935, + "learning_rate": 5.727182338430759e-05, + "loss": 0.6025, + "mean_token_accuracy": 0.8298723086714744, + "num_tokens": 47004726.0, + "step": 3490 + }, + { + "epoch": 2.131546894031669, + "grad_norm": 0.5449331998825073, + "learning_rate": 5.679340003852971e-05, + "loss": 0.6334, + "mean_token_accuracy": 0.8226374134421348, + "num_tokens": 47128817.0, + "step": 3500 + }, 
+ { + "epoch": 2.137637028014616, + "grad_norm": 0.6869291067123413, + "learning_rate": 5.6317276923461074e-05, + "loss": 0.6234, + "mean_token_accuracy": 0.8266696631908417, + "num_tokens": 47262232.0, + "step": 3510 + }, + { + "epoch": 2.143727161997564, + "grad_norm": 0.6404641270637512, + "learning_rate": 5.584347462288294e-05, + "loss": 0.5965, + "mean_token_accuracy": 0.8314864963293076, + "num_tokens": 47395541.0, + "step": 3520 + }, + { + "epoch": 2.1498172959805117, + "grad_norm": 0.6093842387199402, + "learning_rate": 5.537201362024287e-05, + "loss": 0.5873, + "mean_token_accuracy": 0.8329750701785088, + "num_tokens": 47538479.0, + "step": 3530 + }, + { + "epoch": 2.155907429963459, + "grad_norm": 0.6266767978668213, + "learning_rate": 5.490291429776933e-05, + "loss": 0.6107, + "mean_token_accuracy": 0.8280060842633248, + "num_tokens": 47668527.0, + "step": 3540 + }, + { + "epoch": 2.161997563946407, + "grad_norm": 0.6084907054901123, + "learning_rate": 5.443619693559048e-05, + "loss": 0.6046, + "mean_token_accuracy": 0.8307000920176506, + "num_tokens": 47802055.0, + "step": 3550 + }, + { + "epoch": 2.1680876979293546, + "grad_norm": 0.6321319341659546, + "learning_rate": 5.397188171085747e-05, + "loss": 0.6376, + "mean_token_accuracy": 0.822290787100792, + "num_tokens": 47929896.0, + "step": 3560 + }, + { + "epoch": 2.174177831912302, + "grad_norm": 0.6345944404602051, + "learning_rate": 5.350998869687209e-05, + "loss": 0.642, + "mean_token_accuracy": 0.8202966138720512, + "num_tokens": 48057772.0, + "step": 3570 + }, + { + "epoch": 2.1802679658952497, + "grad_norm": 0.8021138310432434, + "learning_rate": 5.3050537862219005e-05, + "loss": 0.6004, + "mean_token_accuracy": 0.829450149834156, + "num_tokens": 48189172.0, + "step": 3580 + }, + { + "epoch": 2.1863580998781975, + "grad_norm": 0.734176754951477, + "learning_rate": 5.259354906990246e-05, + "loss": 0.6312, + "mean_token_accuracy": 0.823958395421505, + "num_tokens": 48317103.0, + "step": 3590 + }, 
+ { + "epoch": 2.192448233861145, + "grad_norm": 0.5638821125030518, + "learning_rate": 5.213904207648749e-05, + "loss": 0.5778, + "mean_token_accuracy": 0.8363123446702957, + "num_tokens": 48454312.0, + "step": 3600 + }, + { + "epoch": 2.1985383678440926, + "grad_norm": 0.5940743088722229, + "learning_rate": 5.168703653124587e-05, + "loss": 0.6191, + "mean_token_accuracy": 0.8252517506480217, + "num_tokens": 48585829.0, + "step": 3610 + }, + { + "epoch": 2.2046285018270404, + "grad_norm": 0.6641266345977783, + "learning_rate": 5.1237551975306666e-05, + "loss": 0.6039, + "mean_token_accuracy": 0.8323954001069069, + "num_tokens": 48721617.0, + "step": 3620 + }, + { + "epoch": 2.2107186358099877, + "grad_norm": 0.5851954221725464, + "learning_rate": 5.0790607840811335e-05, + "loss": 0.642, + "mean_token_accuracy": 0.8193060621619225, + "num_tokens": 48848346.0, + "step": 3630 + }, + { + "epoch": 2.2168087697929355, + "grad_norm": 0.6154613494873047, + "learning_rate": 5.0346223450073795e-05, + "loss": 0.5972, + "mean_token_accuracy": 0.829984450340271, + "num_tokens": 48989423.0, + "step": 3640 + }, + { + "epoch": 2.2228989037758833, + "grad_norm": 0.6556016802787781, + "learning_rate": 4.990441801474487e-05, + "loss": 0.5768, + "mean_token_accuracy": 0.8358439221978188, + "num_tokens": 49127954.0, + "step": 3650 + }, + { + "epoch": 2.2289890377588306, + "grad_norm": 0.6562819480895996, + "learning_rate": 4.94652106349819e-05, + "loss": 0.582, + "mean_token_accuracy": 0.8355499967932701, + "num_tokens": 49268448.0, + "step": 3660 + }, + { + "epoch": 2.2350791717417784, + "grad_norm": 0.7151765823364258, + "learning_rate": 4.9028620298622924e-05, + "loss": 0.6543, + "mean_token_accuracy": 0.8192793279886246, + "num_tokens": 49393488.0, + "step": 3670 + }, + { + "epoch": 2.2411693057247257, + "grad_norm": 0.6189950108528137, + "learning_rate": 4.8594665880365796e-05, + "loss": 0.6065, + "mean_token_accuracy": 0.8288143903017045, + "num_tokens": 49527018.0, + "step": 
3680 + }, + { + "epoch": 2.2472594397076735, + "grad_norm": 0.6800888180732727, + "learning_rate": 4.816336614095221e-05, + "loss": 0.5777, + "mean_token_accuracy": 0.8371251985430718, + "num_tokens": 49662686.0, + "step": 3690 + }, + { + "epoch": 2.2533495736906213, + "grad_norm": 0.6750311255455017, + "learning_rate": 4.7734739726356694e-05, + "loss": 0.5646, + "mean_token_accuracy": 0.8395438298583031, + "num_tokens": 49803437.0, + "step": 3700 + }, + { + "epoch": 2.259439707673569, + "grad_norm": 0.781650185585022, + "learning_rate": 4.730880516698042e-05, + "loss": 0.6074, + "mean_token_accuracy": 0.8285451725125312, + "num_tokens": 49938432.0, + "step": 3710 + }, + { + "epoch": 2.2655298416565164, + "grad_norm": 0.6055442690849304, + "learning_rate": 4.6885580876850095e-05, + "loss": 0.6106, + "mean_token_accuracy": 0.828575699031353, + "num_tokens": 50071578.0, + "step": 3720 + }, + { + "epoch": 2.271619975639464, + "grad_norm": 0.5515550374984741, + "learning_rate": 4.6465085152821924e-05, + "loss": 0.6172, + "mean_token_accuracy": 0.8277894973754882, + "num_tokens": 50205327.0, + "step": 3730 + }, + { + "epoch": 2.2777101096224115, + "grad_norm": 0.6260959506034851, + "learning_rate": 4.604733617379061e-05, + "loss": 0.6276, + "mean_token_accuracy": 0.8239622369408608, + "num_tokens": 50336366.0, + "step": 3740 + }, + { + "epoch": 2.2838002436053593, + "grad_norm": 0.6698872447013855, + "learning_rate": 4.5632351999903366e-05, + "loss": 0.6015, + "mean_token_accuracy": 0.8326066240668297, + "num_tokens": 50471282.0, + "step": 3750 + }, + { + "epoch": 2.289890377588307, + "grad_norm": 0.5506294965744019, + "learning_rate": 4.52201505717793e-05, + "loss": 0.6041, + "mean_token_accuracy": 0.8309131726622582, + "num_tokens": 50602518.0, + "step": 3760 + }, + { + "epoch": 2.2959805115712544, + "grad_norm": 0.5408323407173157, + "learning_rate": 4.4810749709733625e-05, + "loss": 0.5986, + "mean_token_accuracy": 0.8340771466493606, + "num_tokens": 50738973.0, + 
"step": 3770 + }, + { + "epoch": 2.302070645554202, + "grad_norm": 0.5263858437538147, + "learning_rate": 4.440416711300731e-05, + "loss": 0.6177, + "mean_token_accuracy": 0.8264430776238442, + "num_tokens": 50871381.0, + "step": 3780 + }, + { + "epoch": 2.30816077953715, + "grad_norm": 0.6784396767616272, + "learning_rate": 4.400042035900194e-05, + "loss": 0.5947, + "mean_token_accuracy": 0.8301294282078743, + "num_tokens": 51003495.0, + "step": 3790 + }, + { + "epoch": 2.3142509135200973, + "grad_norm": 0.585687518119812, + "learning_rate": 4.359952690251984e-05, + "loss": 0.5748, + "mean_token_accuracy": 0.8376362308859825, + "num_tokens": 51143094.0, + "step": 3800 + }, + { + "epoch": 2.320341047503045, + "grad_norm": 0.5948687791824341, + "learning_rate": 4.320150407500935e-05, + "loss": 0.6192, + "mean_token_accuracy": 0.8263852804899215, + "num_tokens": 51270732.0, + "step": 3810 + }, + { + "epoch": 2.326431181485993, + "grad_norm": 0.7059959173202515, + "learning_rate": 4.28063690838156e-05, + "loss": 0.5937, + "mean_token_accuracy": 0.8340961948037148, + "num_tokens": 51408741.0, + "step": 3820 + }, + { + "epoch": 2.33252131546894, + "grad_norm": 0.5434718728065491, + "learning_rate": 4.241413901143673e-05, + "loss": 0.5754, + "mean_token_accuracy": 0.838581845164299, + "num_tokens": 51549085.0, + "step": 3830 + }, + { + "epoch": 2.338611449451888, + "grad_norm": 0.6144487857818604, + "learning_rate": 4.202483081478516e-05, + "loss": 0.5929, + "mean_token_accuracy": 0.8351974800229073, + "num_tokens": 51685054.0, + "step": 3840 + }, + { + "epoch": 2.3447015834348357, + "grad_norm": 0.6229000091552734, + "learning_rate": 4.163846132445465e-05, + "loss": 0.6022, + "mean_token_accuracy": 0.8321109473705292, + "num_tokens": 51817361.0, + "step": 3850 + }, + { + "epoch": 2.350791717417783, + "grad_norm": 0.6818157434463501, + "learning_rate": 4.125504724399264e-05, + "loss": 0.6114, + "mean_token_accuracy": 0.8261456057429314, + "num_tokens": 51944695.0, + 
"step": 3860 + }, + { + "epoch": 2.356881851400731, + "grad_norm": 0.663104772567749, + "learning_rate": 4.087460514917811e-05, + "loss": 0.5733, + "mean_token_accuracy": 0.8354582965373993, + "num_tokens": 52084047.0, + "step": 3870 + }, + { + "epoch": 2.3629719853836786, + "grad_norm": 0.6448875069618225, + "learning_rate": 4.0497151487305077e-05, + "loss": 0.5788, + "mean_token_accuracy": 0.8370290577411652, + "num_tokens": 52220298.0, + "step": 3880 + }, + { + "epoch": 2.369062119366626, + "grad_norm": 0.5701313018798828, + "learning_rate": 4.012270257647129e-05, + "loss": 0.5919, + "mean_token_accuracy": 0.8328420773148537, + "num_tokens": 52354394.0, + "step": 3890 + }, + { + "epoch": 2.3751522533495737, + "grad_norm": 0.5643812417984009, + "learning_rate": 3.9751274604873135e-05, + "loss": 0.5828, + "mean_token_accuracy": 0.8361553356051445, + "num_tokens": 52491783.0, + "step": 3900 + }, + { + "epoch": 2.3812423873325215, + "grad_norm": 0.5355440974235535, + "learning_rate": 3.938288363010543e-05, + "loss": 0.59, + "mean_token_accuracy": 0.8323336541652679, + "num_tokens": 52623569.0, + "step": 3910 + }, + { + "epoch": 2.387332521315469, + "grad_norm": 0.5486684441566467, + "learning_rate": 3.9017545578467416e-05, + "loss": 0.5766, + "mean_token_accuracy": 0.8349390685558319, + "num_tokens": 52758323.0, + "step": 3920 + }, + { + "epoch": 2.3934226552984166, + "grad_norm": 0.7338609099388123, + "learning_rate": 3.865527624427424e-05, + "loss": 0.6389, + "mean_token_accuracy": 0.8210031896829605, + "num_tokens": 52885390.0, + "step": 3930 + }, + { + "epoch": 2.3995127892813644, + "grad_norm": 0.6802729368209839, + "learning_rate": 3.829609128917399e-05, + "loss": 0.5642, + "mean_token_accuracy": 0.8403128817677498, + "num_tokens": 53021341.0, + "step": 3940 + }, + { + "epoch": 2.4056029232643117, + "grad_norm": 0.6056346297264099, + "learning_rate": 3.794000624147081e-05, + "loss": 0.5718, + "mean_token_accuracy": 0.8383283510804176, + "num_tokens": 
53161898.0, + "step": 3950 + }, + { + "epoch": 2.4116930572472595, + "grad_norm": 0.6759582161903381, + "learning_rate": 3.758703649545342e-05, + "loss": 0.6012, + "mean_token_accuracy": 0.8309965297579766, + "num_tokens": 53297422.0, + "step": 3960 + }, + { + "epoch": 2.4177831912302072, + "grad_norm": 0.6293630599975586, + "learning_rate": 3.723719731072964e-05, + "loss": 0.6016, + "mean_token_accuracy": 0.8290236741304398, + "num_tokens": 53426293.0, + "step": 3970 + }, + { + "epoch": 2.4238733252131546, + "grad_norm": 0.5844866037368774, + "learning_rate": 3.689050381156668e-05, + "loss": 0.5933, + "mean_token_accuracy": 0.8323912978172302, + "num_tokens": 53558802.0, + "step": 3980 + }, + { + "epoch": 2.4299634591961023, + "grad_norm": 0.6043350100517273, + "learning_rate": 3.654697098623731e-05, + "loss": 0.5912, + "mean_token_accuracy": 0.8325126841664314, + "num_tokens": 53692702.0, + "step": 3990 + }, + { + "epoch": 2.43605359317905, + "grad_norm": 0.6182368397712708, + "learning_rate": 3.6206613686371874e-05, + "loss": 0.6284, + "mean_token_accuracy": 0.8247880086302757, + "num_tokens": 53822212.0, + "step": 4000 + }, + { + "epoch": 2.4421437271619975, + "grad_norm": 0.6524989604949951, + "learning_rate": 3.586944662631628e-05, + "loss": 0.5655, + "mean_token_accuracy": 0.8384448051452636, + "num_tokens": 53966156.0, + "step": 4010 + }, + { + "epoch": 2.4482338611449452, + "grad_norm": 0.6937422156333923, + "learning_rate": 3.5535484382495686e-05, + "loss": 0.5625, + "mean_token_accuracy": 0.840263594686985, + "num_tokens": 54105125.0, + "step": 4020 + }, + { + "epoch": 2.4543239951278926, + "grad_norm": 0.7187004685401917, + "learning_rate": 3.520474139278455e-05, + "loss": 0.638, + "mean_token_accuracy": 0.8233362153172493, + "num_tokens": 54230056.0, + "step": 4030 + }, + { + "epoch": 2.4604141291108403, + "grad_norm": 0.7127388715744019, + "learning_rate": 3.487723195588231e-05, + "loss": 0.6007, + "mean_token_accuracy": 0.8296995043754578, + 
"num_tokens": 54364012.0, + "step": 4040 + }, + { + "epoch": 2.466504263093788, + "grad_norm": 0.596787691116333, + "learning_rate": 3.455297023069529e-05, + "loss": 0.5619, + "mean_token_accuracy": 0.84009919911623, + "num_tokens": 54505256.0, + "step": 4050 + }, + { + "epoch": 2.472594397076736, + "grad_norm": 0.5992204546928406, + "learning_rate": 3.423197023572453e-05, + "loss": 0.5662, + "mean_token_accuracy": 0.8365229025483132, + "num_tokens": 54641531.0, + "step": 4060 + }, + { + "epoch": 2.4786845310596832, + "grad_norm": 0.5798633098602295, + "learning_rate": 3.391424584845983e-05, + "loss": 0.5957, + "mean_token_accuracy": 0.8325728610157966, + "num_tokens": 54777760.0, + "step": 4070 + }, + { + "epoch": 2.484774665042631, + "grad_norm": 0.5911865830421448, + "learning_rate": 3.359981080477968e-05, + "loss": 0.5621, + "mean_token_accuracy": 0.8406099453568459, + "num_tokens": 54920046.0, + "step": 4080 + }, + { + "epoch": 2.4908647990255783, + "grad_norm": 0.7955853343009949, + "learning_rate": 3.32886786983575e-05, + "loss": 0.6041, + "mean_token_accuracy": 0.830414567887783, + "num_tokens": 55049133.0, + "step": 4090 + }, + { + "epoch": 2.496954933008526, + "grad_norm": 0.7664952874183655, + "learning_rate": 3.29808629800739e-05, + "loss": 0.5649, + "mean_token_accuracy": 0.8381562843918801, + "num_tokens": 55189390.0, + "step": 4100 + }, + { + "epoch": 2.503045066991474, + "grad_norm": 0.7716973423957825, + "learning_rate": 3.267637695743531e-05, + "loss": 0.609, + "mean_token_accuracy": 0.8287163496017456, + "num_tokens": 55320616.0, + "step": 4110 + }, + { + "epoch": 2.5091352009744217, + "grad_norm": 0.8114868402481079, + "learning_rate": 3.237523379399847e-05, + "loss": 0.5971, + "mean_token_accuracy": 0.8342167064547539, + "num_tokens": 55453396.0, + "step": 4120 + }, + { + "epoch": 2.515225334957369, + "grad_norm": 0.5958247780799866, + "learning_rate": 3.207744650880153e-05, + "loss": 0.5599, + "mean_token_accuracy": 0.841068896651268, + 
"num_tokens": 55594828.0, + "step": 4130 + }, + { + "epoch": 2.5213154689403168, + "grad_norm": 0.5653116106987, + "learning_rate": 3.178302797580104e-05, + "loss": 0.6087, + "mean_token_accuracy": 0.8287098646163941, + "num_tokens": 55727082.0, + "step": 4140 + }, + { + "epoch": 2.527405602923264, + "grad_norm": 0.5179179310798645, + "learning_rate": 3.149199092331553e-05, + "loss": 0.5676, + "mean_token_accuracy": 0.839223000407219, + "num_tokens": 55871490.0, + "step": 4150 + }, + { + "epoch": 2.533495736906212, + "grad_norm": 0.7765582799911499, + "learning_rate": 3.1204347933475144e-05, + "loss": 0.6045, + "mean_token_accuracy": 0.8292981028556824, + "num_tokens": 56005734.0, + "step": 4160 + }, + { + "epoch": 2.5395858708891597, + "grad_norm": 0.7029058337211609, + "learning_rate": 3.0920111441677726e-05, + "loss": 0.6274, + "mean_token_accuracy": 0.8268977910280227, + "num_tokens": 56137399.0, + "step": 4170 + }, + { + "epoch": 2.5456760048721074, + "grad_norm": 0.5849781632423401, + "learning_rate": 3.063929373605119e-05, + "loss": 0.5984, + "mean_token_accuracy": 0.8308302730321884, + "num_tokens": 56269293.0, + "step": 4180 + }, + { + "epoch": 2.5517661388550548, + "grad_norm": 0.703788161277771, + "learning_rate": 3.0361906956922358e-05, + "loss": 0.5954, + "mean_token_accuracy": 0.831818374991417, + "num_tokens": 56400293.0, + "step": 4190 + }, + { + "epoch": 2.5578562728380025, + "grad_norm": 0.605482816696167, + "learning_rate": 3.0087963096291965e-05, + "loss": 0.5726, + "mean_token_accuracy": 0.8381782233715057, + "num_tokens": 56537449.0, + "step": 4200 + }, + { + "epoch": 2.56394640682095, + "grad_norm": 0.6472823619842529, + "learning_rate": 2.9817473997316338e-05, + "loss": 0.5827, + "mean_token_accuracy": 0.8334032818675041, + "num_tokens": 56671079.0, + "step": 4210 + }, + { + "epoch": 2.5700365408038977, + "grad_norm": 0.7700249552726746, + "learning_rate": 2.9550451353795366e-05, + "loss": 0.6021, + "mean_token_accuracy": 0.8302450269460678, 
+ "num_tokens": 56807767.0, + "step": 4220 + }, + { + "epoch": 2.5761266747868454, + "grad_norm": 0.6605527400970459, + "learning_rate": 2.9286906709666923e-05, + "loss": 0.6154, + "mean_token_accuracy": 0.8274287924170494, + "num_tokens": 56935160.0, + "step": 4230 + }, + { + "epoch": 2.582216808769793, + "grad_norm": 0.5413617491722107, + "learning_rate": 2.902685145850781e-05, + "loss": 0.579, + "mean_token_accuracy": 0.8385426059365273, + "num_tokens": 57073941.0, + "step": 4240 + }, + { + "epoch": 2.5883069427527405, + "grad_norm": 0.7194695472717285, + "learning_rate": 2.8770296843041234e-05, + "loss": 0.6248, + "mean_token_accuracy": 0.8245450094342232, + "num_tokens": 57200681.0, + "step": 4250 + }, + { + "epoch": 2.5943970767356883, + "grad_norm": 0.5466388463973999, + "learning_rate": 2.851725395465068e-05, + "loss": 0.5959, + "mean_token_accuracy": 0.8355760648846626, + "num_tokens": 57336873.0, + "step": 4260 + }, + { + "epoch": 2.6004872107186356, + "grad_norm": 0.7041544318199158, + "learning_rate": 2.826773373290048e-05, + "loss": 0.6056, + "mean_token_accuracy": 0.8318590998649598, + "num_tokens": 57468023.0, + "step": 4270 + }, + { + "epoch": 2.6065773447015834, + "grad_norm": 0.6439122557640076, + "learning_rate": 2.8021746965062823e-05, + "loss": 0.5836, + "mean_token_accuracy": 0.8366597965359688, + "num_tokens": 57604960.0, + "step": 4280 + }, + { + "epoch": 2.612667478684531, + "grad_norm": 0.701906681060791, + "learning_rate": 2.7779304285651454e-05, + "loss": 0.5703, + "mean_token_accuracy": 0.8376422345638275, + "num_tokens": 57743772.0, + "step": 4290 + }, + { + "epoch": 2.6187576126674785, + "grad_norm": 0.5615540146827698, + "learning_rate": 2.754041617596182e-05, + "loss": 0.5661, + "mean_token_accuracy": 0.8376396596431732, + "num_tokens": 57888421.0, + "step": 4300 + }, + { + "epoch": 2.6248477466504263, + "grad_norm": 0.7024915814399719, + "learning_rate": 2.73050929636181e-05, + "loss": 0.5924, + "mean_token_accuracy": 
0.8345223397016526, + "num_tokens": 58026002.0, + "step": 4310 + }, + { + "epoch": 2.630937880633374, + "grad_norm": 0.6473823189735413, + "learning_rate": 2.7073344822126588e-05, + "loss": 0.5799, + "mean_token_accuracy": 0.8390604540705681, + "num_tokens": 58167373.0, + "step": 4320 + }, + { + "epoch": 2.6370280146163214, + "grad_norm": 0.592150866985321, + "learning_rate": 2.6845181770435913e-05, + "loss": 0.5745, + "mean_token_accuracy": 0.8360892593860626, + "num_tokens": 58305631.0, + "step": 4330 + }, + { + "epoch": 2.643118148599269, + "grad_norm": 0.6310701966285706, + "learning_rate": 2.662061367250389e-05, + "loss": 0.6368, + "mean_token_accuracy": 0.8224513292312622, + "num_tokens": 58430217.0, + "step": 4340 + }, + { + "epoch": 2.649208282582217, + "grad_norm": 0.7892738580703735, + "learning_rate": 2.6399650236871114e-05, + "loss": 0.5984, + "mean_token_accuracy": 0.8304956495761872, + "num_tokens": 58559084.0, + "step": 4350 + }, + { + "epoch": 2.6552984165651643, + "grad_norm": 0.6933486461639404, + "learning_rate": 2.6182301016241194e-05, + "loss": 0.6288, + "mean_token_accuracy": 0.8244527041912079, + "num_tokens": 58684693.0, + "step": 4360 + }, + { + "epoch": 2.661388550548112, + "grad_norm": 0.7749532461166382, + "learning_rate": 2.5968575407067848e-05, + "loss": 0.6076, + "mean_token_accuracy": 0.8320668131113053, + "num_tokens": 58817986.0, + "step": 4370 + }, + { + "epoch": 2.6674786845310594, + "grad_norm": 0.5271658301353455, + "learning_rate": 2.5758482649148542e-05, + "loss": 0.5893, + "mean_token_accuracy": 0.8352309837937355, + "num_tokens": 58956432.0, + "step": 4380 + }, + { + "epoch": 2.673568818514007, + "grad_norm": 0.5268961191177368, + "learning_rate": 2.555203182522517e-05, + "loss": 0.5908, + "mean_token_accuracy": 0.8314955353736877, + "num_tokens": 59091108.0, + "step": 4390 + }, + { + "epoch": 2.679658952496955, + "grad_norm": 0.7242282629013062, + "learning_rate": 2.5349231860591298e-05, + "loss": 0.6036, + 
"mean_token_accuracy": 0.830773600935936, + "num_tokens": 59226935.0, + "step": 4400 + }, + { + "epoch": 2.6857490864799027, + "grad_norm": 0.6606787443161011, + "learning_rate": 2.515009152270638e-05, + "loss": 0.5947, + "mean_token_accuracy": 0.8281330943107605, + "num_tokens": 59358718.0, + "step": 4410 + }, + { + "epoch": 2.69183922046285, + "grad_norm": 0.5850993990898132, + "learning_rate": 2.4954619420816622e-05, + "loss": 0.5842, + "mean_token_accuracy": 0.8339527383446693, + "num_tokens": 59493400.0, + "step": 4420 + }, + { + "epoch": 2.697929354445798, + "grad_norm": 0.6853693127632141, + "learning_rate": 2.47628240055829e-05, + "loss": 0.5869, + "mean_token_accuracy": 0.832540349662304, + "num_tokens": 59628340.0, + "step": 4430 + }, + { + "epoch": 2.704019488428745, + "grad_norm": 0.5214850902557373, + "learning_rate": 2.457471356871536e-05, + "loss": 0.587, + "mean_token_accuracy": 0.8354447767138481, + "num_tokens": 59768106.0, + "step": 4440 + }, + { + "epoch": 2.710109622411693, + "grad_norm": 0.7274345755577087, + "learning_rate": 2.4390296242614934e-05, + "loss": 0.584, + "mean_token_accuracy": 0.83839912712574, + "num_tokens": 59902355.0, + "step": 4450 + }, + { + "epoch": 2.7161997563946407, + "grad_norm": 0.6061622500419617, + "learning_rate": 2.4209580000021777e-05, + "loss": 0.6142, + "mean_token_accuracy": 0.8286945581436157, + "num_tokens": 60033260.0, + "step": 4460 + }, + { + "epoch": 2.7222898903775885, + "grad_norm": 0.65688157081604, + "learning_rate": 2.403257265367063e-05, + "loss": 0.6134, + "mean_token_accuracy": 0.8292039141058922, + "num_tokens": 60165503.0, + "step": 4470 + }, + { + "epoch": 2.728380024360536, + "grad_norm": 0.671927273273468, + "learning_rate": 2.3859281855952982e-05, + "loss": 0.5613, + "mean_token_accuracy": 0.836935906112194, + "num_tokens": 60306999.0, + "step": 4480 + }, + { + "epoch": 2.7344701583434836, + "grad_norm": 0.7475078701972961, + "learning_rate": 2.3689715098586323e-05, + "loss": 0.5809, + 
"mean_token_accuracy": 0.8373750746250153, + "num_tokens": 60442655.0, + "step": 4490 + }, + { + "epoch": 2.740560292326431, + "grad_norm": 0.6505313515663147, + "learning_rate": 2.3523879712290205e-05, + "loss": 0.5699, + "mean_token_accuracy": 0.838838130235672, + "num_tokens": 60584753.0, + "step": 4500 + }, + { + "epoch": 2.7466504263093787, + "grad_norm": 0.6547593474388123, + "learning_rate": 2.336178286646933e-05, + "loss": 0.6043, + "mean_token_accuracy": 0.8289906069636345, + "num_tokens": 60716008.0, + "step": 4510 + }, + { + "epoch": 2.7527405602923265, + "grad_norm": 0.6438285708427429, + "learning_rate": 2.3203431568903587e-05, + "loss": 0.5943, + "mean_token_accuracy": 0.8335410162806511, + "num_tokens": 60852035.0, + "step": 4520 + }, + { + "epoch": 2.7588306942752743, + "grad_norm": 0.5707643628120422, + "learning_rate": 2.304883266544519e-05, + "loss": 0.5651, + "mean_token_accuracy": 0.8370564222335816, + "num_tokens": 60991573.0, + "step": 4530 + }, + { + "epoch": 2.7649208282582216, + "grad_norm": 0.7300040125846863, + "learning_rate": 2.2897992839722563e-05, + "loss": 0.6344, + "mean_token_accuracy": 0.8223764657974243, + "num_tokens": 61116667.0, + "step": 4540 + }, + { + "epoch": 2.7710109622411694, + "grad_norm": 0.781278133392334, + "learning_rate": 2.27509186128515e-05, + "loss": 0.5983, + "mean_token_accuracy": 0.8337039306759835, + "num_tokens": 61250372.0, + "step": 4550 + }, + { + "epoch": 2.7771010962241167, + "grad_norm": 0.6679220795631409, + "learning_rate": 2.260761634315322e-05, + "loss": 0.5621, + "mean_token_accuracy": 0.8398270666599273, + "num_tokens": 61396984.0, + "step": 4560 + }, + { + "epoch": 2.7831912302070645, + "grad_norm": 0.6907692551612854, + "learning_rate": 2.2468092225879466e-05, + "loss": 0.604, + "mean_token_accuracy": 0.8311845645308494, + "num_tokens": 61532028.0, + "step": 4570 + }, + { + "epoch": 2.7892813641900123, + "grad_norm": 0.5401070713996887, + "learning_rate": 2.2332352292944697e-05, + "loss": 
0.6033, + "mean_token_accuracy": 0.8319179087877273, + "num_tokens": 61666037.0, + "step": 4580 + }, + { + "epoch": 2.79537149817296, + "grad_norm": 0.7261266112327576, + "learning_rate": 2.2200402412665298e-05, + "loss": 0.5679, + "mean_token_accuracy": 0.8387535363435745, + "num_tokens": 61806461.0, + "step": 4590 + }, + { + "epoch": 2.8014616321559074, + "grad_norm": 0.6658375263214111, + "learning_rate": 2.2072248289505863e-05, + "loss": 0.6096, + "mean_token_accuracy": 0.8302027896046639, + "num_tokens": 61936714.0, + "step": 4600 + }, + { + "epoch": 2.807551766138855, + "grad_norm": 0.6202580332756042, + "learning_rate": 2.194789546383265e-05, + "loss": 0.5684, + "mean_token_accuracy": 0.8411198407411575, + "num_tokens": 62074886.0, + "step": 4610 + }, + { + "epoch": 2.8136419001218025, + "grad_norm": 0.6867278218269348, + "learning_rate": 2.1827349311673956e-05, + "loss": 0.6041, + "mean_token_accuracy": 0.8294085919857025, + "num_tokens": 62205784.0, + "step": 4620 + }, + { + "epoch": 2.8197320341047503, + "grad_norm": 0.6569831967353821, + "learning_rate": 2.1710615044487803e-05, + "loss": 0.6202, + "mean_token_accuracy": 0.8258814692497254, + "num_tokens": 62334349.0, + "step": 4630 + }, + { + "epoch": 2.825822168087698, + "grad_norm": 0.6896407604217529, + "learning_rate": 2.1597697708936558e-05, + "loss": 0.5865, + "mean_token_accuracy": 0.8326056882739067, + "num_tokens": 62466787.0, + "step": 4640 + }, + { + "epoch": 2.831912302070646, + "grad_norm": 0.6883603930473328, + "learning_rate": 2.1488602186668787e-05, + "loss": 0.5853, + "mean_token_accuracy": 0.8355178698897362, + "num_tokens": 62602765.0, + "step": 4650 + }, + { + "epoch": 2.838002436053593, + "grad_norm": 0.6500746607780457, + "learning_rate": 2.1383333194108245e-05, + "loss": 0.5693, + "mean_token_accuracy": 0.8407857313752174, + "num_tokens": 62741245.0, + "step": 4660 + }, + { + "epoch": 2.844092570036541, + "grad_norm": 0.6719433665275574, + "learning_rate": 2.1281895282249874e-05, + 
"loss": 0.5867, + "mean_token_accuracy": 0.8356816500425339, + "num_tokens": 62876137.0, + "step": 4670 + }, + { + "epoch": 2.8501827040194883, + "grad_norm": 0.5427895188331604, + "learning_rate": 2.1184292836463194e-05, + "loss": 0.5869, + "mean_token_accuracy": 0.8338323414325715, + "num_tokens": 63010203.0, + "step": 4680 + }, + { + "epoch": 2.856272838002436, + "grad_norm": 0.6588513255119324, + "learning_rate": 2.10905300763026e-05, + "loss": 0.5824, + "mean_token_accuracy": 0.837106254696846, + "num_tokens": 63152549.0, + "step": 4690 + }, + { + "epoch": 2.862362971985384, + "grad_norm": 0.6410390138626099, + "learning_rate": 2.1000611055324987e-05, + "loss": 0.57, + "mean_token_accuracy": 0.8419015809893609, + "num_tokens": 63287537.0, + "step": 4700 + }, + { + "epoch": 2.868453105968331, + "grad_norm": 0.6098606586456299, + "learning_rate": 2.09145396609145e-05, + "loss": 0.5791, + "mean_token_accuracy": 0.8359826967120171, + "num_tokens": 63424589.0, + "step": 4710 + }, + { + "epoch": 2.874543239951279, + "grad_norm": 0.7105115652084351, + "learning_rate": 2.083231961411448e-05, + "loss": 0.5915, + "mean_token_accuracy": 0.8325518026947976, + "num_tokens": 63554175.0, + "step": 4720 + }, + { + "epoch": 2.8806333739342267, + "grad_norm": 0.648669421672821, + "learning_rate": 2.0753954469466614e-05, + "loss": 0.6047, + "mean_token_accuracy": 0.8295484691858291, + "num_tokens": 63682998.0, + "step": 4730 + }, + { + "epoch": 2.886723507917174, + "grad_norm": 0.657294750213623, + "learning_rate": 2.0679447614857204e-05, + "loss": 0.5921, + "mean_token_accuracy": 0.8348564639687538, + "num_tokens": 63820437.0, + "step": 4740 + }, + { + "epoch": 2.892813641900122, + "grad_norm": 0.5496834516525269, + "learning_rate": 2.0608802271370776e-05, + "loss": 0.5599, + "mean_token_accuracy": 0.8398202702403068, + "num_tokens": 63964469.0, + "step": 4750 + }, + { + "epoch": 2.8989037758830696, + "grad_norm": 0.7021865248680115, + "learning_rate": 2.0542021493150766e-05, + 
"loss": 0.6405, + "mean_token_accuracy": 0.8221626535058022, + "num_tokens": 64085097.0, + "step": 4760 + }, + { + "epoch": 2.904993909866017, + "grad_norm": 0.5804896950721741, + "learning_rate": 2.0479108167267523e-05, + "loss": 0.6079, + "mean_token_accuracy": 0.8265683457255364, + "num_tokens": 64215053.0, + "step": 4770 + }, + { + "epoch": 2.9110840438489647, + "grad_norm": 0.7602665424346924, + "learning_rate": 2.0420065013593475e-05, + "loss": 0.5827, + "mean_token_accuracy": 0.8372338116168976, + "num_tokens": 64348756.0, + "step": 4780 + }, + { + "epoch": 2.9171741778319125, + "grad_norm": 0.6796186566352844, + "learning_rate": 2.0364894584685548e-05, + "loss": 0.5918, + "mean_token_accuracy": 0.8328269824385643, + "num_tokens": 64484863.0, + "step": 4790 + }, + { + "epoch": 2.92326431181486, + "grad_norm": 0.5706749558448792, + "learning_rate": 2.0313599265674807e-05, + "loss": 0.572, + "mean_token_accuracy": 0.8380152434110641, + "num_tokens": 64621317.0, + "step": 4800 + }, + { + "epoch": 2.9293544457978076, + "grad_norm": 0.7671577334403992, + "learning_rate": 2.0266181274163366e-05, + "loss": 0.5842, + "mean_token_accuracy": 0.832770548760891, + "num_tokens": 64753894.0, + "step": 4810 + }, + { + "epoch": 2.9354445797807553, + "grad_norm": 0.6405246257781982, + "learning_rate": 2.022264266012849e-05, + "loss": 0.5511, + "mean_token_accuracy": 0.8441521510481834, + "num_tokens": 64897014.0, + "step": 4820 + }, + { + "epoch": 2.9415347137637027, + "grad_norm": 0.6317846179008484, + "learning_rate": 2.0182985305833967e-05, + "loss": 0.5965, + "mean_token_accuracy": 0.833104345202446, + "num_tokens": 65029007.0, + "step": 4830 + }, + { + "epoch": 2.9476248477466505, + "grad_norm": 0.5668535232543945, + "learning_rate": 2.0147210925748773e-05, + "loss": 0.577, + "mean_token_accuracy": 0.8383101314306259, + "num_tokens": 65166793.0, + "step": 4840 + }, + { + "epoch": 2.953714981729598, + "grad_norm": 0.5817210674285889, + "learning_rate": 
2.0115321066472894e-05, + "loss": 0.5967, + "mean_token_accuracy": 0.8329069703817368, + "num_tokens": 65302274.0, + "step": 4850 + }, + { + "epoch": 2.9598051157125456, + "grad_norm": 0.764815628528595, + "learning_rate": 2.0087317106670535e-05, + "loss": 0.6026, + "mean_token_accuracy": 0.830155149102211, + "num_tokens": 65434310.0, + "step": 4860 + }, + { + "epoch": 2.9658952496954933, + "grad_norm": 0.6195541620254517, + "learning_rate": 2.0063200257010438e-05, + "loss": 0.5662, + "mean_token_accuracy": 0.8380124986171722, + "num_tokens": 65571543.0, + "step": 4870 + }, + { + "epoch": 2.971985383678441, + "grad_norm": 0.680578351020813, + "learning_rate": 2.0042971560113606e-05, + "loss": 0.5897, + "mean_token_accuracy": 0.8346181452274323, + "num_tokens": 65705371.0, + "step": 4880 + }, + { + "epoch": 2.9780755176613884, + "grad_norm": 0.5930168628692627, + "learning_rate": 2.002663189050819e-05, + "loss": 0.5779, + "mean_token_accuracy": 0.8375312358140945, + "num_tokens": 65842664.0, + "step": 4890 + }, + { + "epoch": 2.9841656516443362, + "grad_norm": 0.5821495652198792, + "learning_rate": 2.001418195459171e-05, + "loss": 0.6032, + "mean_token_accuracy": 0.8325582191348075, + "num_tokens": 65977282.0, + "step": 4900 + }, + { + "epoch": 2.9902557856272836, + "grad_norm": 0.7727954983711243, + "learning_rate": 2.0005622290600484e-05, + "loss": 0.6033, + "mean_token_accuracy": 0.829984401166439, + "num_tokens": 66105864.0, + "step": 4910 + }, + { + "epoch": 2.9963459196102313, + "grad_norm": 0.7271150350570679, + "learning_rate": 2.000095326858638e-05, + "loss": 0.5762, + "mean_token_accuracy": 0.8358065068721772, + "num_tokens": 66241202.0, + "step": 4920 + } + ], + "logging_steps": 10, + "max_steps": 4926, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + 
"should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 8.098300386001027e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4926/training_args.bin b/checkpoint-4926/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7aaf98c04505b149e2e8903151128d95e7ab7b98 --- /dev/null +++ b/checkpoint-4926/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d57fe99d74b7e16ba38edb5265078ef5a69a65f6b630b2ca3feaacdb770f49e +size 6161 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..a463562962fc1910d6c169b0a7e9c95872abc92e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,1817 @@ +{ + "additional_special_tokens": [ + { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + 
"content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CAR", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + }, + { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + 
"content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FPS", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IMEM", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { 
+ "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MST", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + }, + { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "OTP", 
+ "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SCH", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + 
{ + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + 
"content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e0ace705cfa5bd60a370b9daafe3e1513770b9 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec3af79eb37b392bb5382bfe3f4eeab633498c220a804f4fd5d7d102a000f1a +size 27914312 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..923c92874790920d6eaf5c317c2d63cd3b569e62 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2489 @@ +{ + "added_tokens_decoder": { + "1529": { + "content": "SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2022": { + "content": "IC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2416": { + "content": "AD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3360": { + "content": "OS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4478": { + "content": "IV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5173": { + "content": "PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6258": { + "content": "IST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6781": { + "content": "CA", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7726": { + "content": "FO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9375": { + "content": "REE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9760": { + "content": "EC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10227": { + "content": "TA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11720": { + "content": "LIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12235": { + "content": "NT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12515": { + "content": "RC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13874": { + "content": "MB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13905": { + "content": "GR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15409": { + "content": "DU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17183": { + "content": "DT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19862": { + "content": "SO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20174": { + "content": "FP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22723": { + "content": "VI", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": true + }, + "27968": { + "content": "SW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29829": { + "content": "CV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29864": { + "content": "GP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34134": { + "content": "ILD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34435": { + "content": "FW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39749": { + "content": "TRL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43230": { + "content": "LAB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46966": { + "content": "SDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47787": { + "content": "CPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47994": { + "content": "MAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50719": { + "content": "IAS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52907": { + "content": "CAR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54793": { + "content": "EOF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58530": { + "content": "PB", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "66773": { + "content": "DEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68495": { + "content": "HW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70684": { + "content": "SHA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72089": { + "content": "SKU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74923": { + "content": "CAN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77411": { + "content": "AKE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81990": { + "content": "DEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82244": { + "content": "GPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84526": { + "content": "POR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86154": { + "content": "IID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86297": { + "content": "CSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93660": { + "content": "ACL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94432": { + "content": "TZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95202": { + "content": "SQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "100413": { + "content": "PSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104755": { + "content": "DMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104805": { + "content": "BW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106979": { + "content": "OTP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110871": { + "content": "CRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117565": { + "content": "FPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118754": { + "content": "IPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126731": { + "content": "OEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126978": { + "content": "AES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130911": { + "content": "RSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147130": { + "content": "CTR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "OSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152076": { + "content": "IOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152095": { + "content": "SPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "152119": { + "content": "CDD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155474": { + "content": "SCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158359": { + "content": "ipc", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162121": { + "content": "PLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166996": { + "content": "CBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171893": { + "content": "DISPLAY", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172873": { + "content": "AAD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175772": { + "content": "TEE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177970": { + "content": "ECB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178261": { + "content": "ECC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178974": { + "content": "PCR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186075": { + "content": "IPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187697": { + "content": "SSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199998": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "199999": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200000": { + "content": "<|reserved_200000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200001": { + "content": "<|reserved_200001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200002": { + "content": "<|return|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200003": { + "content": "<|constrain|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200004": { + "content": "<|reserved_200004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200005": { + "content": "<|channel|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200006": { + "content": "<|start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200007": { + "content": "<|end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200008": { + "content": "<|message|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200009": { + "content": "<|reserved_200009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200010": { + "content": "<|reserved_200010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200011": { + "content": "<|reserved_200011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "200012": { + "content": "<|call|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200013": { + "content": "<|reserved_200013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200014": { + "content": "<|reserved_200014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200015": { + "content": "<|reserved_200015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200016": { + "content": "<|reserved_200016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200017": { + "content": "<|reserved_200017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200018": { + "content": "<|endofprompt|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200019": { + "content": "AArch64", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200020": { + "content": "AES256GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200021": { + "content": "AESCBC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200022": { + "content": "AON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200023": { + "content": "ASID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200024": { + "content": "AXI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200025": { + 
"content": "Acronym", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200026": { + "content": "AoU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200027": { + "content": "AutoSar", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200028": { + "content": "BAM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200029": { + "content": "BCH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200030": { + "content": "BIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200031": { + "content": "BOM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200032": { + "content": "BPMP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200033": { + "content": "BPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200034": { + "content": "BPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200035": { + "content": "BRBCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200036": { + "content": "C2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200037": { + "content": "CANFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200038": { + "content": "CAVP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200039": 
{ + "content": "CBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200040": { + "content": "CBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200041": { + "content": "CCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200042": { + "content": "CCPLEX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200043": { + "content": "CCPLEX_L2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200044": { + "content": "CCPLEX_MISC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200045": { + "content": "CCPLEX_SCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200046": { + "content": "CIF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200047": { + "content": "CMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200048": { + "content": "CPE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200049": { + "content": "CSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200050": { + "content": "CTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200051": { + "content": "DBB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200052": { + "content": "DFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "200053": { + "content": "DFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200054": { + "content": "DIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200055": { + "content": "DLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200056": { + "content": "DMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200057": { + "content": "DPA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200058": { + "content": "DSC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200059": { + "content": "DVMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200060": { + "content": "ECDHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200061": { + "content": "ECDSA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200062": { + "content": "ECID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200063": { + "content": "EDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200064": { + "content": "EOTTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200065": { + "content": "EQoS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200066": { + "content": "FCL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "200067": { + "content": "FHTI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200068": { + "content": "FIPS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200069": { + "content": "FMEA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200070": { + "content": "FMON", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200071": { + "content": "FuSa", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200072": { + "content": "GCM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200073": { + "content": "GFD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200074": { + "content": "GIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200075": { + "content": "GMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200076": { + "content": "GMSL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200077": { + "content": "GOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200078": { + "content": "GPCDMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200079": { + "content": "Gpps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200080": { + "content": "HBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "200081": { + "content": "HBR2", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200082": { + "content": "HBR3", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200083": { + "content": "HDS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200084": { + "content": "HIS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200085": { + "content": "HMAC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200086": { + "content": "HPSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200087": { + "content": "HSI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200088": { + "content": "HSM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200089": { + "content": "HSP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200090": { + "content": "ICD", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200091": { + "content": "IDR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200092": { + "content": "IDT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200093": { + "content": "IEP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200094": { + "content": "IEU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200095": { + "content": "IFU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200096": { + "content": "IMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200097": { + "content": "IOFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200098": { + "content": "IOMMU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200099": { + "content": "IRF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200100": { + "content": "IoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200101": { + "content": "JSR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200102": { + "content": "KAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200103": { + "content": "KCV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200104": { + "content": "KDF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200105": { + "content": "KPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200106": { + "content": "L1PT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200107": { + "content": "L2C", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200108": { + "content": "L2mDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200109": { + "content": "L2vDIR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200110": { + "content": "LBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200111": { + "content": "LDC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200112": { + "content": "LFT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200113": { + "content": "LIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200114": { + "content": "LSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200115": { + "content": "MAQ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200116": { + "content": "MBIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200117": { + "content": "MCAL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200118": { + "content": "MCE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200119": { + "content": "MCU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200120": { + "content": "MSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200121": { + "content": "MSS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200122": { + "content": "MST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200123": { + "content": "MTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200124": { + "content": "MiTM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200125": { + "content": "NIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200126": { + "content": "NIST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200127": { + "content": "NITO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200128": { + "content": "NOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200129": { + "content": "NOOP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200130": { + "content": "NVDEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200131": { + "content": "NVENC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200132": { + "content": "NVJPG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200133": { + "content": "NVM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200134": { + "content": "NVVSE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200135": { + "content": "OFA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200136": { + "content": "PCIE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, 
+ "200137": { + "content": "PCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200138": { + "content": "PCT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200139": { + "content": "PDK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200140": { + "content": "PII", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200141": { + "content": "PIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200142": { + "content": "PKC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200143": { + "content": "PKCS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200144": { + "content": "PKI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200145": { + "content": "PPC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200146": { + "content": "PTXT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200147": { + "content": "PVA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200148": { + "content": "QNX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200149": { + "content": "QOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200150": { + "content": "QSPI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200151": { + "content": "RBG", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200152": { + "content": "RBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200153": { + "content": "RDEV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200154": { + "content": "RMA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200155": { + "content": "RMW", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200156": { + "content": "RSB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200157": { + "content": "RTS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200158": { + "content": "RoT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200159": { + "content": "SAE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200160": { + "content": "SBK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200161": { + "content": "SEL0", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200162": { + "content": "SEL1", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200163": { + "content": "SEooC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200164": { + "content": "SGM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200165": { + "content": "SHA256", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200166": { + "content": "SHA512", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200167": { + "content": "SHE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200168": { + "content": "SNOC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200169": { + "content": "SST", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200170": { + "content": "SWAT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200171": { + "content": "SoC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200172": { + "content": "TCF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200173": { + "content": "THI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200174": { + "content": "TNR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200175": { + "content": "TOS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200176": { + "content": "TRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200177": { + "content": "TSEC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200178": { + "content": "UFS", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200179": { + "content": "VBR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200180": { + "content": "VCPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200181": { + "content": "VIC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200182": { + "content": "VMEM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200183": { + "content": "VMID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200184": { + "content": "VPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200185": { + "content": "VRC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200186": { + "content": "VUI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200187": { + "content": "WARB", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200188": { + "content": "XIP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200189": { + "content": "bpp", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200190": { + "content": "eMMC", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200191": { + "content": "hfPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200192": { + "content": "iGPU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"200193": { + "content": "ipc_fg", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200194": { + "content": "ipc_t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200195": { + "content": "sbPLA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200196": { + "content": "xBTV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200197": { + "content": "xps", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "AAD", + "AArch64", + "ACL", + "AD", + "AES", + "AES256GCM", + "AESCBC", + "AKE", + "AON", + "ASID", + "AXI", + "Acronym", + "AoU", + "AutoSar", + "BAM", + "BCH", + "BIST", + "BOM", + "BPMP", + "BPS", + "BPU", + "BRBCT", + "BW", + "C2C", + "CA", + "CAN", + "CANFD", + "CAR", + "CAVP", + "CBB", + "CBC", + "CBR", + "CCM", + "CCPLEX", + "CCPLEX_L2", + "CCPLEX_MISC", + "CCPLEX_SCF", + "CDD", + "CIF", + "CMAC", + "CPE", + "CPU", + "CRC", + "CSI", + "CSP", + "CTR", + "CTXT", + "CV", + "DBB", + "DEP", + "DEV", + "DFA", + "DFT", + "DIP", + "DISPLAY", + "DLA", + "DMA", + "DMEM", + "DPA", + "DSC", + "DT", + "DU", + "DVMU", + "EC", + "ECB", + "ECC", + "ECDHE", + "ECDSA", + "ECID", + "EDR", + "EOF", + "EOTTI", + "EQoS", + "FCL", + "FHTI", + "FIPS", + "FMEA", + "FMON", + "FO", + "FP", + "FPS", + "FW", + "FuSa", + "GCM", + "GFD", + "GIC", + "GMAC", + "GMSL", + "GOP", + "GP", + "GPCDMA", + "GPU", + "GR", + "Gpps", + "HBR", + "HBR2", + "HBR3", + "HDS", + "HIS", + "HMAC", + "HPSE", + "HSI", + "HSM", + "HSP", + "HW", + "IAS", + "IC", + "ICD", + "IDR", + "IDT", + "IEP", + "IEU", + "IFU", + "IID", + "ILD", + "IMEM", + "IOC", + "IOFA", + "IOMMU", + "IPC", + "IPI", + "IRF", + "IST", + "IV", + "IoT", + "JSR", + "KAT", + "KCV", + "KDF", + "KPI", + 
"L1PT", + "L2C", + "L2mDIR", + "L2vDIR", + "LAB", + "LBIST", + "LDC", + "LFT", + "LIC", + "LIP", + "LSB", + "MAC", + "MAQ", + "MB", + "MBIST", + "MCAL", + "MCE", + "MCU", + "MSB", + "MSS", + "MST", + "MTS", + "MiTM", + "NIP", + "NIST", + "NITO", + "NOC", + "NOOP", + "NT", + "NVDEC", + "NVENC", + "NVJPG", + "NVM", + "NVVSE", + "OEM", + "OFA", + "OS", + "OSP", + "OTP", + "PB", + "PCIE", + "PCPU", + "PCR", + "PCT", + "PDK", + "PII", + "PIP", + "PKC", + "PKCS", + "PKI", + "PL", + "PLA", + "POR", + "PPC", + "PSC", + "PTXT", + "PVA", + "QNX", + "QOS", + "QSPI", + "RBG", + "RBR", + "RC", + "RDEV", + "REE", + "RMA", + "RMW", + "RSA", + "RSB", + "RTS", + "RoT", + "SAE", + "SBK", + "SCH", + "SDK", + "SE", + "SEL0", + "SEL1", + "SEooC", + "SGM", + "SHA", + "SHA256", + "SHA512", + "SHE", + "SKU", + "SNOC", + "SO", + "SPA", + "SQ", + "SSR", + "SST", + "SW", + "SWAT", + "SoC", + "TA", + "TCF", + "TEE", + "THI", + "TNR", + "TOS", + "TRC", + "TRL", + "TSEC", + "TZ", + "UFS", + "VBR", + "VCPU", + "VI", + "VIC", + "VMEM", + "VMID", + "VPU", + "VRC", + "VUI", + "WARB", + "XIP", + "bpp", + "eMMC", + "hfPLA", + "iGPU", + "ipc", + "ipc_fg", + "ipc_t", + "sbPLA", + "xBTV", + "xps" + ], + "bos_token": "<|startoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|return|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|endoftext|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7aaf98c04505b149e2e8903151128d95e7ab7b98 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d57fe99d74b7e16ba38edb5265078ef5a69a65f6b630b2ca3feaacdb770f49e +size 6161