| | --- |
| | base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B |
| | library_name: transformers |
| | license: mit |
| | tags: |
| | - safe |
| | language: |
| | - en |
| | - zh |
| | pipeline_tag: text-generation |
| | --- |
| | |
| | # RealSafe-R1-1.5B |
| |
|
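| | This repository contains the model card for RealSafe-R1-1.5B, based on the paper [RealSafe-R1: Safety-Aligned DeepSeek-R1 without Compromising Reasoning Capability](https://huggingface.co/papers/2504.10081). |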
| |
|
| | # File information |
| |
|
| | The repository contains the following files, with their contents shown where available: |
| |
|
| | Filename: tokenizer.json |
| | Content: "Content of the file is larger than 50 KB, too long to display." |
| |
|
| | Filename: all_results.json |
| | Content: { |
| | "epoch": 0.9978021978021978, |
| | "total_flos": 7339342036992.0, |
| | "train_loss": 1.2485807309591823, |
| | "train_runtime": 995.4655, |
| | "train_samples_per_second": 14.624, |
| | "train_steps_per_second": 0.228 |
| | } |
| |
|
| | Filename: generation_config.json |
| | Content: { |
| | "_from_model_config": true, |
| | "bos_token_id": 151646, |
| | "do_sample": true, |
| | "eos_token_id": 151643, |
| | "temperature": 0.6, |
| | "top_p": 0.95, |
| | "transformers_version": "4.45.2" |
| | } |
| | |
| | Filename: train_results.json |
| | Content: { |
| | "epoch": 0.9978021978021978, |
| | "total_flos": 7339342036992.0, |
| | "train_loss": 1.2485807309591823, |
| | "train_runtime": 995.4655, |
| | "train_samples_per_second": 14.624, |
| | "train_steps_per_second": 0.228 |
| | } |
| | |
| | Filename: special_tokens_map.json |
| | Content: { |
| | "bos_token": { |
| | "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "eos_token": { |
| | "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "pad_token": { |
| | "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false |
| | } |
| | } |
| | |
| | Filename: trainer_state.json |
| | Content: { |
| | "best_metric": null, |
| | "best_model_checkpoint": null, |
| | "epoch": 0.9978021978021978, |
| | "eval_steps": 500, |
| | "global_step": 227, |
| | "is_hyper_param_search": false, |
| | "is_local_process_zero": true, |
| | "is_world_process_zero": true, |
| | "log_history": [ |
| | { |
| | "epoch": 0.9978021978021978, |
| | "step": 227, |
| | "total_flos": 7339342036992.0, |
| | "train_loss": 1.2485807309591823, |
| | "train_runtime": 995.4655, |
| | "train_samples_per_second": 14.624, |
| | "train_steps_per_second": 0.228 |
| | } |
| | ], |
| | "logging_steps": 500, |
| | "max_steps": 227, |
| | "num_input_tokens_seen": 0, |
| | "num_train_epochs": 1, |
| | "save_steps": 500, |
| | "stateful_callbacks": { |
| | "TrainerControl": { |
| | "args": { |
| | "should_epoch_stop": false, |
| | "should_evaluate": false, |
| | "should_log": false, |
| | "should_save": true, |
| | "should_training_stop": true |
| | }, |
| | "attributes": {} |
| | } |
| | }, |
| | "total_flos": 7339342036992.0, |
| | "train_batch_size": 2, |
| | "trial_name": null, |
| | "trial_params": null |
| | } |
| | |
| | Filename: tokenizer_config.json |
| | Content: { |
| | "add_bos_token": true, |
| | "add_eos_token": false, |
| | "add_prefix_space": null, |
| | "added_tokens_decoder": { |
| | "151643": { |
| | "content": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151644": { |
| | "content": "<\uff5cUser\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151645": { |
| | "content": "<\uff5cAssistant\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151646": { |
| | "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151647": { |
| | "content": "<|EOT|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151648": { |
| | "content": "<think>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151649": { |
| | "content": "</think>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151650": { |
| | "content": "<|quad_start|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151651": { |
| | "content": "<|quad_end|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151652": { |
| | "content": "<|vision_start|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151653": { |
| | "content": "<|vision_end|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151654": { |
| | "content": "<|vision_pad|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151655": { |
| | "content": "<|image_pad|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151656": { |
| | "content": "<|video_pad|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": true |
| | }, |
| | "151657": { |
| | "content": "<tool_call>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151658": { |
| | "content": "</tool_call>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151659": { |
| | "content": "<|fim_prefix|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151660": { |
| | "content": "<|fim_middle|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151661": { |
| | "content": "<|fim_suffix|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151662": { |
| | "content": "<|fim_pad|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151663": { |
| | "content": "<|repo_name|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | }, |
| | "151664": { |
| | "content": "<|file_sep|>", |
| | "lstrip": false, |
| | "normalized": false, |
| | "rstrip": false, |
| | "single_word": false, |
| | "special": false |
| | } |
| | }, |
| | "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>", |
| | "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\ |
| | ' + '```json' + '\ |
| | ' + tool['function']['arguments'] + '\ |
| | ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\ |
| | ' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\ |
| | ' + '```json' + '\ |
| | ' + tool['function']['arguments'] + '\ |
| | ' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\ |
| | <\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\ |
| | '}}{% endif %}", |
| | "clean_up_tokenization_spaces": false, |
| | "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| | "legacy": true, |
| | "model_max_length": 4096, |
| | "pad_token": "<\uff5cend\u2581of\u2581sentence\uff5c>", |
| | "padding_side": "right", |
| | "sp_model_kwargs": {}, |
| | "split_special_tokens": false, |
| | "tokenizer_class": "LlamaTokenizer", |
| | "unk_token": null, |
| | "use_default_system_prompt": false |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_name_or_path": "/nfs2/models/DeepSeek-R1-Distill-Qwen-1.5B/", |
| | "architectures": [ |
| | "Qwen2ForCausalLM" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 151646, |
| | "eos_token_id": 151643, |
| | "hidden_act": "silu", |
| | "hidden_size": 1536, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 8960, |
| | "max_position_embeddings": 131072, |
| | "max_window_layers": 21, |
| | "model_type": "qwen2", |
| | "num_attention_heads": 12, |
| | "num_hidden_layers": 28, |
| | "num_key_value_heads": 2, |
| | "rms_norm_eps": 1e-06, |
| | "rope_scaling": null, |
| | "rope_theta": 10000, |
| | "sliding_window": null, |
| | "tie_word_embeddings": false, |
| | "torch_dtype": "bfloat16", |
| | "transformers_version": "4.45.2", |
| | "use_cache": false, |
| | "use_mrope": false, |
| | "use_sliding_window": false, |
| | "vocab_size": 151936 |
| | } |