Aloukik21 commited on
Commit
c7465f4
·
verified ·
1 Parent(s): c216409

Remove root-level files (moved to TTS/DiffRhythm/)

Browse files
MuQ-MuLan-large/config.json DELETED
@@ -1,41 +0,0 @@
1
- {
2
- "mulan": {
3
- "sr": 24000,
4
- "clip_secs": 10,
5
- "dim_latent": 512,
6
- "decoupled_contrastive_learning": true,
7
- "hierarchical_contrastive_loss": false,
8
- "hierarchical_contrastive_loss_layers": null,
9
- "sigmoid_contrastive_loss": false,
10
- "rank_contrast": true
11
- },
12
- "audio_model": {
13
- "name": "OpenMuQ/MuQ-large-msd-iter",
14
- "model_dim": 1024,
15
- "use_layer_idx": -1
16
- },
17
- "text_model": {
18
- "name": "xlm-roberta-base",
19
- "model_dim": null,
20
- "use_layer_idx": -1
21
- },
22
- "audio_transformer": {
23
- "dim": 768,
24
- "tf_depth": 0,
25
- "heads": 8,
26
- "dim_head": 64,
27
- "attn_dropout": 0,
28
- "ff_dropout": 0,
29
- "ff_mult": 4
30
- },
31
- "text_transformer": {
32
- "dim": 768,
33
- "tf_depth": 8,
34
- "max_seq_len": 1024,
35
- "dim_head": 64,
36
- "heads": 8,
37
- "attn_dropout": 0,
38
- "ff_dropout": 0,
39
- "ff_mult": 4
40
- }
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MuQ-MuLan-large/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42ae3f7cb9b66759ee0089ddc70e2f28b130c2d8ba621457358272d32dd0444
3
- size 2653954401
 
 
 
 
MuQ-large-msd-iter/config.json DELETED
@@ -1,143 +0,0 @@
1
- {
2
- "codebook_dim": 16,
3
- "codebook_size": 8192,
4
- "conv_dim": 512,
5
- "encoder_depth": 12,
6
- "encoder_dim": 1024,
7
- "features": [
8
- "melspec_2048"
9
- ],
10
- "hop_length": 240,
11
- "is_flash": false,
12
- "label_rate": 25,
13
- "mask_hop": 0.4,
14
- "mask_prob": 0.6,
15
- "n_mels": 128,
16
- "num_codebooks": 1,
17
- "recon_loss_ratio": null,
18
- "resume_checkpoint": null,
19
- "rvq_ckpt_path": null,
20
- "rvq_multi_layer_num": 1,
21
- "rvq_n_codebooks": 8,
22
- "stat": {
23
- "melspec_2048_cnt": 14282760192,
24
- "melspec_2048_mean": 6.768444971712967,
25
- "melspec_2048_std": 18.417922652295623
26
- },
27
- "use_encodec_target": false,
28
- "use_rvq_target": true,
29
- "use_vq_target": false,
30
- "w2v2_config": {
31
- "activation_dropout": 0.1,
32
- "adapter_kernel_size": 3,
33
- "adapter_stride": 2,
34
- "add_adapter": false,
35
- "apply_spec_augment": true,
36
- "architectures": [
37
- "Wav2Vec2ConformerForCTC"
38
- ],
39
- "attention_dropout": 0.1,
40
- "bos_token_id": 1,
41
- "classifier_proj_size": 256,
42
- "codevector_dim": 768,
43
- "conformer_conv_dropout": 0.1,
44
- "contrastive_logits_temperature": 0.1,
45
- "conv_bias": true,
46
- "conv_depthwise_kernel_size": 31,
47
- "conv_dim": [
48
- 512,
49
- 512,
50
- 512,
51
- 512,
52
- 512,
53
- 512,
54
- 512
55
- ],
56
- "conv_kernel": [
57
- 10,
58
- 3,
59
- 3,
60
- 3,
61
- 3,
62
- 2,
63
- 2
64
- ],
65
- "conv_stride": [
66
- 5,
67
- 2,
68
- 2,
69
- 2,
70
- 2,
71
- 2,
72
- 2
73
- ],
74
- "ctc_loss_reduction": "sum",
75
- "ctc_zero_infinity": false,
76
- "diversity_loss_weight": 0.1,
77
- "do_stable_layer_norm": true,
78
- "eos_token_id": 2,
79
- "feat_extract_activation": "gelu",
80
- "feat_extract_dropout": 0.0,
81
- "feat_extract_norm": "layer",
82
- "feat_proj_dropout": 0.1,
83
- "feat_quantizer_dropout": 0.0,
84
- "final_dropout": 0.1,
85
- "gradient_checkpointing": false,
86
- "hidden_act": "swish",
87
- "hidden_dropout": 0.1,
88
- "hidden_dropout_prob": 0.1,
89
- "hidden_size": 1024,
90
- "initializer_range": 0.02,
91
- "intermediate_size": 4096,
92
- "layer_norm_eps": 1e-05,
93
- "layerdrop": 0.0,
94
- "mask_feature_length": 10,
95
- "mask_feature_min_masks": 0,
96
- "mask_feature_prob": 0.0,
97
- "mask_time_length": 10,
98
- "mask_time_min_masks": 2,
99
- "mask_time_prob": 0.05,
100
- "max_source_positions": 5000,
101
- "model_type": "wav2vec2-conformer",
102
- "num_adapter_layers": 3,
103
- "num_attention_heads": 16,
104
- "num_codevector_groups": 2,
105
- "num_codevectors_per_group": 320,
106
- "num_conv_pos_embedding_groups": 16,
107
- "num_conv_pos_embeddings": 128,
108
- "num_feat_extract_layers": 7,
109
- "num_hidden_layers": 24,
110
- "num_negatives": 100,
111
- "output_hidden_size": 1024,
112
- "pad_token_id": 0,
113
- "position_embeddings_type": "rotary",
114
- "proj_codevector_dim": 768,
115
- "rotary_embedding_base": 10000,
116
- "tdnn_dilation": [
117
- 1,
118
- 2,
119
- 3,
120
- 1,
121
- 1
122
- ],
123
- "tdnn_dim": [
124
- 512,
125
- 512,
126
- 512,
127
- 512,
128
- 1500
129
- ],
130
- "tdnn_kernel": [
131
- 5,
132
- 3,
133
- 3,
134
- 1,
135
- 1
136
- ],
137
- "torch_dtype": "float32",
138
- "transformers_version": "4.19.0.dev0",
139
- "use_weighted_layer_sum": false,
140
- "vocab_size": 32,
141
- "xvector_output_dim": 512
142
- }
143
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MuQ-large-msd-iter/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:273febab2be02872c37d2c37e48a9d6c52c1c9392f3eeeabd498efa281ccb7a6
3
- size 1333825096
 
 
 
 
MuQ-large-msd-iter/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:334df3de2832ec1acfd8b6ce54e7de4073401fe821f7ec0ad0d954832be2d26a
3
- size 1333965438
 
 
 
 
cfm_model_v1_2.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e819b317ce2cf1fb22f386d74f351b697204ec1f57f03edfe50dbca71cf0768
3
- size 2218709125
 
 
 
 
config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "model_type": "diffrhythm",
3
- "model": {
4
- "dim": 2048,
5
- "depth": 16,
6
- "heads": 32,
7
- "ff_mult": 4,
8
- "text_dim": 512,
9
- "conv_layers": 4,
10
- "mel_dim": 64,
11
- "text_num_embeds": 363
12
- }
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval-model/eval.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81cbd54af8b103251e425fcbd8f5313975cb742e760c3dae1e10f99969933fd6
3
- size 100792276
 
 
 
 
eval-model/eval.yaml DELETED
@@ -1,6 +0,0 @@
1
- generator:
2
- _target_: pretrained.eval.Generator
3
- in_features: 1024
4
- ffd_hidden_size: 4096
5
- num_classes: 5
6
- attn_layer_num: 4
 
 
 
 
 
 
 
vae_model.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:712693f27299937c6ccf1a6d6f1d9b45c7c8c11210d3b0cbb0f36181465ba29f
3
- size 624520127
 
 
 
 
xlm-roberta-base/config.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "architectures": [
3
- "XLMRobertaForMaskedLM"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "eos_token_id": 2,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.1,
10
- "hidden_size": 768,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 3072,
13
- "layer_norm_eps": 1e-05,
14
- "max_position_embeddings": 514,
15
- "model_type": "xlm-roberta",
16
- "num_attention_heads": 12,
17
- "num_hidden_layers": 12,
18
- "output_past": true,
19
- "pad_token_id": 1,
20
- "position_embedding_type": "absolute",
21
- "transformers_version": "4.17.0.dev0",
22
- "type_vocab_size": 1,
23
- "use_cache": true,
24
- "vocab_size": 250002
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
xlm-roberta-base/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fd4797bc397c3b8b55d6bb5740366b57e6a3ce91c04c77f22aafc0c128e6feb
3
- size 1115567652
 
 
 
 
xlm-roberta-base/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d83baaafea92d36de26002c8135a427d55ee6fdc4faaa6e400be4c47724a07e
3
- size 1115590446
 
 
 
 
xlm-roberta-base/sentencepiece.bpe.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
- size 5069051
 
 
 
 
xlm-roberta-base/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
xlm-roberta-base/tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- {"model_max_length": 512}