finhdev commited on
Commit
efade39
·
verified ·
0 Parent(s):

Duplicate from finhdev/testmobileclip

Browse files
Files changed (9) hide show
  1. .gitattributes +35 -0
  2. LICENSE +88 -0
  3. README.md +154 -0
  4. config.json +18 -0
  5. fig_accuracy_latency.png +0 -0
  6. handler.py +216 -0
  7. items.json +902 -0
  8. reparam.py +341 -0
  9. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Disclaimer: IMPORTANT: This Apple Machine Learning Research Model is
2
+ specifically developed and released by Apple Inc. ("Apple") for the sole purpose
3
+ of scientific research of artificial intelligence and machine-learning
4
+ technology. “Apple Machine Learning Research Model” means the model, including
5
+ but not limited to algorithms, formulas, trained model weights, parameters,
6
+ configurations, checkpoints, and any related materials (including
7
+ documentation).
8
+
9
+ This Apple Machine Learning Research Model is provided to You by
10
+ Apple in consideration of your agreement to the following terms, and your use,
11
+ modification, creation of Model Derivatives, and or redistribution of the Apple
12
+ Machine Learning Research Model constitutes acceptance of this Agreement. If You
13
+ do not agree with these terms, please do not use, modify, create Model
14
+ Derivatives of, or distribute this Apple Machine Learning Research Model or
15
+ Model Derivatives.
16
+
17
+ * License Scope: In consideration of your agreement to abide by the following
18
+ terms, and subject to these terms, Apple hereby grants you a personal,
19
+ non-exclusive, worldwide, non-transferable, royalty-free, revocable, and
20
+ limited license, to use, copy, modify, distribute, and create Model
21
+ Derivatives (defined below) of the Apple Machine Learning Research Model
22
+ exclusively for Research Purposes. You agree that any Model Derivatives You
23
+ may create or that may be created for You will be limited to Research Purposes
24
+ as well. “Research Purposes” means non-commercial scientific research and
25
+ academic development activities, such as experimentation, analysis, testing
26
+ conducted by You with the sole intent to advance scientific knowledge and
27
+ research. “Research Purposes” does not include any commercial exploitation,
28
+ product development or use in any commercial product or service.
29
+
30
+ * Distribution of Apple Machine Learning Research Model and Model Derivatives:
31
+ If you choose to redistribute Apple Machine Learning Research Model or its
32
+ Model Derivatives, you must provide a copy of this Agreement to such third
33
+ party, and ensure that the following attribution notice be provided: “Apple
34
+ Machine Learning Research Model is licensed under the Apple Machine Learning
35
+ Research Model License Agreement.” Additionally, all Model Derivatives must
36
+ clearly be identified as such, including disclosure of modifications and
37
+ changes made to the Apple Machine Learning Research Model. The name,
38
+ trademarks, service marks or logos of Apple may not be used to endorse or
39
+ promote Model Derivatives or the relationship between You and Apple. “Model
40
+ Derivatives” means any models or any other artifacts created by modifications,
41
+ improvements, adaptations, alterations to the architecture, algorithm or
42
+ training processes of the Apple Machine Learning Research Model, or by any
43
+ retraining, fine-tuning of the Apple Machine Learning Research Model.
44
+
45
+ * No Other License: Except as expressly stated in this notice, no other rights
46
+ or licenses, express or implied, are granted by Apple herein, including but
47
+ not limited to any patent, trademark, and similar intellectual property rights
48
+ worldwide that may be infringed by the Apple Machine Learning Research Model,
49
+ the Model Derivatives or by other works in which the Apple Machine Learning
50
+ Research Model may be incorporated.
51
+
52
+ * Compliance with Laws: Your use of Apple Machine Learning Research Model must
53
+ be in compliance with all applicable laws and regulations.
54
+
55
+ * Term and Termination: The term of this Agreement will begin upon your
56
+ acceptance of this Agreement or use of the Apple Machine Learning Research
57
+ Model and will continue until terminated in accordance with the following
58
+ terms. Apple may terminate this Agreement at any time if You are in breach of
59
+ any term or condition of this Agreement. Upon termination of this Agreement,
60
+ You must cease to use all Apple Machine Learning Research Models and Model
61
+ Derivatives and permanently delete any copy thereof. Sections 3, 6 and 7 will
62
+ survive termination.
63
+
64
+ * Disclaimer and Limitation of Liability: This Apple Machine Learning Research
65
+ Model and any outputs generated by the Apple Machine Learning Research Model
66
+ are provided on an “AS IS” basis. APPLE MAKES NO WARRANTIES, EXPRESS OR
67
+ IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
68
+ NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE,
69
+ REGARDING THE APPLE MACHINE LEARNING RESEARCH MODEL OR OUTPUTS GENERATED BY
70
+ THE APPLE MACHINE LEARNING RESEARCH MODEL. You are solely responsible for
71
+ determining the appropriateness of using or redistributing the Apple Machine
72
+ Learning Research Model and any outputs of the Apple Machine Learning Research
73
+ Model and assume any risks associated with Your use of the Apple Machine
74
+ Learning Research Model and any output and results. IN NO EVENT SHALL APPLE BE
75
+ LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
76
+ IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF
77
+ THE APPLE MACHINE LEARNING RESEARCH MODEL AND ANY OUTPUTS OF THE APPLE MACHINE
78
+ LEARNING RESEARCH MODEL, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT,
79
+ TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS
80
+ BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
81
+
82
+ * Governing Law: This Agreement will be governed by and construed under the laws
83
+ of the State of California without regard to its choice of law principles. The
84
+ Convention on Contracts for the International Sale of Goods shall not apply to
85
+ the Agreement except that the arbitration clause and any arbitration hereunder
86
+ shall be governed by the Federal Arbitration Act, Chapters 1 and 2. 
87
+
88
+ Copyright (C) 2025 Apple Inc. All Rights Reserved.
README.md ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apple-amlr
3
+ license_name: apple-ascl
4
+ license_link: https://github.com/apple/ml-mobileclip/blob/main/LICENSE_weights_data
5
+ library_name: mobileclip
6
+ ---
7
+
8
+ # 📸 MobileCLIP-B Zero-Shot Image Classifier
9
+ ### Hugging Face Inference Endpoint
10
+
11
+ > **Production-ready wrapper** around Apple’s MobileCLIP-B checkpoint.
12
+ > Handles image → text similarity in a single fast call.
13
+
14
+ ---
15
+
16
+ ## 📑 Sidebar
17
+
18
+ - [Features](#-features)
19
+ - [Repository layout](#-repository-layout)
20
+ - [Quick start (local smoke-test)](#-quick-start-local-smoke-test)
21
+ - [Calling the deployed endpoint](#-calling-the-deployed-endpoint)
22
+ - [How it works](#-how-it-works)
23
+ - [Updating the label set](#-updating-the-label-set)
24
+ - [License](#-license)
25
+
26
+ ---
27
+
28
+ ## ✨ Features
29
+ | | This repo |
30
+ |------------------------------|-----------|
31
+ | **Model** | MobileCLIP-B (`datacompdr` checkpoint) |
32
+ | **Branch fusion** | `reparameterize_model` baked in |
33
+ | **Mixed-precision** | FP16 on GPU, FP32 on CPU |
34
+ | **Pre-computed text feats** | One-time encoding of prompts in `items.json` |
35
+ | **Per-request work** | _Only_ image decoding → encode_image → softmax |
36
+ | **Latency (A10G)** | < 30 ms once the image arrives |
37
+
38
+ ---
39
+
40
+ ## 📁 Repository layout
41
+
42
+ | Path | Purpose |
43
+ |--------------------|------------------------------------------------------------------|
44
+ | `handler.py` | HF entry-point (loads model + text cache, serves requests) |
45
+ | `reparam.py` | Stand-alone copy of Apple’s `reparameterize_model` helper |
46
+ | `requirements.txt` | Minimal dep set (`torch`, `torchvision`, `open-clip-torch`) |
47
+ | `items.json` | Your label set: a JSON array with one object per class (the handler reads `id`, `name`, `prompt`) |
48
+ | `README.md` | This document |
49
+
50
+ ---
51
+
52
+ ## 🚀 Quick start (local smoke-test)
53
+
54
+ ```bash
55
+ python -m venv venv && source venv/bin/activate
56
+ pip install -r requirements.txt
57
+
58
+ python - <<'PY'
59
+ import base64, json, handler, pathlib
60
+ app = handler.EndpointHandler()
61
+
62
+ img_b64 = base64.b64encode(pathlib.Path("tests/cat.jpg").read_bytes()).decode()
63
+ print(app({"inputs": {"image": img_b64}})[:5]) # top-5 classes
64
+ PY
65
+ ```
66
+
67
+ ---
68
+
69
+ ## 🌐 Calling the deployed endpoint
70
+
71
+ ```bash
72
+ ENDPOINT="https://<your-endpoint>.aws.endpoints.huggingface.cloud"
73
+ TOKEN="hf_xxxxxxxxxxxxxxxxx"
74
+ IMG="cat.jpg"
75
+
76
+ ENDPOINT="$ENDPOINT" TOKEN="$TOKEN" python - "$IMG" <<'PY'
77
+ import base64, json, os, requests, sys
78
+ url = os.environ["ENDPOINT"]
79
+ token = os.environ["TOKEN"]
80
+ img = sys.argv[1]
81
+
82
+ payload = {
83
+ "inputs": {
84
+ "image": base64.b64encode(open(img, "rb").read()).decode()
85
+ }
86
+ }
87
+ resp = requests.post(
88
+ url,
89
+ headers={
90
+ "Authorization": f"Bearer {token}",
91
+ "Content-Type": "application/json",
92
+ "Accept": "application/json",
93
+ },
94
+ json=payload,
95
+ timeout=60,
96
+ )
97
+ print(json.dumps(resp.json()[:5], indent=2))
98
+ PY
100
+ ```
101
+
102
+ *Response example*
103
+
104
+ ```json
105
+ [
106
+ { "id": 23, "label": "cat", "score": 0.92 },
107
+ { "id": 11, "label": "tiger cat", "score": 0.05 },
108
+ { "id": 48, "label": "siamese cat", "score": 0.02 }
109
+ ]
110
+ ```
111
+
112
+ ---
113
+
114
+ ## ⚙️ How it works
115
+
116
+ 1. **Startup (runs once per replica)**
117
+
118
+ * Downloads / loads MobileCLIP-B (`datacompdr`).
119
+ * Fuses MobileOne branches via `reparam.py`.
120
+ * Reads `items.json` and encodes every prompt → `[N,512]` tensor.
121
+
122
+ 2. **Per request**
123
+
124
+ * Decodes base-64 JPEG/PNG.
125
+ * Applies OpenCLIP preprocessing (224 × 224 center-crop + normalise).
126
+ * Encodes the image, normalises, computes cosine similarity vs. cached text matrix.
127
+ * Returns sorted `[{id, label, score}, …]`.
128
+
129
+ ---
130
+
131
+ ## 🔄 Updating the label set
132
+
133
+ Simply edit `items.json`, push, and redeploy.
134
+
135
+ ```json
136
+ [
137
+ { "id": 0, "name": "cat", "prompt": "a photo of a cat" },
138
+ { "id": 1, "name": "dog", "prompt": "a photo of a dog" }
139
+ ]
140
+ ```
141
+
142
+ No code changes are required; the handler re-encodes prompts at start-up.
143
+
144
+ ---
145
+
146
+ ## ⚖️ License
147
+
148
+ * **Weights / data** — Apple AMLR (see [`LICENSE`](./LICENSE))
149
+ * **This wrapper code** — MIT
150
+
151
+ ---
152
+
153
+ <div align="center"><sub>Maintained with ❤️ by Your-Team — Aug 2025</sub></div>
154
+
config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "embed_dim": 512,
3
+ "image_cfg": {
4
+ "image_size": 224,
5
+ "model_name": "vit_b16"
6
+ },
7
+ "text_cfg": {
8
+ "context_length": 77,
9
+ "vocab_size": 49408,
10
+ "dim": 512,
11
+ "ffn_multiplier_per_layer": 4.0,
12
+ "n_heads_per_layer": 8,
13
+ "n_transformer_layers": 12,
14
+ "norm_layer": "layer_norm_fp32",
15
+ "causal_masking": true,
16
+ "model_name": "base"
17
+ }
18
+ }
fig_accuracy_latency.png ADDED
handler.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextlib, io, base64, torch, json
2
+ from PIL import Image
3
+ import open_clip
4
+ from reparam import reparameterize_model
5
+
6
class EndpointHandler:
    """Zero-shot image classifier built on MobileCLIP-B via OpenCLIP.

    The model is loaded and every class prompt from ``items.json`` is
    encoded once in ``__init__``.  Each call afterwards only encodes the
    incoming image and compares it against the cached text features.
    """

    def __init__(self, path: str = ""):
        """Load the model and pre-compute the text features.

        Args:
            path: Directory that contains ``items.json``.  An empty string
                (the default) means the current working directory.

        Raises:
            OSError: If ``items.json`` cannot be opened.
            KeyError: If an entry lacks ``prompt``, ``id`` or ``name``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # 1. Load the model (happens only once at startup).
        model, _, self.preprocess = open_clip.create_model_and_transforms(
            "MobileCLIP-B", pretrained="datacompdr"
        )
        model.eval()
        self.model = reparameterize_model(model)
        tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
        self.model.to(self.device)

        # Half precision is only used when a GPU is available.
        if self.device == "cuda":
            self.model.to(torch.float16)

        # 2. Load the class definitions.  The path is joined rather than
        #    formatted so that the default ``path=""`` resolves to
        #    "items.json" and not to "/items.json" at the filesystem root.
        with open(self._items_path(path), "r", encoding="utf-8") as f:
            class_definitions = json.load(f)

        # 3. 'prompt' feeds the text encoder; 'id' and 'name' are kept to
        #    label the scores in the response.
        prompts = [item["prompt"] for item in class_definitions]
        self.class_ids = [item["id"] for item in class_definitions]
        self.class_names = [item["name"] for item in class_definitions]

        # 4. Tokenize and encode all prompts at once, then L2-normalise.
        with torch.no_grad():
            text_tokens = tokenizer(prompts).to(self.device)
            self.text_features = self.model.encode_text(text_tokens)
            self.text_features /= self.text_features.norm(dim=-1, keepdim=True)

    @staticmethod
    def _items_path(path):
        """Return the location of ``items.json`` inside directory *path*."""
        import os  # local import: keeps this class self-contained

        return os.path.join(path, "items.json")

    @staticmethod
    def _rank(class_ids, class_names, scores):
        """Pair ids, names and scores and sort them by descending score."""
        return sorted(
            [
                {"id": i, "label": name, "score": float(p)}
                for i, name, p in zip(class_ids, class_names, scores)
            ],
            key=lambda x: x["score"],
            reverse=True,
        )

    def __call__(self, data):
        """Classify one base64-encoded image.

        Args:
            data: Either ``{"image": "<base64>"}`` or the same mapping
                wrapped as ``{"inputs": {...}}``.

        Returns:
            A list of ``{"id", "label", "score"}`` dicts, one per class,
            sorted by descending score.  Scores are a softmax over the
            configured classes.

        Raises:
            KeyError: If the payload has no ``"image"`` entry.
        """
        payload = data.get("inputs", data)
        img_b64 = payload["image"]

        # ---------------- decode image ----------------
        image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)

        # Match the dtype the model was cast to in __init__.
        if self.device == "cuda":
            img_tensor = img_tensor.to(torch.float16)

        # ---------------- forward pass ----------------
        with torch.no_grad():
            img_feat = self.model.encode_image(img_tensor)
            img_feat /= img_feat.norm(dim=-1, keepdim=True)

            # Scaled similarity against the cached text features -> softmax.
            probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0]

        return self._rank(self.class_ids, self.class_names, probs.cpu().tolist())
72
+
73
+
74
+
75
+ # """
76
+ # MobileCLIP‑B Zero‑Shot Image Classifier (Hugging Face Inference Endpoint)
77
+ # ===========================================================================
78
+
79
+ # * One container instance is created per replica; the `EndpointHandler`
80
+ # object below is instantiated exactly **once** at start‑up.
81
+
82
+ # * At request time (`__call__`) we receive a base‑64‑encoded image, run a
83
+ # **single forward pass**, and return class probabilities.
84
+
85
+ # Design choices
86
+ # --------------
87
+
88
+ # 1. **Model & transform come from OpenCLIP**
89
+ # This guarantees we apply **identical preprocessing** to what the model
90
+ # was trained with (224 × 224 crop + mean/std normalisation).
91
+
92
+ # 2. **Re‑parameterisation for inference**
93
+ # MobileCLIP uses MobileOne blocks that have extra convolution branches
94
+ # for training; `reparameterize_model` fuses them so inference is fast
95
+ # and deterministic.
96
+
97
+ # 3. **Text embeddings are cached**
98
+ # The class “prompts” (e.g. `"a photo of a cat"`) are encoded **once at
99
+ # start‑up**. Each request therefore encodes *only* the image and
100
+ # performs a single matrix multiplication.
101
+
102
+ # 4. **Mixed precision on GPU**
103
+ # If the container has CUDA, we cast the model **and** inputs to
104
+ # `float16`. That halves memory and roughly doubles throughput on most
105
+ # modern GPUs. On CPU we stay in `float32` for numerical stability.
106
+ # """
107
+
108
+ # import contextlib, io, base64, json
109
+ # from pathlib import Path
110
+ # from typing import Any, Dict, List
111
+
112
+ # import torch
113
+ # from PIL import Image
114
+ # import open_clip
115
+
116
+ # from reparam import reparameterize_model # local copy (~60 LoC) of Apple’s helper
117
+
118
+
119
+ # class EndpointHandler:
120
+ # """
121
+ # Hugging Face entry‑point. The toolkit will instantiate this class
122
+ # once and call it for every HTTP request.
123
+
124
+ # Parameters
125
+ # ----------
126
+ # path : str, optional
127
+ # Root directory of the repository. HF mounts the code under
128
+ # `/repository`; we use this path to locate `items.json`.
129
+ # """
130
+
131
+ # # ------------------------------------------------------------------ #
132
+ # # INITIALISATION (runs **once**) #
133
+ # # ------------------------------------------------------------------ #
134
+ # def __init__(self, path: str = "") -> None:
135
+ # self.device = "cuda" if torch.cuda.is_available() else "cpu"
136
+
137
+ # # 1️⃣ Load MobileCLIP‑B weights & transforms -------------------
138
+ # # `pretrained="datacompdr"` makes OpenCLIP download the
139
+ # # official checkpoint from the Hub (cached in the image layer).
140
+ # model, _, self.preprocess = open_clip.create_model_and_transforms(
141
+ # "MobileCLIP-B", pretrained="datacompdr"
142
+ # )
143
+ # model.eval() # disable dropout / BN updates
144
+ # model = reparameterize_model(model) # fuse MobileOne branches
145
+ # model.to(self.device)
146
+ # if self.device == "cuda":
147
+ # model = model.to(torch.float16) # FP16 for throughput
148
+ # self.model = model # hold a reference
149
+
150
+ # # 2️⃣ Build the tokenizer once --------------------------------
151
+ # tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
152
+
153
+ # # 3️⃣ Load class metadata -------------------------------------
154
+ # # Expect JSON file: [{"id": 3, "name": "cat", "prompt": "cat"}, …]
155
+ # items_path = Path(path) / "items.json"
156
+ # with items_path.open("r", encoding="utf-8") as f:
157
+ # class_defs: List[Dict[str, Any]] = json.load(f)
158
+
159
+ # # Extract the bits we need later
160
+ # prompts = [item["prompt"] for item in class_defs]
161
+ # self.class_ids: List[int] = [item["id"] for item in class_defs]
162
+ # self.class_names: List[str] = [item["name"] for item in class_defs]
163
+
164
+ # # 4️⃣ Encode all prompts once ---------------------------------
165
+ # with torch.no_grad():
166
+ # text_tokens = tokenizer(prompts).to(self.device)
167
+ # text_feats = self.model.encode_text(text_tokens)
168
+ # text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
169
+ # self.text_features = text_feats # [num_classes, 512]
170
+
171
+ # # ------------------------------------------------------------------ #
172
+ # # INFERENCE CALL #
173
+ # # ------------------------------------------------------------------ #
174
+ # def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
175
+ # """
176
+ # Parameters
177
+ # ----------
178
+ # data : dict
179
+ # Either the raw payload `{"image": "<base64>"}` **or** the
180
+ # Hugging Face convention `{"inputs": {...}}`.
181
+
182
+ # Returns
183
+ # -------
184
+ # list of dict
185
+ # Sorted list of `{"id": int, "label": str, "score": float}`.
186
+ # Scores are the softmax probabilities over the *provided*
187
+ # class list (they sum to 1.0).
188
+ # """
189
+ # # 1️⃣ Unpack the request payload ------------------------------
190
+ # payload: Dict[str, Any] = data.get("inputs", data)
191
+ # img_b64: str = payload["image"]
192
+
193
+ # # 2️⃣ Decode + preprocess -------------------------------------
194
+ # image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
195
+ # img_tensor = self.preprocess(image).unsqueeze(0).to(self.device) # [1, 3, 224, 224]
196
+ # if self.device == "cuda":
197
+ # img_tensor = img_tensor.to(torch.float16)
198
+
199
+ # # 3️⃣ Forward pass (image only) -------------------------------
200
+ # with torch.no_grad(): # no autograd graph
201
+ # img_feat = self.model.encode_image(img_tensor) # [1, 512]
202
+ # img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True) # L2‑normalise
203
+
204
+ # # cosine similarity → logits → softmax probabilities
205
+ # probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0] # [num_classes]
206
+
207
+ # # 4️⃣ Assemble JSON‑serialisable response ---------------------
208
+ # results = zip(self.class_ids, self.class_names, probs.cpu().tolist())
209
+ # return sorted(
210
+ # [{"id": cid, "label": name, "score": float(p)} for cid, name, p in results],
211
+ # key=lambda x: x["score"],
212
+ # reverse=True,
213
+ # )
214
+
215
+
216
+
items.json ADDED
@@ -0,0 +1,902 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "name": "Laptop",
4
+ "short_description": "Laptops are portable computers that can help you play games and learn new things. They can fold up just like a book!",
5
+ "category": "Office",
6
+ "rarity": "common",
7
+ "fun_fact": "Laptops have tiny fans to keep them cool while working.",
8
+ "id": 0,
9
+ "prompt": "a photo of a laptop computer (also called a notebook computer)",
10
+ "slug": "laptop"
11
+ },
12
+ {
13
+ "name": "Pen",
14
+ "short_description": "Pens are used to write and draw. They come in many colors.",
15
+ "category": "Office",
16
+ "rarity": "common",
17
+ "fun_fact": "Pens can write underwater using special waterproof ink.",
18
+ "id": 1,
19
+ "prompt": "a photo of an ink pen for writing",
20
+ "slug": "pen"
21
+ },
22
+ {
23
+ "name": "Notebook",
24
+ "short_description": "A notebook is a book with paper inside. You can write or draw in it!",
25
+ "category": "Office",
26
+ "rarity": "common",
27
+ "fun_fact": "Leonardo da Vinci used notebooks to sketch out his brilliant ideas, like flying machines!",
28
+ "id": 2,
29
+ "prompt": "a photo of a paper notebook for writing (stationery)",
30
+ "slug": "notebook"
31
+ },
32
+ {
33
+ "name": "Stapler",
34
+ "short_description": "A stapler helps you keep papers together by using metal clips.",
35
+ "category": "Office",
36
+ "rarity": "common",
37
+ "fun_fact": "The first stapler was made for King Louis XV in France.",
38
+ "id": 3,
39
+ "prompt": "a photo of a stapler (office paper stapler)",
40
+ "slug": "stapler"
41
+ },
42
+ {
43
+ "name": "Highlighter",
44
+ "short_description": "A highlighter is a bright pen used to mark important words or pictures.",
45
+ "category": "Office",
46
+ "rarity": "rare",
47
+ "fun_fact": "Highlighter ink glows under black light, like magic!",
48
+ "id": 4,
49
+ "prompt": "a photo of a highlighter pen (also called a fluorescent marker)",
50
+ "slug": "highlighter"
51
+ },
52
+ {
53
+ "name": "Paper Clip",
54
+ "short_description": "Paper clips are tiny helpers that keep papers together. They're shiny and colorful!",
55
+ "category": "Office",
56
+ "rarity": "rare",
57
+ "fun_fact": "The first paper clip was made over 100 years ago.",
58
+ "id": 5,
59
+ "prompt": "a photo of a paper clip (wire office clip)",
60
+ "slug": "paper_clip"
61
+ },
62
+ {
63
+ "name": "Desk Chair",
64
+ "short_description": "A desk chair is a seat for working or studying. It can roll and spin!",
65
+ "category": "Office",
66
+ "rarity": "rare",
67
+ "fun_fact": "Early chairs were just wooden and had no cushions.",
68
+ "id": 6,
69
+ "prompt": "a photo of a desk chair (office chair)",
70
+ "slug": "desk_chair"
71
+ },
72
+ {
73
+ "name": "Printer",
74
+ "short_description": "A printer makes copies of what's on a computer. It uses paper and ink.",
75
+ "category": "Office",
76
+ "rarity": "super rare",
77
+ "fun_fact": "Printers make pictures from millions of tiny dots.",
78
+ "id": 7,
79
+ "prompt": "a photo of a document printer (inkjet or laser printer)",
80
+ "slug": "printer"
81
+ },
82
+ {
83
+ "name": "Coffee Mug",
84
+ "short_description": "This is a cup with a handle to drink from. It's great for hot drinks!",
85
+ "category": "Office",
86
+ "rarity": "super rare",
87
+ "fun_fact": "Some mugs can change color or show pictures when they get hot.",
88
+ "id": 8,
89
+ "prompt": "a photo of a coffee mug (cup with a handle)",
90
+ "slug": "coffee_mug"
91
+ },
92
+ {
93
+ "name": "Pencil",
94
+ "short_description": "A pencil lets you draw and write. It's long, thin, and has an eraser.",
95
+ "category": "School",
96
+ "rarity": "common",
97
+ "fun_fact": "A single pencil can draw a line about 35 miles long.",
98
+ "id": 9,
99
+ "prompt": "a photo of a wooden graphite pencil (for writing)",
100
+ "slug": "pencil"
101
+ },
102
+ {
103
+ "name": "Backpack",
104
+ "short_description": "A backpack is a bag worn on your back. It holds all your school supplies!",
105
+ "category": "School",
106
+ "rarity": "common",
107
+ "fun_fact": "Some backpacks have hidden pockets for secret treasures.",
108
+ "id": 10,
109
+ "prompt": "a photo of a backpack (school bag)",
110
+ "slug": "backpack"
111
+ },
112
+ {
113
+ "name": "Book",
114
+ "short_description": "A book has pages with words and pictures. You can learn or enjoy stories.",
115
+ "category": "School",
116
+ "rarity": "common",
117
+ "fun_fact": "Book pages are called leaves, just like trees!",
118
+ "id": 11,
119
+ "prompt": "a photo of a printed book (paper book)",
120
+ "slug": "book"
121
+ },
122
+ {
123
+ "name": "Crayon",
124
+ "short_description": "Crayons are colorful sticks used to draw and color. They're easy to hold!",
125
+ "category": "School",
126
+ "rarity": "common",
127
+ "fun_fact": "Crayons are wax sticks; your warm hands help them draw smoothly.",
128
+ "id": 12,
129
+ "prompt": "a photo of a wax crayon (drawing crayon)",
130
+ "slug": "crayon"
131
+ },
132
+ {
133
+ "name": "Glue Stick",
134
+ "short_description": "A glue stick is used to stick paper and crafts. Just roll and glue!",
135
+ "category": "School",
136
+ "rarity": "common",
137
+ "fun_fact": "Some glue sticks glow in the dark for nighttime crafting fun!",
138
+ "id": 13,
139
+ "prompt": "a photo of a glue stick (solid glue)",
140
+ "slug": "glue_stick"
141
+ },
142
+ {
143
+ "name": "Ruler",
144
+ "short_description": "A ruler is used to measure how long things are.",
145
+ "category": "School",
146
+ "rarity": "rare",
147
+ "fun_fact": "Most rulers are 12 inches long — exactly one foot, just like your shoe!",
148
+ "id": 14,
149
+ "prompt": "a photo of a measuring ruler (straightedge)",
150
+ "slug": "ruler"
151
+ },
152
+ {
153
+ "name": "Lunchbox",
154
+ "short_description": "A lunchbox holds your yummy food for school. It keeps everything safe!",
155
+ "category": "School",
156
+ "rarity": "rare",
157
+ "fun_fact": "Thick, squishy walls help keep your lunch cool and fresh.",
158
+ "id": 15,
159
+ "prompt": "a photo of a lunchbox (school lunch box)",
160
+ "slug": "lunchbox"
161
+ },
162
+ {
163
+ "name": "Eraser",
164
+ "short_description": "Erasers help remove pencil marks. They're like magic fixers for your writing!",
165
+ "category": "School",
166
+ "rarity": "rare",
167
+ "fun_fact": "Erasers were first made from bread crumbs in ancient Japan.",
168
+ "id": 16,
169
+ "prompt": "a photo of an eraser (rubber eraser)",
170
+ "slug": "eraser"
171
+ },
172
+ {
173
+ "name": "Classroom Globe",
174
+ "short_description": "A globe shows us where countries and oceans are. It spins!",
175
+ "category": "School",
176
+ "rarity": "super rare",
177
+ "fun_fact": "A globe shows what our Earth looks like from space!",
178
+ "id": 17,
179
+ "prompt": "a photo of a classroom globe (world globe)",
180
+ "slug": "classroom_globe"
181
+ },
182
+ {
183
+ "name": "Daisy",
184
+ "short_description": "A daisy is a cute flower with white petals and a yellow center.",
185
+ "category": "Garden",
186
+ "rarity": "common",
187
+ "fun_fact": "Daisy flowers close their petals at night and open again in the morning.",
188
+ "id": 18,
189
+ "prompt": "a photo of a daisy (flower)",
190
+ "slug": "daisy"
191
+ },
192
+ {
193
+ "name": "Rock",
194
+ "short_description": "Rocks are hard and come in different shapes and sizes. You can find them almost anywhere!",
195
+ "category": "Garden",
196
+ "rarity": "common",
197
+ "fun_fact": "Some rocks were once melted lava from volcanoes, then cooled and hardened.",
198
+ "id": 19,
199
+ "prompt": "a photo of a rock (stone)",
200
+ "slug": "rock"
201
+ },
202
+ {
203
+ "name": "Leaf",
204
+ "short_description": "Leaves grow on trees and plants. They can be green, red, or yellow!",
205
+ "category": "Garden",
206
+ "rarity": "common",
207
+ "fun_fact": "Leaves make food for plants using sunlight, water, and air.",
208
+ "id": 20,
209
+ "prompt": "a photo of a leaf (plant leaf)",
210
+ "slug": "leaf"
211
+ },
212
+ {
213
+ "name": "Stick",
214
+ "short_description": "Sticks are long and hard, great for building or playing pretend!",
215
+ "category": "Garden",
216
+ "rarity": "common",
217
+ "fun_fact": "Sticks make great homes for small bugs and spiders.",
218
+ "id": 21,
219
+ "prompt": "a photo of a stick (tree branch or twig)",
220
+ "slug": "stick"
221
+ },
222
+ {
223
+ "name": "Watering Can",
224
+ "short_description": "A watering can helps us water plants! Pour water through the spout.",
225
+ "category": "Garden",
226
+ "rarity": "common",
227
+ "fun_fact": "The sprinkler cap at the end of the spout is called a 'rose'!",
228
+ "id": 22,
229
+ "prompt": "a photo of a watering can (garden watering can)",
230
+ "slug": "watering_can"
231
+ },
232
+ {
233
+ "name": "Snail",
234
+ "short_description": "Snails are little animals with shells on their backs. They move very slowly.",
235
+ "category": "Garden",
236
+ "rarity": "rare",
237
+ "fun_fact": "Snails make slippery slime so they can slide without scratching.",
238
+ "id": 23,
239
+ "prompt": "a photo of a snail (animal with a spiral shell)",
240
+ "slug": "snail"
241
+ },
242
+ {
243
+ "name": "Ladybug",
244
+ "short_description": "Ladybugs are small, red insects with black spots. They love to eat pesky bugs.",
245
+ "category": "Garden",
246
+ "rarity": "rare",
247
+ "fun_fact": "Ladybugs can have up to seven spots — count them if you can!",
248
+ "id": 24,
249
+ "prompt": "a photo of a ladybug (also called a ladybird beetle)",
250
+ "slug": "ladybug"
251
+ },
252
+ {
253
+ "name": "Birdhouse",
254
+ "short_description": "A birdhouse is a little house for birds. They stay safe and cozy inside.",
255
+ "category": "Garden",
256
+ "rarity": "super rare",
257
+ "fun_fact": "Some birds like to decorate their houses with shiny objects!",
258
+ "id": 25,
259
+ "prompt": "a photo of a birdhouse (nesting box)",
260
+ "slug": "birdhouse"
261
+ },
262
+ {
263
+ "name": "Garden Gnome",
264
+ "short_description": "A garden gnome is a small statue that lives in gardens and brings fun.",
265
+ "category": "Garden",
266
+ "rarity": "super rare",
267
+ "fun_fact": "Garden gnomes were once believed to guard treasures buried underground.",
268
+ "id": 26,
269
+ "prompt": "a photo of a garden gnome (garden statue)",
270
+ "slug": "garden_gnome"
271
+ },
272
+ {
273
+ "name": "Swing",
274
+ "short_description": "A swing is a seat that hangs from ropes or chains. It's super fun to swing high!",
275
+ "category": "Playground",
276
+ "rarity": "common",
277
+ "fun_fact": "Pumping your legs makes you go higher — each pump gives the swing a push.",
278
+ "id": 27,
279
+ "prompt": "a photo of a swing (playground swing)",
280
+ "slug": "swing"
281
+ },
282
+ {
283
+ "name": "Slide",
284
+ "short_description": "Slides are fun surfaces to quickly slide down. They can be found in playgrounds.",
285
+ "category": "Playground",
286
+ "rarity": "common",
287
+ "fun_fact": "Slides can be found in shapes like spirals and waves.",
288
+ "id": 28,
289
+ "prompt": "a photo of a slide (playground slide)",
290
+ "slug": "slide"
291
+ },
292
+ {
293
+ "name": "Ball",
294
+ "short_description": "Balls are round toys you can throw, catch, or kick. They're lots of fun!",
295
+ "category": "Playground",
296
+ "rarity": "common",
297
+ "fun_fact": "Balls are spheres, so they roll easily in any direction.",
298
+ "id": 29,
299
+ "prompt": "a photo of a ball (toy ball)",
300
+ "slug": "ball"
301
+ },
302
+ {
303
+ "name": "Sandbox",
304
+ "short_description": "A sandbox is a fun place where kids can play with sand. You can build castles and dig holes!",
305
+ "category": "Playground",
306
+ "rarity": "common",
307
+ "fun_fact": "Sand can be different colors like yellow, white, and even pink!",
308
+ "id": 30,
309
+ "prompt": "a photo of a sandbox (box of play sand)",
310
+ "slug": "sandbox"
311
+ },
312
+ {
313
+ "name": "Seesaw",
314
+ "short_description": "A seesaw is a long board that goes up and down. Friends can play on it together!",
315
+ "category": "Playground",
316
+ "rarity": "common",
317
+ "fun_fact": "Playing on a seesaw helps improve your balance and teamwork.",
318
+ "id": 31,
319
+ "prompt": "a photo of a seesaw (teeter-totter)",
320
+ "slug": "seesaw"
321
+ },
322
+ {
323
+ "name": "Hula Hoop",
324
+ "short_description": "A hula hoop is a big round toy you can spin around your waist. It's super fun!",
325
+ "category": "Playground",
326
+ "rarity": "rare",
327
+ "fun_fact": "The world record is over 100 hours of hula hooping!",
328
+ "id": 32,
329
+ "prompt": "a photo of a hula hoop (toy hoop)",
330
+ "slug": "hula_hoop"
331
+ },
332
+ {
333
+ "name": "Frisbee",
334
+ "short_description": "A frisbee is a round, flying disc that you can throw and catch with friends.",
335
+ "category": "Playground",
336
+ "rarity": "rare",
337
+ "fun_fact": "There's a sport called Ultimate Frisbee, like soccer with Frisbees.",
338
+ "id": 33,
339
+ "prompt": "a photo of a frisbee (flying disc)",
340
+ "slug": "frisbee"
341
+ },
342
+ {
343
+ "name": "Climbing Frame",
344
+ "short_description": "A climbing frame is a structure to climb and play on. It's super fun!",
345
+ "category": "Playground",
346
+ "rarity": "super rare",
347
+ "fun_fact": "Climbing frames can be shaped like rockets or castles!",
348
+ "id": 34,
349
+ "prompt": "a photo of a climbing frame (jungle gym)",
350
+ "slug": "climbing_frame"
351
+ },
352
+ {
353
+ "name": "Bouncer",
354
+ "short_description": "A bouncer is a fun thing to jump on. It helps you bounce up and down!",
355
+ "category": "Playground",
356
+ "rarity": "super rare",
357
+ "fun_fact": "Springs or bouncy air push you back up after each jump—boing, boing!",
358
+ "id": 35,
359
+ "prompt": "a photo of a playground bouncer (spring rider toy)",
360
+ "slug": "bouncer"
361
+ },
362
+ {
363
+ "name": "Dog",
364
+ "short_description": "A dog is a fun, furry friend. They love to play and wag their tails!",
365
+ "category": "Animals",
366
+ "rarity": "common",
367
+ "fun_fact": "Dogs have a super sense of smell, much better than ours!",
368
+ "id": 36,
369
+ "prompt": "a photo of a dog (domestic dog)",
370
+ "slug": "dog"
371
+ },
372
+ {
373
+ "name": "Cat",
374
+ "short_description": "Cats are furry animals with whiskers and tails. They love to purr and nap.",
375
+ "category": "Animals",
376
+ "rarity": "common",
377
+ "fun_fact": "Whiskers help cats feel around in the dark or tight spaces.",
378
+ "id": 37,
379
+ "prompt": "a photo of a cat (house cat)",
380
+ "slug": "cat"
381
+ },
382
+ {
383
+ "name": "Bird",
384
+ "short_description": "Birds have wings and feathers. They love to chirp and fly!",
385
+ "category": "Animals",
386
+ "rarity": "common",
387
+ "fun_fact": "Penguins are birds but can't fly; they are great swimmers!",
388
+ "id": 38,
389
+ "prompt": "a photo of a bird (animal with feathers)",
390
+ "slug": "bird"
391
+ },
392
+ {
393
+ "name": "Fly",
394
+ "short_description": "Flies are tiny insects that love to buzz around. They have wings and big eyes!",
395
+ "category": "Animals",
396
+ "rarity": "common",
397
+ "fun_fact": "A fly's wings beat over 200 times every second!",
398
+ "id": 39,
399
+ "prompt": "a photo of a fly (housefly insect)",
400
+ "slug": "fly"
401
+ },
402
+ {
403
+ "name": "Fish",
404
+ "short_description": "Fish live in water and come in many colors and sizes. They have fins to swim.",
405
+ "category": "Animals",
406
+ "rarity": "common",
407
+ "fun_fact": "Fish can sleep with their eyes open—because they have no eyelids!",
408
+ "id": 40,
409
+ "prompt": "a photo of a fish (aquarium or pond fish)",
410
+ "slug": "fish"
411
+ },
412
+ {
413
+ "name": "Rabbit",
414
+ "short_description": "Rabbits are fluffy animals with long ears and love to hop around.",
415
+ "category": "Animals",
416
+ "rarity": "rare",
417
+ "fun_fact": "A rabbit's teeth never stop growing, so they chew a lot!",
418
+ "id": 41,
419
+ "prompt": "a photo of a rabbit (bunny)",
420
+ "slug": "rabbit"
421
+ },
422
+ {
423
+ "name": "Frog",
424
+ "short_description": "Frogs are jumpy animals that live in ponds. They make funny ribbit sounds!",
425
+ "category": "Animals",
426
+ "rarity": "rare",
427
+ "fun_fact": "Frogs can jump over 20 times their own body length!",
428
+ "id": 42,
429
+ "prompt": "a photo of a frog (amphibian)",
430
+ "slug": "frog"
431
+ },
432
+ {
433
+ "name": "Duck",
434
+ "short_description": "Ducks are birds that love to swim and quack. They have webbed feet!",
435
+ "category": "Animals",
436
+ "rarity": "super rare",
437
+ "fun_fact": "Ducks have waterproof feathers thanks to special oils they produce.",
438
+ "id": 43,
439
+ "prompt": "a photo of a duck (water bird)",
440
+ "slug": "duck"
441
+ },
442
+ {
443
+ "name": "Squirrel",
444
+ "short_description": "Squirrels are small, bushy-tailed animals that love to climb trees. They gather nuts!",
445
+ "category": "Animals",
446
+ "rarity": "super rare",
447
+ "fun_fact": "Squirrels plant thousands of trees by forgetting where they buried nuts.",
448
+ "id": 44,
449
+ "prompt": "a photo of a squirrel (tree squirrel)",
450
+ "slug": "squirrel"
451
+ },
452
+ {
453
+ "name": "Car",
454
+ "short_description": "A car has wheels and a steering wheel. It can take you places!",
455
+ "category": "Transportation",
456
+ "rarity": "common",
457
+ "fun_fact": "The first cars had no steering wheels—drivers used levers instead.",
458
+ "id": 45,
459
+ "prompt": "a photo of a car (automobile)",
460
+ "slug": "car"
461
+ },
462
+ {
463
+ "name": "Bus",
464
+ "short_description": "A bus is a big vehicle that takes people to places. It's like a car, but much bigger!",
465
+ "category": "Transportation",
466
+ "rarity": "common",
467
+ "fun_fact": "The first buses were pulled by horses before engines were invented!",
468
+ "id": 46,
469
+ "prompt": "a photo of a bus (passenger bus)",
470
+ "slug": "bus"
471
+ },
472
+ {
473
+ "name": "Bicycle",
474
+ "short_description": "Bicycles have pedals and handlebars you use to ride around. They're great for fun and exercise!",
475
+ "category": "Transportation",
476
+ "rarity": "common",
477
+ "fun_fact": "The first bicycles were called \"velocipedes\" and had wooden wheels!",
478
+ "id": 47,
479
+ "prompt": "a photo of a bicycle (bike)",
480
+ "slug": "bicycle"
481
+ },
482
+ {
483
+ "name": "Scooter",
484
+ "short_description": "A scooter is a ride-on toy with two wheels. You can push with your feet!",
485
+ "category": "Transportation",
486
+ "rarity": "common",
487
+ "fun_fact": "Early scooters were made of wood with roller skate wheels.",
488
+ "id": 48,
489
+ "prompt": "a photo of a scooter (kick scooter)",
490
+ "slug": "scooter"
491
+ },
492
+ {
493
+ "name": "Train",
494
+ "short_description": "Trains are big, fast vehicles that travel on tracks. They carry people and goods.",
495
+ "category": "Transportation",
496
+ "rarity": "common",
497
+ "fun_fact": "The longest train ever was over 4.5 miles long!",
498
+ "id": 49,
499
+ "prompt": "a photo of a train (railway train)",
500
+ "slug": "train"
501
+ },
502
+ {
503
+ "name": "Fire Truck",
504
+ "short_description": "A fire truck helps put out fires. It carries firefighters and water.",
505
+ "category": "Transportation",
506
+ "rarity": "rare",
507
+ "fun_fact": "Some fire trucks have ladders that reach over 100 feet high.",
508
+ "id": 50,
509
+ "prompt": "a photo of a fire truck (fire engine)",
510
+ "slug": "fire_truck"
511
+ },
512
+ {
513
+ "name": "Tractor",
514
+ "short_description": "Tractors are big machines that help farmers. They pull heavy things and plow fields.",
515
+ "category": "Transportation",
516
+ "rarity": "rare",
517
+ "fun_fact": "Some tractors have wheels taller than a person!",
518
+ "id": 51,
519
+ "prompt": "a photo of a tractor (farm tractor)",
520
+ "slug": "tractor"
521
+ },
522
+ {
523
+ "name": "Plane",
524
+ "short_description": "Planes are big flying machines. They take us on long trips in the sky!",
525
+ "category": "Transportation",
526
+ "rarity": "super rare",
527
+ "fun_fact": "The fastest passenger plane flew faster than the speed of sound.",
528
+ "id": 52,
529
+ "prompt": "a photo of an airplane (passenger plane)",
530
+ "slug": "plane"
531
+ },
532
+ {
533
+ "name": "Boat",
534
+ "short_description": "A boat is a fun way to travel on water. It can be big or small!",
535
+ "category": "Transportation",
536
+ "rarity": "super rare",
537
+ "fun_fact": "Viking boats were called longships and had dragon heads!",
538
+ "id": 53,
539
+ "prompt": "a photo of a boat (watercraft)",
540
+ "slug": "boat"
541
+ },
542
+ {
543
+ "name": "Toothbrush",
544
+ "short_description": "A toothbrush helps clean your teeth. Use it with toothpaste for a shiny smile!",
545
+ "category": "House",
546
+ "rarity": "common",
547
+ "fun_fact": "The first toothbrushes were made from sticks and twigs!",
548
+ "id": 54,
549
+ "prompt": "a photo of a toothbrush (manual toothbrush)",
550
+ "slug": "toothbrush"
551
+ },
552
+ {
553
+ "name": "Towel",
554
+ "short_description": "A towel is soft and helps you dry off after a bath or swim.",
555
+ "category": "House",
556
+ "rarity": "common",
557
+ "fun_fact": "Towels soak up water with thousands of tiny fabric loops.",
558
+ "id": 55,
559
+ "prompt": "a photo of a towel (bath towel)",
560
+ "slug": "towel"
561
+ },
562
+ {
563
+ "name": "Lamp",
564
+ "short_description": "A lamp lights up a room. You turn it on with a switch!",
565
+ "category": "House",
566
+ "rarity": "common",
567
+ "fun_fact": "The first electric lamp was made over 140 years ago by Thomas Edison.",
568
+ "id": 56,
569
+ "prompt": "a photo of a lamp (table or floor lamp)",
570
+ "slug": "lamp"
571
+ },
572
+ {
573
+ "name": "Chair",
574
+ "short_description": "A chair is a seat with four legs and a back. It's great for sitting!",
575
+ "category": "House",
576
+ "rarity": "common",
577
+ "fun_fact": "In ancient Egypt, only kings and queens had chairs with backs.",
578
+ "id": 57,
579
+ "prompt": "a photo of a chair (wooden or cushioned chair)",
580
+ "slug": "chair"
581
+ },
582
+ {
583
+ "name": "Cushion",
584
+ "short_description": "A cushion is a soft pad you can sit on. It makes couches more comfy!",
585
+ "category": "House",
586
+ "rarity": "common",
587
+ "fun_fact": "In ancient times, cushions were signs of royalty and power.",
588
+ "id": 58,
589
+ "prompt": "a photo of a cushion (sofa cushion)",
590
+ "slug": "cushion"
591
+ },
592
+ {
593
+ "name": "Fan",
594
+ "short_description": "A fan makes the air move and keeps us cool. It's like a mini wind!",
595
+ "category": "House",
596
+ "rarity": "rare",
597
+ "fun_fact": "Electric fans were invented over 130 years ago!",
598
+ "id": 59,
599
+ "prompt": "a photo of a fan (electric fan)",
600
+ "slug": "fan"
601
+ },
602
+ {
603
+ "name": "Clock",
604
+ "short_description": "A clock tells the time. It has numbers and hands to point.",
605
+ "category": "House",
606
+ "rarity": "rare",
607
+ "fun_fact": "Big Ben is a famous clock tower in London, England.",
608
+ "id": 60,
609
+ "prompt": "a photo of a clock (analog or wall clock)",
610
+ "slug": "clock"
611
+ },
612
+ {
613
+ "name": "Painting",
614
+ "short_description": "Paintings are cool pictures made with colors on paper or canvas.",
615
+ "category": "House",
616
+ "rarity": "super rare",
617
+ "fun_fact": "Famous artist Leonardo da Vinci loved painting with eggs.",
618
+ "id": 61,
619
+ "prompt": "a photo of a painting (art on canvas or paper)",
620
+ "slug": "painting"
621
+ },
622
+ {
623
+ "name": "Plant Pot",
624
+ "short_description": "A plant pot holds soil for growing plants. It's like a tiny garden!",
625
+ "category": "House",
626
+ "rarity": "super rare",
627
+ "fun_fact": "Using big pots lets plants grow big and tall!",
628
+ "id": 62,
629
+ "prompt": "a photo of a plant pot (flower pot)",
630
+ "slug": "plant_pot"
631
+ },
632
+ {
633
+ "name": "T-Shirt",
634
+ "short_description": "A T-shirt is a piece of clothing with short sleeves. It's soft and comfy!",
635
+ "category": "Clothing",
636
+ "rarity": "common",
637
+ "fun_fact": "T-shirts got their name because of their T-shape.",
638
+ "id": 63,
639
+ "prompt": "a photo of a t-shirt (short-sleeved shirt)",
640
+ "slug": "t_shirt"
641
+ },
642
+ {
643
+ "name": "Socks",
644
+ "short_description": "Socks are soft cloth tubes for your feet. They come in lots of colors!",
645
+ "category": "Clothing",
646
+ "rarity": "common",
647
+ "fun_fact": "There are special socks for running with extra cushion.",
648
+ "id": 64,
649
+ "prompt": "a photo of socks (pair of socks)",
650
+ "slug": "socks"
651
+ },
652
+ {
653
+ "name": "Shoes",
654
+ "short_description": "Shoes protect your feet and look cool! They come in all colors.",
655
+ "category": "Clothing",
656
+ "rarity": "common",
657
+ "fun_fact": "Shoes were first invented over 5,000 years ago!",
658
+ "id": 65,
659
+ "prompt": "a photo of shoes (pair of shoes)",
660
+ "slug": "shoes"
661
+ },
662
+ {
663
+ "name": "Hat",
664
+ "short_description": "A hat is something you wear on your head. It can keep you warm or cool!",
665
+ "category": "Clothing",
666
+ "rarity": "common",
667
+ "fun_fact": "Some hats are made just for magic tricks!",
668
+ "id": 66,
669
+ "prompt": "a photo of a hat (headwear)",
670
+ "slug": "hat"
671
+ },
672
+ {
673
+ "name": "Jacket",
674
+ "short_description": "A jacket keeps you warm when it's chilly. It's like a cozy hug.",
675
+ "category": "Clothing",
676
+ "rarity": "common",
677
+ "fun_fact": "Winter jackets are often stuffed with fluffy feathers!",
678
+ "id": 67,
679
+ "prompt": "a photo of a jacket (outerwear)",
680
+ "slug": "jacket"
681
+ },
682
+ {
683
+ "name": "Scarf",
684
+ "short_description": "A scarf is a soft piece of clothing you wear around your neck. It keeps you warm.",
685
+ "category": "Clothing",
686
+ "rarity": "rare",
687
+ "fun_fact": "In ancient Rome, scarves were used to wipe sweat, not for warmth.",
688
+ "id": 68,
689
+ "prompt": "a photo of a scarf (neckwear)",
690
+ "slug": "scarf"
691
+ },
692
+ {
693
+ "name": "Gloves",
694
+ "short_description": "Gloves are like tiny jackets for your hands. They keep you warm or safe!",
695
+ "category": "Clothing",
696
+ "rarity": "rare",
697
+ "fun_fact": "Astronauts wear special gloves to hold tools in space!",
698
+ "id": 69,
699
+ "prompt": "a photo of gloves (pair of gloves)",
700
+ "slug": "gloves"
701
+ },
702
+ {
703
+ "name": "Sunglasses",
704
+ "short_description": "Sunglasses protect your eyes from the bright sun. They come in cool shapes!",
705
+ "category": "Clothing",
706
+ "rarity": "rare",
707
+ "fun_fact": "In ancient China, judges wore dark glasses to hide their eyes in court.",
708
+ "id": 70,
709
+ "prompt": "a photo of sunglasses (pair of sunglasses)",
710
+ "slug": "sunglasses"
711
+ },
712
+ {
713
+ "name": "Umbrella",
714
+ "short_description": "Umbrellas keep you dry in the rain. They open wide like a big flower!",
715
+ "category": "Clothing",
716
+ "rarity": "super rare",
717
+ "fun_fact": "Umbrellas were once used as sun protection, not for rain.",
718
+ "id": 71,
719
+ "prompt": "a photo of an umbrella (rain umbrella)",
720
+ "slug": "umbrella"
721
+ },
722
+ {
723
+ "name": "Sofa",
724
+ "short_description": "A sofa is a big, comfy seat. You can sit or lie on it.",
725
+ "category": "Living Room",
726
+ "rarity": "common",
727
+ "fun_fact": "The word 'sofa' comes from an Arabic word meaning 'bench with cushions'.",
728
+ "id": 72,
729
+ "prompt": "a photo of a sofa (couch)",
730
+ "slug": "sofa"
731
+ },
732
+ {
733
+ "name": "TV",
734
+ "short_description": "A TV shows your favorite cartoons and movies with sound and pictures.",
735
+ "category": "Living Room",
736
+ "rarity": "common",
737
+ "fun_fact": "The first TV shows were black and white.",
738
+ "id": 73,
739
+ "prompt": "a photo of a television (TV set)",
740
+ "slug": "tv"
741
+ },
742
+ {
743
+ "name": "Remote Control",
744
+ "short_description": "A remote helps you change channels on the TV. It has buttons to press!",
745
+ "category": "Living Room",
746
+ "rarity": "rare",
747
+ "fun_fact": "Remote controls use light beams called infrared to work.",
748
+ "id": 74,
749
+ "prompt": "a photo of a remote control (TV remote)",
750
+ "slug": "remote_control"
751
+ },
752
+ {
753
+ "name": "Rug",
754
+ "short_description": "Rugs are soft mats that cover the floor. They come in many colors and shapes.",
755
+ "category": "Living Room",
756
+ "rarity": "common",
757
+ "fun_fact": "Some handmade rugs can take years to finish.",
758
+ "id": 75,
759
+ "prompt": "a photo of a rug (floor carpet)",
760
+ "slug": "rug"
761
+ },
762
+ {
763
+ "name": "Bookshelf",
764
+ "short_description": "A bookshelf holds your books, keeping them tidy and easy to find.",
765
+ "category": "Living Room",
766
+ "rarity": "rare",
767
+ "fun_fact": "Some bookshelves have secret compartments for hiding small treasures.",
768
+ "id": 76,
769
+ "prompt": "a photo of a bookshelf (bookcase)",
770
+ "slug": "bookshelf"
771
+ },
772
+ {
773
+ "name": "Blanket",
774
+ "short_description": "A blanket is soft and keeps you warm. It's great for snuggling!",
775
+ "category": "Living Room",
776
+ "rarity": "rare",
777
+ "fun_fact": "Blankets keep you warm by trapping your body heat inside.",
778
+ "id": 77,
779
+ "prompt": "a photo of a blanket (throw blanket)",
780
+ "slug": "blanket"
781
+ },
782
+ {
783
+ "name": "Candle",
784
+ "short_description": "Candles are sticks of wax with a wick that can be lit to make light. They also smell nice!",
785
+ "category": "Living Room",
786
+ "rarity": "super rare",
787
+ "fun_fact": "The earliest candles were made from animal fat, not wax.",
788
+ "id": 78,
789
+ "prompt": "a photo of a candle (wax candle)",
790
+ "slug": "candle"
791
+ },
792
+ {
793
+ "name": "Record Player",
794
+ "short_description": "A record player plays music from vinyl records. It uses a needle to read grooves.",
795
+ "category": "Living Room",
796
+ "rarity": "super rare",
797
+ "fun_fact": "A needle travels in grooves to create the music we hear.",
798
+ "id": 79,
799
+ "prompt": "a photo of a record player (turntable)",
800
+ "slug": "record_player"
801
+ },
802
+ {
803
+ "name": "Coffee Table",
804
+ "short_description": "A coffee table sits in front of the sofa and holds drinks, snacks, or board games.",
805
+ "category": "Living Room",
806
+ "rarity": "common",
807
+ "fun_fact": "Coffee tables can be square, round, or even shaped like animals!",
808
+ "id": 80,
809
+ "prompt": "a photo of a coffee table (low table)",
810
+ "slug": "coffee_table"
811
+ },
812
+ {
813
+ "name": "Teddy Bear",
814
+ "short_description": "Teddy bears are soft, cuddly toys. They love hugs and snuggles!",
815
+ "category": "Toys & Crafts",
816
+ "rarity": "common",
817
+ "fun_fact": "Teddy bears are named after U.S. President Theodore Roosevelt.",
818
+ "id": 81,
819
+ "prompt": "a photo of a teddy bear (stuffed toy)",
820
+ "slug": "teddy_bear"
821
+ },
822
+ {
823
+ "name": "Toy Car",
824
+ "short_description": "A toy car is fun to zoom around. You can pretend it's on a racetrack!",
825
+ "category": "Toys & Crafts",
826
+ "rarity": "common",
827
+ "fun_fact": "Some tiny toy cars can roll over 20 feet on one push!",
828
+ "id": 82,
829
+ "prompt": "a photo of a toy car (miniature car)",
830
+ "slug": "toy_car"
831
+ },
832
+ {
833
+ "name": "LEGO",
834
+ "short_description": "LEGO bricks are colorful blocks that you can use to build anything you imagine!",
835
+ "category": "Toys & Crafts",
836
+ "rarity": "common",
837
+ "fun_fact": "The word LEGO means 'play well' in Danish.",
838
+ "id": 83,
839
+ "prompt": "a photo of lego bricks (building blocks)",
840
+ "slug": "lego"
841
+ },
842
+ {
843
+ "name": "Paintbrush",
844
+ "short_description": "A paintbrush is used to make art by spreading paint on surfaces. It has soft bristles!",
845
+ "category": "Toys & Crafts",
846
+ "rarity": "common",
847
+ "fun_fact": "The oldest known paintbrushes were made from animal hair tied to sticks.",
848
+ "id": 84,
849
+ "prompt": "a photo of a paintbrush (art brush)",
850
+ "slug": "paintbrush"
851
+ },
852
+ {
853
+ "name": "Doll",
854
+ "short_description": "A doll is a fun toy that looks like a person. You can dress it up and play with it!",
855
+ "category": "Toys & Crafts",
856
+ "rarity": "rare",
857
+ "fun_fact": "Ancient dolls were made from clay, wood, and even ivory.",
858
+ "id": 85,
859
+ "prompt": "a photo of a doll (toy doll)",
860
+ "slug": "doll"
861
+ },
862
+ {
863
+ "name": "Videogame Console",
864
+ "short_description": "A console lets you play video games on your TV. How fun!",
865
+ "category": "Toys & Crafts",
866
+ "rarity": "common",
867
+ "fun_fact": "The first home console was called the Magnavox Odyssey.",
868
+ "id": 86,
869
+ "prompt": "a photo of a video game console (gaming console)",
870
+ "slug": "videogame_console"
871
+ },
872
+ {
873
+ "name": "Puzzle Piece",
874
+ "short_description": "A puzzle piece fits with others to make a big picture. It's like a fun mystery!",
875
+ "category": "Toys & Crafts",
876
+ "rarity": "rare",
877
+ "fun_fact": "The first jigsaw puzzles were made from maps to teach kids geography.",
878
+ "id": 87,
879
+ "prompt": "a photo of a puzzle piece (jigsaw puzzle piece)",
880
+ "slug": "puzzle_piece"
881
+ },
882
+ {
883
+ "name": "Stickers",
884
+ "short_description": "Stickers are like colorful, sticky friends you can put on things! They make everything fun.",
885
+ "category": "Toys & Crafts",
886
+ "rarity": "rare",
887
+ "fun_fact": "Some artists use thousands of tiny stickers to create huge, amazing pictures.",
888
+ "id": 88,
889
+ "prompt": "a photo of stickers (adhesive stickers)",
890
+ "slug": "stickers"
891
+ },
892
+ {
893
+ "name": "Magic Wand",
894
+ "short_description": "A magic wand is a stick that wizards use to do magic! It sparkles and shines.",
895
+ "category": "Toys & Crafts",
896
+ "rarity": "super rare",
897
+ "fun_fact": "Wizards wave their wands in special ways for magic to work!",
898
+ "id": 89,
899
+ "prompt": "a photo of a magic wand (toy wand)",
900
+ "slug": "magic_wand"
901
+ }
902
+ ]
reparam.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # For licensing see accompanying LICENSE file.
3
+ # Copyright (C) 2024 Apple Inc. All Rights Reserved.
4
+ #
5
+ from typing import Union, Tuple
6
+
7
+ import copy
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+
12
+ __all__ = ["MobileOneBlock", "reparameterize_model"]
13
+
14
+
15
class SEBlock(nn.Module):
    """Squeeze and Excite module.

    PyTorch implementation of `Squeeze-and-Excitation Networks` -
    https://arxiv.org/pdf/1709.01507.pdf
    """

    def __init__(self, in_channels: int, rd_ratio: float = 0.0625) -> None:
        """Construct a Squeeze and Excite Module.

        Args:
            in_channels: Number of input channels.
            rd_ratio: Input channel reduction ratio.
        """
        super().__init__()
        # Bottleneck width shared by both 1x1 convolutions. ``int()``
        # truncates, so the product is rounded towards zero.
        reduced_channels = int(in_channels * rd_ratio)
        self.reduce = nn.Conv2d(
            in_channels=in_channels,
            out_channels=reduced_channels,
            kernel_size=1,
            stride=1,
            bias=True,
        )
        self.expand = nn.Conv2d(
            in_channels=reduced_channels,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            bias=True,
        )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Apply forward pass.

        Args:
            inputs: 4-D tensor whose dimension 1 holds the channels.

        Returns:
            ``inputs`` multiplied channel-wise by a sigmoid gate; the result
            has the same shape as ``inputs``.
        """
        _, c, h, w = inputs.size()
        # Squeeze: average over the whole spatial extent (kernel == H x W).
        x = F.avg_pool2d(inputs, kernel_size=[h, w])
        # Excite: bottleneck 1x1 convs with ReLU in between, sigmoid gate.
        x = self.reduce(x)
        x = F.relu(x)
        x = self.expand(x)
        x = torch.sigmoid(x)
        # One gate value per channel, broadcast over the spatial dimensions.
        x = x.view(-1, c, 1, 1)
        return inputs * x
55
+
56
+
57
+ class MobileOneBlock(nn.Module):
58
+ """MobileOne building block.
59
+
60
+ This block has a multi-branched architecture at train-time
61
+ and plain-CNN style architecture at inference time
62
+ For more details, please refer to our paper:
63
+ `An Improved One millisecond Mobile Backbone` -
64
+ https://arxiv.org/pdf/2206.04040.pdf
65
+ """
66
+
67
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    kernel_size: int,
    stride: int = 1,
    padding: int = 0,
    dilation: int = 1,
    groups: int = 1,
    inference_mode: bool = False,
    use_se: bool = False,
    use_act: bool = True,
    use_scale_branch: bool = True,
    num_conv_branches: int = 1,
    activation: Union[nn.Module, None] = None,
) -> None:
    """Construct a MobileOneBlock module.

    Args:
        in_channels: Number of channels in the input.
        out_channels: Number of channels produced by the block.
        kernel_size: Size of the convolution kernel.
        stride: Stride size.
        padding: Zero-padding size.
        dilation: Kernel dilation factor.
        groups: Group number.
        inference_mode: If True, instantiates model in inference mode
            (a single ``reparam_conv``; no ``rbr_*`` branches are created).
        use_se: If True, an ``SEBlock`` is applied to the block output;
            otherwise ``nn.Identity``.
        use_act: Whether to use activation. Default: ``True``
        use_scale_branch: Whether to use scale branch. Default: ``True``
        num_conv_branches: Number of linear conv branches.
        activation: Activation module applied when ``use_act`` is True.
            ``None`` (the default) creates a fresh ``nn.GELU()`` for this
            block.
    """
    super(MobileOneBlock, self).__init__()
    self.inference_mode = inference_mode
    self.groups = groups
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.kernel_size = kernel_size
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_conv_branches = num_conv_branches

    # Optional squeeze-and-excite stage on the block output.
    self.se = SEBlock(out_channels) if use_se else nn.Identity()

    # A module instance used as a default argument is created once, at
    # definition time, and would be shared by every block; build the
    # default per block instead.
    if not use_act:
        self.activation = nn.Identity()
    elif activation is None:
        self.activation = nn.GELU()
    else:
        self.activation = activation

    if inference_mode:
        # Single fused convolution; the training-time branches are absent.
        self.reparam_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=True,
        )
    else:
        # Re-parameterizable skip connection: only possible when the block
        # keeps both the channel count and the spatial resolution.
        self.rbr_skip = (
            nn.BatchNorm2d(num_features=in_channels)
            if out_channels == in_channels and stride == 1
            else None
        )

        # Re-parameterizable conv branches
        if num_conv_branches > 0:
            self.rbr_conv = nn.ModuleList(
                [
                    self._conv_bn(kernel_size=kernel_size, padding=padding)
                    for _ in range(self.num_conv_branches)
                ]
            )
        else:
            self.rbr_conv = None

        # Re-parameterizable scale branch
        self.rbr_scale = None
        # kernel_size may be a non-int sequence; only its first entry
        # decides whether a 1x1 scale branch is worthwhile.
        if not isinstance(kernel_size, int):
            kernel_size = kernel_size[0]
        if (kernel_size > 1) and use_scale_branch:
            self.rbr_scale = self._conv_bn(kernel_size=1, padding=0)
157
+
158
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run the block on ``x``.

    In inference mode only the fused ``reparam_conv`` is evaluated.
    Otherwise the outputs of the skip, scale and conv branches (whichever
    of them exist) are summed. In both cases the result is passed through
    ``self.se`` and then ``self.activation``.

    Args:
        x: Input tensor handed to every branch.

    Returns:
        The activated, SE-processed output tensor.
    """
    # Single-branch path used after re-parameterization.
    if self.inference_mode:
        return self.activation(self.se(self.reparam_conv(x)))

    # Train-time path: missing branches contribute a plain 0.
    skip_term = self.rbr_skip(x) if self.rbr_skip is not None else 0
    scale_term = self.rbr_scale(x) if self.rbr_scale is not None else 0

    branch_sum = scale_term + skip_term
    if self.rbr_conv is not None:
        for branch_idx in range(self.num_conv_branches):
            branch_sum += self.rbr_conv[branch_idx](x)

    return self.activation(self.se(branch_sum))
182
+
183
def reparameterize(self):
    """Collapse the train-time branches into one convolution.

    Follows `RepVGG: Making VGG-style ConvNets Great Again`
    (https://arxiv.org/pdf/2101.03697.pdf): the fused kernel and bias from
    ``_get_kernel_bias`` are loaded into a new ``reparam_conv``, the branch
    attributes are removed and the block is switched to inference mode.
    Calling this on a block that is already in inference mode does nothing.
    """
    if self.inference_mode:
        return

    fused_kernel, fused_bias = self._get_kernel_bias()
    self.reparam_conv = nn.Conv2d(
        self.in_channels,
        self.out_channels,
        self.kernel_size,
        stride=self.stride,
        padding=self.padding,
        dilation=self.dilation,
        groups=self.groups,
        bias=True,
    )
    self.reparam_conv.weight.data = fused_kernel
    self.reparam_conv.bias.data = fused_bias

    # Detach every parameter before dropping the now-unused branches.
    for param in self.parameters():
        param.detach_()
    for branch_name in ("rbr_conv", "rbr_scale"):
        delattr(self, branch_name)
    if hasattr(self, "rbr_skip"):
        delattr(self, "rbr_skip")

    self.inference_mode = True
214
+
215
+ def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
216
+ """Method to obtain re-parameterized kernel and bias.
217
+ Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83
218
+
219
+ Returns:
220
+ Tuple of (kernel, bias) after fusing branches.
221
+ """
222
+ # get weights and bias of scale branch
223
+ kernel_scale = 0
224
+ bias_scale = 0
225
+ if self.rbr_scale is not None:
226
+ kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale)
227
+ # Pad scale branch kernel to match conv branch kernel size.
228
+ pad = self.kernel_size // 2
229
+ kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad])
230
+
231
+ # get weights and bias of skip branch
232
+ kernel_identity = 0
233
+ bias_identity = 0
234
+ if self.rbr_skip is not None:
235
+ kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip)
236
+
237
+ # get weights and bias of conv branches
238
+ kernel_conv = 0
239
+ bias_conv = 0
240
+ if self.rbr_conv is not None:
241
+ for ix in range(self.num_conv_branches):
242
+ _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix])
243
+ kernel_conv += _kernel
244
+ bias_conv += _bias
245
+
246
+ kernel_final = kernel_conv + kernel_scale + kernel_identity
247
+ bias_final = bias_conv + bias_scale + bias_identity
248
+ return kernel_final, bias_final
249
+
250
+ def _fuse_bn_tensor(
251
+ self, branch: Union[nn.Sequential, nn.BatchNorm2d]
252
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
253
+ """Method to fuse batchnorm layer with preceeding conv layer.
254
+ Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95
255
+
256
+ Args:
257
+ branch: Sequence of ops to be fused.
258
+
259
+ Returns:
260
+ Tuple of (kernel, bias) after fusing batchnorm.
261
+ """
262
+ if isinstance(branch, nn.Sequential):
263
+ kernel = branch.conv.weight
264
+ running_mean = branch.bn.running_mean
265
+ running_var = branch.bn.running_var
266
+ gamma = branch.bn.weight
267
+ beta = branch.bn.bias
268
+ eps = branch.bn.eps
269
+ else:
270
+ assert isinstance(branch, nn.BatchNorm2d)
271
+ if not hasattr(self, "id_tensor"):
272
+ input_dim = self.in_channels // self.groups
273
+
274
+ kernel_size = self.kernel_size
275
+ if isinstance(self.kernel_size, int):
276
+ kernel_size = (self.kernel_size, self.kernel_size)
277
+
278
+ kernel_value = torch.zeros(
279
+ (self.in_channels, input_dim, kernel_size[0], kernel_size[1]),
280
+ dtype=branch.weight.dtype,
281
+ device=branch.weight.device,
282
+ )
283
+ for i in range(self.in_channels):
284
+ kernel_value[
285
+ i, i % input_dim, kernel_size[0] // 2, kernel_size[1] // 2
286
+ ] = 1
287
+ self.id_tensor = kernel_value
288
+ kernel = self.id_tensor
289
+ running_mean = branch.running_mean
290
+ running_var = branch.running_var
291
+ gamma = branch.weight
292
+ beta = branch.bias
293
+ eps = branch.eps
294
+ std = (running_var + eps).sqrt()
295
+ t = (gamma / std).reshape(-1, 1, 1, 1)
296
+ return kernel * t, beta - running_mean * gamma / std
297
+
298
+ def _conv_bn(self, kernel_size: int, padding: int) -> nn.Sequential:
299
+ """Helper method to construct conv-batchnorm layers.
300
+
301
+ Args:
302
+ kernel_size: Size of the convolution kernel.
303
+ padding: Zero-padding size.
304
+
305
+ Returns:
306
+ Conv-BN module.
307
+ """
308
+ mod_list = nn.Sequential()
309
+ mod_list.add_module(
310
+ "conv",
311
+ nn.Conv2d(
312
+ in_channels=self.in_channels,
313
+ out_channels=self.out_channels,
314
+ kernel_size=kernel_size,
315
+ stride=self.stride,
316
+ padding=padding,
317
+ groups=self.groups,
318
+ bias=False,
319
+ ),
320
+ )
321
+ mod_list.add_module("bn", nn.BatchNorm2d(num_features=self.out_channels))
322
+ return mod_list
323
+
324
+
325
def reparameterize_model(model: torch.nn.Module) -> nn.Module:
    """Return an inference-ready copy of ``model``.

    The model is deep-copied so the caller's instance is left untouched.
    Every sub-module of the copy that exposes a ``reparameterize`` method
    then has that method called, turning the multi-branched training
    structure into a single branch.

    Args:
        model: MobileOne model in train mode.

    Returns:
        MobileOne model in inference mode.
    """
    inference_model = copy.deepcopy(model)
    # Lazy filter: modules are visited in ``modules()`` order, one at a time.
    fusable = (
        submodule
        for submodule in inference_model.modules()
        if hasattr(submodule, "reparameterize")
    )
    for submodule in fusable:
        submodule.reparameterize()
    return inference_model
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Pillow
2
+ open_clip_torch