Spaces:
Running
Running
File size: 4,060 Bytes
ad19202 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
/// <reference path="./node.d.ts" />
import type {
GenerateWrappedInput,
SubjectType,
WrappedCacheEntry,
WrappedResult,
} from "../domain/types";
/**
* Dataset cache placeholder.
*
* This module is intentionally neutral: reads are public-only and writes are
* disabled by default. To enable persistence in a Hugging Face dataset, set:
* - WRAPPED_DATASET_ID="username/dataset-name"
* - HF_TOKEN with write permissions for that dataset
* - WRAPPED_DATASET_WRITE=true
*
* You can create the dataset via the Hub API:
* curl -X POST https://huggingface.co/api/repos/create \
* -H "Authorization: Bearer $HF_TOKEN" \
* -d '{"type":"dataset","name":"hf-wrapped-2025"}'
*/
/** Origin of the Hugging Face Hub used for both cache reads and writes. */
const HUB_BASE_URL = "https://huggingface.co";

/** Target dataset repository ("username/dataset-name"); caching is off when unset. */
const DATASET_ID = process.env["WRAPPED_DATASET_ID"];

/** Writes are opt-in: only the exact string "true" enables them. */
const WRITE_ENABLED = process.env["WRAPPED_DATASET_WRITE"] === "true";

/**
 * Directory inside the dataset that holds the snapshots. Each
 * year/subject-type/handle combination gets its own JSON file there, which
 * avoids concurrent writes on a single file.
 */
const DATA_DIR = process.env["WRAPPED_DATASET_DIR"] ?? "data";
/**
 * Looks up a previously stored wrapped snapshot in the configured dataset.
 *
 * Resolves to `null` straight away when no dataset id is configured. When the
 * input names a concrete subject type, only that type is tried; when it is
 * missing or `"auto"`, `"user"` is tried first and `"organization"` second.
 *
 * @param input - Handle, year and (optional) subject type to look up.
 * @returns The first cache entry that could be fetched, or `null` if none.
 */
export async function readCachedWrapped(
  input: GenerateWrappedInput,
): Promise<WrappedCacheEntry | null> {
  if (!DATASET_ID) {
    return null;
  }

  const { handle, year, subjectType: requested } = input;

  let candidates: SubjectType[];
  if (!requested || requested === "auto") {
    candidates = ["user", "organization"];
  } else {
    candidates = [requested];
  }

  // Probe one candidate at a time so the earlier subject type wins and no
  // second request is made once a hit is found.
  for (const candidate of candidates) {
    const entry = await safeJsonFetch<WrappedCacheEntry>(
      buildResolveUrl({ handle, year, subjectType: candidate }),
    );
    if (entry) {
      return entry;
    }
  }

  return null;
}
/**
 * Persists a wrapped result to the dataset when persistence is switched on.
 *
 * Does nothing unless a dataset id is configured, writes are enabled, and an
 * `HF_TOKEN` is present in the environment.
 *
 * @param result - The wrapped result to store.
 */
export async function writeWrappedSnapshot(
  result: WrappedResult,
): Promise<void> {
  const persistenceEnabled = Boolean(DATASET_ID) && WRITE_ENABLED;
  const token = persistenceEnabled ? process.env.HF_TOKEN : undefined;

  if (token) {
    await writePerUserJson(result, token);
  }
}
/**
 * Builds the Hub "resolve" URL (on the `main` revision) for the snapshot file
 * that belongs to the given handle, year and subject type.
 *
 * Each path segment is percent-encoded on its own so the `/` separators stay
 * literal in the resulting URL.
 */
function buildResolveUrl(params: {
  handle: string;
  year: number;
  subjectType: SubjectType;
}) {
  const encodedSegments = buildCachePath(params)
    .split("/")
    .map((segment) => encodeURIComponent(segment));
  const filePart = encodedSegments.join("/");

  return `${HUB_BASE_URL}/datasets/${DATASET_ID}/resolve/main/${filePart}`;
}
/**
 * Fetches `url` and parses the body as JSON without ever throwing.
 *
 * @param url - Address to request.
 * @returns The parsed body cast to `T` (not validated), or `null` when the
 *   request fails, the response status is not OK, or the body is not JSON.
 */
async function safeJsonFetch<T>(url: string): Promise<T | null> {
  // `next.revalidate` is not part of the standard RequestInit type, hence the
  // widened assertion on the options object.
  const init = {
    headers: { accept: "application/json" },
    next: { revalidate: 120 },
  } as RequestInit & { next?: { revalidate?: number } };

  try {
    const res = await fetch(url, init);
    return res.ok ? ((await res.json()) as T) : null;
  } catch {
    return null;
  }
}
/**
 * Uploads one wrapped result as its own JSON file in the dataset repository.
 *
 * The write is best-effort: HTTP failures and transport-level failures (for
 * example a rejected `fetch`) are logged with `console.error` and swallowed,
 * so a failed cache write never surfaces as an exception to the caller.
 *
 * NOTE(review): the endpoint path and the `operations` payload shape used here
 * should be confirmed against the Hub commit API documentation.
 *
 * @param result - Wrapped result to serialize; its profile handle, subject
 *   type and year determine the file path inside the dataset.
 * @param token - Hugging Face token sent as a Bearer credential.
 */
async function writePerUserJson(
  result: WrappedResult,
  token: string,
): Promise<void> {
  const path = buildCachePath({
    handle: result.profile.handle,
    year: result.year,
    subjectType: result.profile.subjectType,
  });

  try {
    const payload = JSON.stringify(result);
    const contentBase64 = Buffer.from(payload, "utf-8").toString("base64");

    const response = await fetch(
      `${HUB_BASE_URL}/api/datasets/${DATASET_ID}/commit?repo_type=dataset`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${token}`,
          "Content-Type": "application/json",
          accept: "application/json",
        },
        body: JSON.stringify({
          operations: [
            {
              operation: "add_or_update",
              path_in_repo: path,
              content: contentBase64,
              encoding: "base64",
            },
          ],
          commit_message: "Add wrapped snapshot",
          summary: "Add wrapped snapshot",
        }),
      },
    );

    if (!response.ok) {
      // Keep only the first 500 characters of the body so logs stay bounded.
      const text = await response.text().catch(() => "");
      console.error(
        "[wrapped] Failed to write snapshot",
        JSON.stringify({
          status: response.status,
          statusText: response.statusText,
          path,
          body: text.slice(0, 500),
        }),
      );
      return;
    }

    // The response body is only used for logging; an unparsable body is fine.
    let info: unknown = null;
    try {
      info = await response.json();
    } catch {
      // ignore
    }

    console.log(
      "[wrapped] Snapshot stored",
      JSON.stringify({
        dataset: DATASET_ID,
        path,
        status: response.status,
        info,
      }),
    );
  } catch (error: unknown) {
    // Network/serialization failures get the same log-and-continue treatment
    // as HTTP failures instead of rejecting the caller's promise.
    console.error(
      "[wrapped] Failed to write snapshot",
      JSON.stringify({
        path,
        error: error instanceof Error ? error.message : String(error),
      }),
    );
  }
}
/**
 * Returns the repository-relative path of the snapshot file for one subject:
 * `<DATA_DIR>/<year>-<subjectType>-<handle>.json`.
 */
function buildCachePath(params: {
  handle: string;
  year: number;
  subjectType: SubjectType;
}) {
  const { year, subjectType, handle } = params;
  return `${DATA_DIR}/${year}-${subjectType}-${handle}.json`;
}
|