File size: 4,060 Bytes
ad19202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/// <reference path="./node.d.ts" />
import type {
	GenerateWrappedInput,
	SubjectType,
	WrappedCacheEntry,
	WrappedResult,
} from "../domain/types";

/**
 * Dataset cache placeholder.
 *
 * This module is intentionally neutral: reads are public-only and writes are
 * disabled by default. To enable persistence in a Hugging Face dataset, set:
 * - WRAPPED_DATASET_ID="username/dataset-name"
 * - HF_TOKEN with write permissions for that dataset
 * - WRAPPED_DATASET_WRITE=true
 *
 * You can create the dataset via the Hub API:
 * curl -X POST https://huggingface.co/api/repos/create \
 *   -H "Authorization: Bearer $HF_TOKEN" \
 *   -H "Content-Type: application/json" \
 *   -d '{"type":"dataset","name":"hf-wrapped-2025"}'
 */

// Dataset repository id taken from WRAPPED_DATASET_ID. When it is unset (or
// empty) cache reads return null and snapshot writes are skipped.
const DATASET_ID = process.env.WRAPPED_DATASET_ID;
// Writes are opt-in: only the exact string "true" enables them.
const WRITE_ENABLED = process.env.WRAPPED_DATASET_WRITE === "true";
// Origin used for both the file-resolve URLs and the commit API call.
const HUB_BASE_URL = "https://huggingface.co";
// Directory inside the dataset that holds the snapshot files; defaults to
// "data" when WRAPPED_DATASET_DIR is not set.
const DATA_DIR = process.env.WRAPPED_DATASET_DIR ?? "data";
// Snapshots are stored as one JSON file per subject and year (this avoids
// concurrent writes on a single shared file).

/**
 * Looks up a previously stored wrapped snapshot in the configured dataset.
 *
 * When the input names a concrete subject type, only that type is tried.
 * Otherwise (`"auto"` or not set) `"user"` is tried first and
 * `"organization"` second. Lookups run one after another and the first
 * successful fetch wins.
 *
 * @param input - Handle, year and optional subject type to look up.
 * @returns The cached entry, or `null` when no dataset is configured or none
 *   of the candidate files could be fetched.
 */
export async function readCachedWrapped(
	input: GenerateWrappedInput,
): Promise<WrappedCacheEntry | null> {
	if (!DATASET_ID) {
		return null;
	}

	let candidates: SubjectType[];
	if (!input.subjectType || input.subjectType === "auto") {
		// No explicit type requested: probe both kinds, users first.
		candidates = ["user", "organization"];
	} else {
		candidates = [input.subjectType];
	}

	for (const candidate of candidates) {
		const hit = await safeJsonFetch<WrappedCacheEntry>(
			buildResolveUrl({
				handle: input.handle,
				year: input.year,
				subjectType: candidate,
			}),
		);
		if (hit) {
			return hit;
		}
	}

	return null;
}

/**
 * Persists a generated wrapped result to the dataset cache.
 *
 * This is a no-op unless all three preconditions hold: a dataset id is
 * configured, writes are explicitly enabled, and `HF_TOKEN` is present in the
 * environment.
 *
 * @param result - The wrapped result to store.
 */
export async function writeWrappedSnapshot(
	result: WrappedResult,
): Promise<void> {
	// The token is only read once the dataset and write switch are in place.
	const token =
		DATASET_ID && WRITE_ENABLED ? process.env.HF_TOKEN : undefined;

	if (token) {
		await writePerUserJson(result, token);
	}
}

/**
 * Builds the URL from which a cached snapshot file is downloaded
 * (`<hub>/datasets/<dataset id>/resolve/main/<path>`).
 *
 * Each path segment is percent-encoded on its own, so the `/` separators of
 * the cache path are kept as real path separators.
 *
 * @param params - Handle, year and subject type identifying the snapshot.
 * @returns The absolute resolve URL on the `main` revision.
 */
function buildResolveUrl(params: {
	handle: string;
	year: number;
	subjectType: SubjectType;
}) {
	const encodedSegments = buildCachePath(params)
		.split("/")
		.map((segment) => encodeURIComponent(segment));
	const resolveRoot = `${HUB_BASE_URL}/datasets/${DATASET_ID}/resolve/main`;
	return `${resolveRoot}/${encodedSegments.join("/")}`;
}

/**
 * Fetches a URL and parses the body as JSON without ever throwing.
 *
 * The parsed body is returned as `T` without runtime validation.
 *
 * @param url - Absolute URL to request.
 * @returns The parsed JSON body, or `null` when the request fails, the
 *   response status is not OK, or the body is not valid JSON.
 */
async function safeJsonFetch<T>(url: string): Promise<T | null> {
	// `next.revalidate` is not part of the standard RequestInit; presumably it
	// is the Next.js fetch-cache extension (120 s) — confirm against the runtime.
	const init: RequestInit & { next?: { revalidate?: number } } = {
		headers: { accept: "application/json" },
		next: { revalidate: 120 },
	};

	try {
		const res = await fetch(url, init);
		return res.ok ? ((await res.json()) as T) : null;
	} catch {
		// Best-effort read: any failure is treated as a cache miss.
		return null;
	}
}

/**
 * Commits one wrapped result as a JSON file into the configured dataset.
 *
 * The file path is derived from the result's handle, year and subject type.
 * The write is best-effort: a network failure or a non-OK response is logged
 * with the `[wrapped]` prefix and swallowed, so a cache write problem never
 * rejects the caller's promise.
 *
 * NOTE(review): the request shape used here (no revision segment in the URL,
 * a JSON body with an `operations` array) should be confirmed against the
 * Hub commit API documentation.
 *
 * @param result - The wrapped result to serialise and store.
 * @param token - Bearer token sent in the `Authorization` header.
 */
async function writePerUserJson(
	result: WrappedResult,
	token: string,
): Promise<void> {
	const path = buildCachePath({
		handle: result.profile.handle,
		year: result.year,
		subjectType: result.profile.subjectType,
	});

	// The file content travels base64-encoded inside the JSON request body.
	const payload = JSON.stringify(result);
	const contentBase64 = Buffer.from(payload, "utf-8").toString("base64");

	// A rejected fetch (DNS failure, connection reset, ...) is handled like an
	// HTTP failure: log it and give up instead of propagating.
	const response = await fetch(
		`${HUB_BASE_URL}/api/datasets/${DATASET_ID}/commit?repo_type=dataset`,
		{
			method: "POST",
			headers: {
				Authorization: `Bearer ${token}`,
				"Content-Type": "application/json",
				accept: "application/json",
			},
			body: JSON.stringify({
				operations: [
					{
						operation: "add_or_update",
						path_in_repo: path,
						content: contentBase64,
						encoding: "base64",
					},
				],
				commit_message: "Add wrapped snapshot",
				summary: "Add wrapped snapshot",
			}),
		},
	).catch((error: unknown) => {
		console.error(
			"[wrapped] Failed to write snapshot",
			JSON.stringify({
				path,
				error: error instanceof Error ? error.message : String(error),
			}),
		);
		return null;
	});

	if (!response) {
		return;
	}

	if (!response.ok) {
		// Keep the log bounded: only the first 500 characters of the body.
		const text = await response.text().catch(() => "");
		console.error(
			"[wrapped] Failed to write snapshot",
			JSON.stringify({
				status: response.status,
				statusText: response.statusText,
				path,
				body: text.slice(0, 500),
			}),
		);
		return;
	}

	// The response body is only used for logging; a non-JSON body is fine.
	let info: unknown = null;
	try {
		info = await response.json();
	} catch {
		// ignore
	}

	console.log(
		"[wrapped] Snapshot stored",
		JSON.stringify({
			dataset: DATASET_ID,
			path,
			status: response.status,
			info,
		}),
	);
}

/**
 * Builds the repository-relative path of a snapshot file:
 * `<DATA_DIR>/<year>-<subjectType>-<handle>.json`.
 *
 * The handle is presumably caller-supplied (verify against callers), so any
 * `/`, `\` or `%` in it is percent-escaped. Without this, a handle such as
 * `x/../../other` would add path segments that the URL parser later
 * normalises, letting a lookup leave the data directory. Handles without
 * those characters produce exactly the same path as before.
 *
 * @param params - Handle, year and subject type identifying the snapshot.
 * @returns The path of the snapshot file inside the dataset repository.
 */
function buildCachePath(params: {
	handle: string;
	year: number;
	subjectType: SubjectType;
}): string {
	// "%" is escaped as well so the mapping from handle to file name stays
	// one-to-one (a literal "%2F" cannot collide with an escaped "/").
	const safeHandle = params.handle.replace(/[%\/\\]/g, (ch) =>
		encodeURIComponent(ch),
	);
	const fileName = `${params.year}-${params.subjectType}-${safeHandle}.json`;
	return `${DATA_DIR}/${fileName}`;
}