devusman committed on
Commit
63bdb43
·
1 Parent(s): f46a3f8
Files changed (4) hide show
  1. .vscode/settings.json +3 -0
  2. index.html +287 -0
  3. server copy.js +691 -0
  4. server.js +269 -163
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "liveServer.settings.port": 5501
3
+ }
index.html ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>StuDocu Downloader</title>
8
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
9
<style>
  /* Page shell: centre the single card horizontally and vertically. */
  body {
    font-family: 'Poppins', sans-serif;
    background-color: #f0f2f5;
    margin: 0;
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 100vh;
    padding: 20px;
    box-sizing: border-box;
  }

  /* The white card that holds the header, form, status box and footer. */
  .container {
    background-color: #ffffff;
    padding: 30px 40px;
    border-radius: 12px;
    box-shadow: 0 8px 30px rgba(0, 0, 0, 0.08);
    width: 100%;
    max-width: 650px;
    text-align: center;
    transition: all 0.3s ease;
  }

  /* Header: logo, title and one-line instruction. */
  .header .logo {
    width: 40px;
    height: 40px;
    color: #007bff;
    margin-bottom: 10px;
  }

  .header h1 {
    color: #2c3e50;
    margin: 0 0 10px;
    font-weight: 600;
  }

  .header p {
    color: #7f8c8d;
    margin-bottom: 30px;
    font-size: 1rem;
  }

  /* Input and button sit side by side and share one rounded outline. */
  .form-container {
    display: flex;
    margin-bottom: 20px;
  }

  #studocu-url {
    flex-grow: 1;
    padding: 14px 18px;
    border: 1px solid #dfe4ea;
    border-radius: 8px 0 0 8px;
    font-size: 16px;
    /* The default outline is replaced by the border + shadow in :focus below. */
    outline: none;
    transition: border-color 0.3s ease, box-shadow 0.3s ease;
  }

  #studocu-url:focus {
    border-color: #007bff;
    box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15);
  }

  #download-btn {
    padding: 14px 25px;
    border: none;
    background-color: #007bff;
    color: white;
    font-size: 16px;
    font-weight: 600;
    border-radius: 0 8px 8px 0;
    cursor: pointer;
    position: relative;
    transition: background-color 0.3s ease;
  }

  #download-btn:hover {
    background-color: #0056b3;
  }

  /* Keyboard focus must stay visible: the button is the page's only action. */
  #download-btn:focus-visible {
    outline: 3px solid #80bdff;
    outline-offset: 2px;
  }

  #download-btn:disabled {
    background-color: #5a9eeb;
    cursor: not-allowed;
  }

  /* Loader animation: a spinner that replaces the button text while loading. */
  .btn-loader {
    display: none;
    border: 3px solid #f3f3f3;
    border-top: 3px solid #0056b3;
    border-radius: 50%;
    width: 20px;
    height: 20px;
    animation: spin 1s linear infinite;
  }

  /* The script toggles the "loading" class on the button. */
  #download-btn.loading .btn-text {
    display: none;
  }

  #download-btn.loading .btn-loader {
    display: block;
    margin: 0 auto;
  }

  @keyframes spin {
    0% {
      transform: rotate(0deg);
    }

    100% {
      transform: rotate(360deg);
    }
  }

  /* Status box: the script appends one of success / error / info as a class. */
  .status-indicator {
    padding: 12px;
    border-radius: 8px;
    margin-top: 20px;
    font-size: 0.95rem;
    display: block;
  }

  .status-indicator.success {
    background-color: #d4edda;
    color: #155724;
    border: 1px solid #c3e6cb;
  }

  .status-indicator.error {
    background-color: #f8d7da;
    color: #721c24;
    border: 1px solid #f5c6cb;
  }

  .status-indicator.info {
    background-color: #cce5ff;
    color: #004085;
    border: 1px solid #b8daff;
  }

  .footer {
    margin-top: 30px;
    font-size: 0.8rem;
    color: #95a5a6;
  }
</style>
158
+ </head>
159
+
160
+ <body>
161
<div class="container">
  <!-- Page heading. The logo is decorative, so it is hidden from assistive technology. -->
  <header class="header">
    <svg class="logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor"
      aria-hidden="true" focusable="false">
      <path
        d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 14h-2v-2h2v2zm0-4h-2V7h2v5z" />
    </svg>
    <h1>StuDocu Document Downloader</h1>
    <p>Paste a valid StuDocu document URL to generate and download a PDF.</p>
  </header>
  <main class="main-content">
    <div class="form-container">
      <!-- A placeholder is not an accessible name, so the field is named explicitly. -->
      <input type="url" id="studocu-url" inputmode="url" aria-label="StuDocu document URL"
        placeholder="https://www.studocu.com/en-us/document/...">
      <button type="button" id="download-btn">
        <span class="btn-text">Download</span>
        <span class="btn-loader"></span>
      </button>
    </div>
    <!-- Live region for progress and error text. The inline display value is
         kept because the page script shows the box by setting style.display. -->
    <div class="status-indicator" id="status-indicator" role="status" style="display: none;">
      <!-- Messages will be displayed here by JavaScript -->
    </div>
  </main>
  <footer class="footer">
    <p>Powered by the Heart by Us</p>
  </footer>
</div>
186
<script>
  /*
   * Front-end controller for the downloader page.
   *
   * Reads the URL typed into #studocu-url, POSTs it as JSON to API_ENDPOINT and,
   * when the response is OK, saves the response body as a PDF. Progress and
   * failures are reported through #status-indicator.
   */
  document.addEventListener('DOMContentLoaded', () => {
    const downloadBtn = document.getElementById('download-btn');
    const urlInput = document.getElementById('studocu-url');
    const statusIndicator = document.getElementById('status-indicator');

    const API_ENDPOINT = 'https://devusman-test.hf.space/api/download';
    const DOWNLOAD_FILENAME = 'studocu-document.pdf';
    // Delay before releasing the blob URL; see saveBlob().
    const REVOKE_DELAY_MS = 1000;

    downloadBtn.addEventListener('click', startDownload);

    // There is no <form>, so Enter in the field has to be wired up by hand.
    urlInput.addEventListener('keydown', (event) => {
      if (event.key === 'Enter') {
        event.preventDefault();
        startDownload();
      }
    });

    /**
     * Validates the input, calls the API and saves the returned PDF.
     * Does nothing when a request is already in flight.
     */
    async function startDownload() {
      if (downloadBtn.disabled) {
        return;
      }

      const rawUrl = urlInput.value.trim();

      // 1. Validate the input URL
      if (!rawUrl) {
        showStatus('Please paste a URL first.', 'error');
        return;
      }

      const documentUrl = normalizeStudocuUrl(rawUrl);
      if (!documentUrl) {
        showStatus('Please provide a valid StuDocu URL.', 'error');
        return;
      }

      // 2. Update UI to show loading state
      setLoading(true);
      showStatus('Request sent. Please wait, this can take up to a minute...', 'info');

      try {
        // 3. Send the POST request to the API
        const response = await fetch(API_ENDPOINT, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({ url: documentUrl }),
        });

        // 4. Handle the response
        if (response.ok) {
          // If successful, the response body is the PDF file. Read it fully
          // before announcing success so a broken transfer is reported as an error.
          const blob = await response.blob();
          showStatus('Success! Your download will start now.', 'success');
          saveBlob(blob, DOWNLOAD_FILENAME);
        } else {
          showStatus(`Error: ${await readErrorMessage(response)}`, 'error');
        }
      } catch (error) {
        // Only network failures and unexpected exceptions end up here.
        console.error('Download failed:', error);
        showStatus('Failed to connect to the server. Please check your connection and try again.', 'error');
      } finally {
        // 5. Reset the UI from the loading state
        setLoading(false);
      }
    }

    /**
     * Turns user input into an absolute StuDocu URL.
     * A missing scheme is treated as https. The check is made on the parsed
     * hostname, so text such as "https://example.org/?q=studocu.com" is rejected.
     * @param {string} rawUrl - Trimmed text from the input field.
     * @returns {string|null} The normalized URL, or null when it is not a StuDocu URL.
     */
    function normalizeStudocuUrl(rawUrl) {
      const candidate = /^https?:\/\//i.test(rawUrl) ? rawUrl : `https://${rawUrl}`;
      let parsed;
      try {
        parsed = new URL(candidate);
      } catch (parseError) {
        return null;
      }
      const host = parsed.hostname;
      const isStudocuHost = host === 'studocu.com' || host.endsWith('.studocu.com');
      return isStudocuHost ? parsed.href : null;
    }

    /**
     * Extracts a readable message from a failed response.
     * The API answers errors with JSON of the form {error: "..."}, but anything
     * in front of it may answer with HTML or an empty body, so a parse failure
     * falls back to the HTTP status instead of being reported as a network error.
     * @param {Response} response - A response whose `ok` flag is false.
     * @returns {Promise<string>} The message to show after "Error: ".
     */
    async function readErrorMessage(response) {
      try {
        const errorData = await response.json();
        return (errorData && errorData.error) || 'An unknown error occurred.';
      } catch (parseError) {
        return `The server responded with status ${response.status}.`;
      }
    }

    /**
     * Saves a blob to disk through a temporary, hidden download link.
     * @param {Blob} blob - The file contents.
     * @param {string} filename - Suggested name for the saved file.
     */
    function saveBlob(blob, filename) {
      const downloadUrl = window.URL.createObjectURL(blob);
      const link = document.createElement('a');
      link.style.display = 'none';
      link.href = downloadUrl;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      link.remove();

      // Release the blob URL a moment later rather than synchronously, in case
      // the browser has not started reading it when click() returns.
      setTimeout(() => window.URL.revokeObjectURL(downloadUrl), REVOKE_DELAY_MS);
    }

    /**
     * Updates the button and input field to reflect the loading state.
     * @param {boolean} isLoading - Whether the app is in a loading state.
     */
    function setLoading(isLoading) {
      downloadBtn.classList.toggle('loading', isLoading);
      downloadBtn.setAttribute('aria-busy', String(isLoading));
      downloadBtn.disabled = isLoading;
      urlInput.disabled = isLoading;
    }

    /**
     * Displays a status message to the user.
     * @param {string} message - The message to display.
     * @param {'info'|'success'|'error'} type - The type of message.
     */
    function showStatus(message, type) {
      statusIndicator.style.display = 'block';
      statusIndicator.textContent = message;
      statusIndicator.className = `status-indicator ${type}`;
    }
  });
</script>
285
+ </body>
286
+
287
+ </html>
server copy.js ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const express = require('express');
2
+ const puppeteer = require('puppeteer');
3
+ const cors = require('cors');
4
+ const app = express();
5
+ const port = 7860;
6
+
7
+ app.use(cors());
8
+ app.use(express.json());
9
+
10
+ /**
11
+ * Advanced cookie banner and content bypass for StuDocu
12
+ */
13
+ const bypassCookiesAndRestrictions = async (page) => {
14
+ console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
15
+
16
+ // Step 1: Set cookies before page load
17
+ const preCookies = [
18
+ { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
19
+ { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
20
+ { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
21
+ { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
22
+ { name: 'user_consent', value: '1', domain: '.studocu.com' },
23
+ { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
24
+ { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
25
+ { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
26
+ ];
27
+
28
+ for (const cookie of preCookies) {
29
+ try {
30
+ await page.setCookie(cookie);
31
+ } catch (e) {
32
+ console.log(`Failed to set cookie ${cookie.name}:`, e.message);
33
+ }
34
+ }
35
+
36
+ // Step 2: Inject CSS to hide cookie banners immediately
37
+ await page.addStyleTag({
38
+ content: `
39
+ /* Hide all possible cookie banners */
40
+ [id*="cookie" i]:not(img):not(input),
41
+ [class*="cookie" i]:not(img):not(input),
42
+ [data-testid*="cookie" i],
43
+ [aria-label*="cookie" i],
44
+ .gdpr-banner, .gdpr-popup, .gdpr-modal,
45
+ .consent-banner, .consent-popup, .consent-modal,
46
+ .privacy-banner, .privacy-popup, .privacy-modal,
47
+ .cookie-law, .cookie-policy, .cookie-compliance,
48
+ .onetrust-banner-sdk, #onetrust-consent-sdk,
49
+ .cmp-banner, .cmp-popup, .cmp-modal,
50
+ [class*="CookieBanner"], [class*="CookieNotice"],
51
+ [class*="ConsentBanner"], [class*="ConsentManager"],
52
+ .cc-banner, .cc-window, .cc-compliance,
53
+ div[style*="position: fixed"]:has-text("cookie"),
54
+ div[style*="position: fixed"]:has-text("consent"),
55
+ .fixed:has-text("cookie"), .fixed:has-text("consent") {
56
+ display: none !important;
57
+ visibility: hidden !important;
58
+ opacity: 0 !important;
59
+ z-index: -9999 !important;
60
+ pointer-events: none !important;
61
+ }
62
+
63
+ /* Remove blur and premium overlays */
64
+ [class*="blur" i], [class*="premium" i],
65
+ [class*="paywall" i], [class*="sample-preview-blur" i] {
66
+ filter: none !important;
67
+ backdrop-filter: none !important;
68
+ opacity: 1 !important;
69
+ visibility: visible !important;
70
+ }
71
+
72
+ /* Ensure document content is visible */
73
+ .document-content, .page-content, [data-page] {
74
+ filter: none !important;
75
+ opacity: 1 !important;
76
+ visibility: visible !important;
77
+ pointer-events: auto !important;
78
+ }
79
+
80
+ /* Remove fixed overlays */
81
+ .fixed-overlay, .sticky-overlay, .content-overlay {
82
+ display: none !important;
83
+ }
84
+
85
+ /* Restore scrolling */
86
+ html, body {
87
+ overflow: auto !important;
88
+ position: static !important;
89
+ }
90
+ `
91
+ });
92
+
93
+ // Step 3: Inject JavaScript to handle dynamic cookie banners
94
+ await page.evaluateOnNewDocument(() => {
95
+ // Override common cookie consent functions
96
+ window.cookieConsent = { accepted: true };
97
+ window.gtag = () => { };
98
+ window.ga = () => { };
99
+ window.dataLayer = [];
100
+
101
+ // Mutation observer to catch dynamically added cookie banners
102
+ const observer = new MutationObserver((mutations) => {
103
+ mutations.forEach((mutation) => {
104
+ mutation.addedNodes.forEach((node) => {
105
+ if (node.nodeType === 1) { // Element node
106
+ const element = node;
107
+ const text = element.textContent || '';
108
+ const className = element.className || '';
109
+ const id = element.id || '';
110
+
111
+ // Check if this looks like a cookie banner
112
+ if (
113
+ text.toLowerCase().includes('cookie') ||
114
+ text.toLowerCase().includes('consent') ||
115
+ text.toLowerCase().includes('privacy policy') ||
116
+ className.toLowerCase().includes('cookie') ||
117
+ className.toLowerCase().includes('consent') ||
118
+ className.toLowerCase().includes('gdpr') ||
119
+ id.toLowerCase().includes('cookie') ||
120
+ id.toLowerCase().includes('consent')
121
+ ) {
122
+ console.log('Removing detected cookie banner:', element);
123
+ element.remove();
124
+ }
125
+ }
126
+ });
127
+ });
128
+ });
129
+
130
+ observer.observe(document.body, { childList: true, subtree: true });
131
+
132
+ // Set up periodic cleanup
133
+ setInterval(() => {
134
+ const cookieElements = document.querySelectorAll(`
135
+ [id*="cookie" i]:not(img):not(input),
136
+ [class*="cookie" i]:not(img):not(input),
137
+ [data-testid*="cookie" i],
138
+ .gdpr-banner, .consent-banner, .privacy-banner,
139
+ .onetrust-banner-sdk, #onetrust-consent-sdk,
140
+ .cmp-banner, .cc-banner
141
+ `);
142
+ cookieElements.forEach(el => el.remove());
143
+
144
+ // Restore body scroll
145
+ document.body.style.overflow = 'auto';
146
+ document.documentElement.style.overflow = 'auto';
147
+ }, 1000);
148
+ });
149
+
150
+ return true;
151
+ };
152
+
153
+ /**
154
+ * Enhanced content unblurring and premium bypass (integrated from extension script)
155
+ */
156
+ const unblurContent = async (page) => {
157
+ console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
158
+
159
+ await page.evaluate(() => {
160
+ // Function to remove all visual restrictions
161
+ const removeRestrictions = () => {
162
+ const removeBySelector = (selector) => {
163
+ document.querySelectorAll(selector).forEach(el => el.remove());
164
+ };
165
+
166
+ // Remove ads by known class or ID
167
+ removeBySelector("#adbox");
168
+ removeBySelector(".adsbox");
169
+ removeBySelector(".ad-box");
170
+ removeBySelector(".banner-ads");
171
+ removeBySelector(".advert");
172
+
173
+ // Remove premium banner container
174
+ removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");
175
+
176
+ // Enhanced blur removal
177
+ const removeBlur = (element = document) => {
178
+ element.querySelectorAll("*").forEach(el => {
179
+ const style = window.getComputedStyle(el);
180
+
181
+ // Check for blur via filter, backdrop-filter, or class names
182
+ if (
183
+ style.filter?.includes("blur") ||
184
+ style.backdropFilter?.includes("blur") ||
185
+ parseFloat(style.opacity) < 1 ||
186
+ (el.className && el.className.toString().toLowerCase().includes("blur")) ||
187
+ (el.className && el.className.toString().toLowerCase().includes("premium"))
188
+ ) {
189
+ el.style.filter = "none !important";
190
+ el.style.backdropFilter = "none !important";
191
+ el.style.opacity = "1 !important";
192
+ if (el.classList) {
193
+ el.classList.remove("blur", "blurred", "premium-blur");
194
+ }
195
+ }
196
+
197
+ // Check parent elements for blur-inducing styles
198
+ const parent = el.parentElement;
199
+ if (parent) {
200
+ const parentStyle = window.getComputedStyle(parent);
201
+ if (
202
+ parentStyle.filter?.includes("blur") ||
203
+ parentStyle.backdropFilter?.includes("blur") ||
204
+ parseFloat(parentStyle.opacity) < 1
205
+ ) {
206
+ parent.style.filter = "none !important";
207
+ parent.style.backdropFilter = "none !important";
208
+ parent.style.opacity = "1 !important";
209
+ }
210
+ }
211
+ });
212
+ };
213
+
214
+ // Remove dark overlays and paywall-like elements
215
+ document.querySelectorAll("div, section, aside").forEach(el => {
216
+ const style = window.getComputedStyle(el);
217
+ if (
218
+ style.backgroundColor.includes("rgba") &&
219
+ (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000) ||
220
+ (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
221
+ (el.className && el.className.toString().toLowerCase().includes("paywall"))
222
+ ) {
223
+ el.remove();
224
+ }
225
+ });
226
+
227
+ removeBlur();
228
+
229
+ // Remove other restrictions
230
+ removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i]');
231
+
232
+ // Ensure document content is visible
233
+ const contentSelectors = [
234
+ '.document-content', '.page-content', '.content',
235
+ '[data-page]', '[data-testid*="document"]', '[data-testid*="page"]',
236
+ '.page', '.document-page', 'main', 'article'
237
+ ];
238
+ contentSelectors.forEach(selector => {
239
+ document.querySelectorAll(selector).forEach(el => {
240
+ el.style.setProperty('filter', 'none', 'important');
241
+ el.style.setProperty('opacity', '1', 'important');
242
+ el.style.setProperty('visibility', 'visible', 'important');
243
+ el.style.setProperty('display', 'block', 'important');
244
+ el.style.setProperty('pointer-events', 'auto', 'important');
245
+ });
246
+ });
247
+
248
+ // Remove overlay divs that might be blocking content
249
+ const overlays = document.querySelectorAll(`
250
+ [class*="overlay" i], [class*="modal" i], [class*="popup" i],
251
+ [class*="banner" i], [style*="position: fixed"],
252
+ [style*="position: absolute"][style*="z-index"]
253
+ `);
254
+ overlays.forEach(overlay => {
255
+ const text = overlay.textContent || '';
256
+ if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') ||
257
+ text.includes('cookie') || text.includes('consent') || text.includes('login')) {
258
+ overlay.remove();
259
+ }
260
+ });
261
+ };
262
+
263
+ // Run immediately
264
+ removeRestrictions();
265
+
266
+ // Run periodically
267
+ const intervalId = setInterval(removeRestrictions, 2000);
268
+
269
+ // Clean up after 60 seconds
270
+ setTimeout(() => {
271
+ clearInterval(intervalId);
272
+ }, 60000);
273
+ });
274
+ };
275
+
276
+ /**
277
+ * Apply print styles for clean PDF output (integrated from extension script with improvements)
278
+ */
279
+ const applyPrintStyles = async (page) => {
280
+ console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
281
+
282
+ await page.evaluate(() => {
283
+ const style = document.createElement("style");
284
+ style.id = "print-style-extension";
285
+ style.innerHTML = `
286
+ @page {
287
+ size: A4 portrait;
288
+ margin: 5mm;
289
+ }
290
+ @media print {
291
+ html, body {
292
+ margin: 0 !important;
293
+ padding: 0 !important;
294
+ overflow: visible !important;
295
+ }
296
+ header, footer, nav, aside, .no-print, .ads, .sidebar,
297
+ .premium-banner, .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho,
298
+ .Sidebar_sidebar-scrollable__kqeBZ, .HeaderWrapper_header-wrapper__mCmf3,
299
+ .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
300
+ .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
301
+ .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper,
302
+ #top-bar-wrapper, .Layout_sidebar-wrapper__unavM,
303
+ .Layout_is-open__9DQr4 {
304
+ display: none !important;
305
+ }
306
+ body {
307
+ background: white !important;
308
+ color: black !important;
309
+ }
310
+ * {
311
+ box-shadow: none !important;
312
+ background: transparent !important;
313
+ }
314
+ .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
315
+ .Viewer_document-wrapper__XsO4j, .page-content {
316
+ display: flex !important;
317
+ flex-direction: column !important;
318
+ width: 100% !important;
319
+ max-width: 210mm !important;
320
+ margin: 0 auto !important;
321
+ }
322
+ [data-page], .page, .document-page, img {
323
+ page-break-after: always !important;
324
+ page-break-inside: avoid !important;
325
+ page-break-before: avoid !important;
326
+ width: 100% !important;
327
+ max-width: 100% !important;
328
+ height: auto !important;
329
+ }
330
+ }
331
+ `;
332
+ document.head.appendChild(style);
333
+ });
334
+ };
335
+
336
+ /**
337
+ * Enhanced StuDocu downloader with comprehensive bypasses and login support
338
+ */
339
+ const studocuDownloader = async (url, options = {}) => {
340
+ let browser;
341
+ try {
342
+ console.log("πŸš€ Launching browser with stealth configuration...");
343
+ // Replace this part in your server.js (around line 343)
344
+ browser = await puppeteer.launch({
345
+ headless: "new", // Use new headless mode
346
+ args: [
347
+ '--no-sandbox',
348
+ '--disable-setuid-sandbox',
349
+ '--disable-dev-shm-usage',
350
+ '--disable-accelerated-2d-canvas',
351
+ '--no-first-run',
352
+ '--no-zygote',
353
+ '--disable-gpu',
354
+ '--disable-features=VizDisplayCompositor',
355
+ '--disable-background-networking',
356
+ '--disable-background-timer-throttling',
357
+ '--disable-renderer-backgrounding',
358
+ '--disable-backgrounding-occluded-windows',
359
+ '--disable-ipc-flooding-protection',
360
+ '--disable-web-security',
361
+ '--disable-features=site-per-process',
362
+ '--disable-blink-features=AutomationControlled',
363
+ '--disable-extensions',
364
+ '--single-process', // Important for containers
365
+ '--disable-background-tasks',
366
+ '--disable-default-apps',
367
+ '--disable-sync',
368
+ '--metrics-recording-only',
369
+ '--no-default-browser-check',
370
+ '--no-pings',
371
+ '--password-store=basic',
372
+ '--use-mock-keychain',
373
+ '--disable-gpu-sandbox'
374
+ ],
375
+ timeout: 300000,
376
+ executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable'
377
+ });
378
+
379
+ const page = await browser.newPage();
380
+
381
+ // Set realistic browser characteristics
382
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
383
+ await page.setViewport({ width: 794, height: 1122 });
384
+
385
+ // Hide webdriver property
386
+ await page.evaluateOnNewDocument(() => {
387
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
388
+ Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
389
+ Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
390
+ });
391
+
392
+ // Set up cookie and content bypass
393
+ await bypassCookiesAndRestrictions(page);
394
+
395
+ // Block unnecessary resources
396
+ await page.setRequestInterception(true);
397
+ page.on('request', (req) => {
398
+ const resourceType = req.resourceType();
399
+ const reqUrl = req.url();
400
+
401
+ // Block trackers, ads, and analytics
402
+ if (
403
+ reqUrl.includes('doubleclick') ||
404
+ reqUrl.includes('googletagmanager') ||
405
+ reqUrl.includes('facebook.com') ||
406
+ reqUrl.includes('twitter.com') ||
407
+ reqUrl.includes('analytics') ||
408
+ reqUrl.includes('gtm') ||
409
+ reqUrl.includes('hotjar') ||
410
+ reqUrl.includes('mixpanel') ||
411
+ reqUrl.includes('onetrust') ||
412
+ reqUrl.includes('cookielaw') ||
413
+ resourceType === 'other' && reqUrl.includes('track')
414
+ ) {
415
+ req.abort();
416
+ } else {
417
+ req.continue();
418
+ }
419
+ });
420
+
421
+ // Login if credentials provided (for premium content)
422
+ if (options.email && options.password) {
423
+ console.log("πŸ”‘ Logging in to StuDocu...");
424
+ await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
425
+ await page.waitForSelector('#email', { timeout: 15000 });
426
+ await page.type('#email', options.email);
427
+ await page.type('#password', options.password);
428
+ await page.click('button[type="submit"]');
429
+ try {
430
+ await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
431
+ // Additional check for successful login
432
+ await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
433
+ console.log("βœ… Login successful.");
434
+ } catch (e) {
435
+ console.error("❌ Login failed:", e.message);
436
+ throw new Error("Login failed. Check credentials, if CAPTCHA is present, or try again.");
437
+ }
438
+ } else {
439
+ console.log("⚠️ No login credentials provided. Full unblurred content requires premium account.");
440
+ }
441
+
442
+ console.log(`πŸ“„ Navigating to ${url}...`);
443
+
444
+ // Navigate with retry logic
445
+ let navigationSuccess = false;
446
+ let attempts = 0;
447
+ const maxAttempts = 3;
448
+ while (!navigationSuccess && attempts < maxAttempts) {
449
+ try {
450
+ attempts++;
451
+ console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
452
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
453
+ navigationSuccess = true;
454
+ } catch (e) {
455
+ console.log(`Navigation attempt ${attempts} failed:`, e.message);
456
+ if (attempts >= maxAttempts) throw e;
457
+ await new Promise(resolve => setTimeout(resolve, 5000));
458
+ }
459
+ }
460
+
461
+ // Wait for initial load
462
+ await new Promise(resolve => setTimeout(resolve, 5000));
463
+
464
+ // Apply content unblurring
465
+ await unblurContent(page);
466
+
467
+ // Wait for document content with multiple selectors
468
+ console.log("⏳ Waiting for document content to load...");
469
+ const contentSelectors = [
470
+ '.document-content', '.page-content', '[data-page]',
471
+ '[data-testid*="document"]', 'img[src*="document"]',
472
+ 'img[src*="page"]', '.page', 'main img', 'article img'
473
+ ];
474
+ let contentFound = false;
475
+ for (const selector of contentSelectors) {
476
+ try {
477
+ await page.waitForSelector(selector, { timeout: 20000 });
478
+ console.log(`βœ… Found content with selector: ${selector}`);
479
+ contentFound = true;
480
+ break;
481
+ } catch (e) {
482
+ console.log(`❌ Selector ${selector} not found, trying next...`);
483
+ }
484
+ }
485
+ if (!contentFound) {
486
+ console.log("⚠️ No specific content selector found, proceeding with page content...");
487
+ }
488
+
489
+ // Enhanced scrolling to load all content with loop for stability
490
+ console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
491
+ await page.evaluate(async () => {
492
+ const delay = (ms) => new Promise((res) => setTimeout(res, ms));
493
+
494
+ let scrollHeight = document.body.scrollHeight;
495
+ while (true) {
496
+ let totalHeight = 0;
497
+ const distance = 300;
498
+ while (totalHeight < scrollHeight) {
499
+ window.scrollBy(0, distance);
500
+ totalHeight += distance;
501
+ await delay(500); // Increased delay for better loading
502
+ }
503
+
504
+ await delay(2000); // Extra wait after reaching bottom
505
+
506
+ const newHeight = document.body.scrollHeight;
507
+ if (newHeight === scrollHeight) break;
508
+ scrollHeight = newHeight;
509
+ }
510
+
511
+ // Scroll to top
512
+ window.scrollTo({ top: 0, behavior: "smooth" });
513
+ await delay(1000);
514
+ });
515
+
516
+ // Re-apply unblur after loading new content
517
+ await unblurContent(page);
518
+
519
+ // Wait for all images to load
520
+ console.log("πŸ–ΌοΈ Waiting for all images to load...");
521
+ await page.evaluate(async () => {
522
+ const images = Array.from(document.querySelectorAll('img'));
523
+ await Promise.all(images.map(img => {
524
+ if (img.complete) return Promise.resolve();
525
+ return new Promise((resolve) => {
526
+ img.addEventListener('load', resolve);
527
+ img.addEventListener('error', resolve);
528
+ setTimeout(resolve, 15000);
529
+ });
530
+ }));
531
+ });
532
+
533
+ // Additional wait for any lazy loading
534
+ await new Promise(resolve => setTimeout(resolve, 10000));
535
+
536
+ // Set exact height to avoid extra blank pages
537
+ console.log("πŸ“ Setting exact document height...");
538
+ await page.evaluate(() => {
539
+ const getDocumentHeight = () => Math.max(
540
+ document.body.scrollHeight, document.body.offsetHeight,
541
+ document.documentElement.clientHeight, document.documentElement.scrollHeight,
542
+ document.documentElement.offsetHeight
543
+ );
544
+ const height = getDocumentHeight();
545
+ document.body.style.height = `${height}px !important`;
546
+ document.documentElement.style.height = `${height}px !important`;
547
+ document.body.style.overflow = 'hidden !important';
548
+ });
549
+
550
+ // Final content verification
551
+ const contentCheck = await page.evaluate(() => {
552
+ const textContent = document.body.textContent || '';
553
+ const images = document.querySelectorAll('img');
554
+ const documentImages = Array.from(images).filter(img =>
555
+ img.src.includes('document') || img.src.includes('page') ||
556
+ img.alt.includes('document') || img.alt.includes('page')
557
+ );
558
+ return {
559
+ totalText: textContent.length,
560
+ totalImages: images.length,
561
+ documentImages: documentImages.length,
562
+ hasDocumentContent: documentImages.length > 0 || textContent.length > 1000,
563
+ sampleText: textContent.substring(0, 300)
564
+ };
565
+ });
566
+ console.log("πŸ“Š Content verification:", {
567
+ textLength: contentCheck.totalText,
568
+ images: contentCheck.totalImages,
569
+ documentImages: contentCheck.documentImages,
570
+ hasContent: contentCheck.hasDocumentContent
571
+ });
572
+
573
+ if (!contentCheck.hasDocumentContent) {
574
+ console.warn("⚠️ Warning: Limited document content detected. Use premium credentials for full access.");
575
+ }
576
+
577
+ // Apply print styles
578
+ await applyPrintStyles(page);
579
+
580
+ // Emulate print media
581
+ await page.emulateMediaType('print');
582
+
583
+ // Generate PDF
584
+ console.log("πŸ”„ Generating PDF...");
585
+ const pdfBuffer = await page.pdf({
586
+ printBackground: true,
587
+ preferCSSPageSize: true,
588
+ displayHeaderFooter: false,
589
+ timeout: 180000,
590
+ scale: 1,
591
+ omitBackground: false
592
+ });
593
+ console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
594
+
595
+ return pdfBuffer;
596
+ } catch (error) {
597
+ console.error("❌ Error during PDF generation:", error);
598
+ if (error.message.includes('timeout')) {
599
+ throw new Error("Request timed out. The document may be taking too long to load. Please try again.");
600
+ } else if (error.message.includes('net::')) {
601
+ throw new Error("Network error. Please check the URL and your internet connection.");
602
+ } else if (error.message.includes('ERR_BLOCKED')) {
603
+ throw new Error("Access blocked. Try again or check if the document is publicly accessible.");
604
+ } else {
605
+ throw new Error(`Failed to generate PDF: ${error.message}`);
606
+ }
607
+ } finally {
608
+ if (browser) {
609
+ console.log("πŸ”’ Closing browser...");
610
+ try {
611
+ await browser.close();
612
+ } catch (e) {
613
+ console.log("Error closing browser:", e.message);
614
+ }
615
+ }
616
+ }
617
+ };
618
+
619
+ // API Routes
620
/**
 * Turns a client-supplied value into an absolute StuDocu URL.
 *
 * A missing scheme is treated as https. The decision is made on the parsed
 * hostname rather than on a substring, so values such as
 * "http://localhost/?x=studocu.com" never reach the headless browser.
 *
 * @param {string} rawUrl - The `url` field of the request body.
 * @returns {string|null} The normalized URL, or null when the value is not a
 *   URL on studocu.com or one of its subdomains.
 */
const normalizeStudocuUrl = (rawUrl) => {
    const trimmed = rawUrl.trim();
    const candidate = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;
    let parsed;
    try {
        parsed = new URL(candidate);
    } catch (e) {
        return null;
    }
    const host = parsed.hostname;
    const isStudocuHost = host === 'studocu.com' || host.endsWith('.studocu.com');
    return isStudocuHost ? parsed.href : null;
};

// API Routes

/**
 * POST /api/download
 * Body: { url, filename?, email?, password? }
 * Responds with the generated PDF, with 400 and {error} for a missing or
 * non-StuDocu URL, or with 500 and {error} when generation fails.
 */
app.post('/api/download', async (req, res) => {
    // The body is absent when the request carried no parsable JSON.
    const { url, filename, email, password } = req.body || {};

    // Type-check before calling string methods: a non-string value would throw
    // here, outside the try block, and leave the request without a response.
    if (typeof url !== 'string' || !url.trim()) {
        return res.status(400).json({ error: 'URL is required.' });
    }

    const normalizedUrl = normalizeStudocuUrl(url);
    if (!normalizedUrl) {
        return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
    }

    console.log(`🎯 Processing request for: ${normalizedUrl}`);

    try {
        const startTime = Date.now();
        const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
        const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);

        res.setHeader('Content-Type', 'application/pdf');
        res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
        res.setHeader('Content-Length', pdfBuffer.length);
        res.send(pdfBuffer);

        console.log(`🎉 Request completed successfully in ${processingTime}s`);
    } catch (error) {
        console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
        // A JSON error can only be sent while the PDF headers are still unsent.
        if (!res.headersSent) {
            res.status(500).json({ error: error.message });
        }
    }
});
654
+
655
+ app.get('/health', (req, res) => {
656
+ res.json({ status: 'healthy', timestamp: new Date().toISOString(), uptime: process.uptime() });
657
+ });
658
+
659
+ app.get('/', (req, res) => {
660
+ res.json({
661
+ message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
662
+ version: '5.3',
663
+ features: [
664
+ 'πŸͺ Advanced cookie banner bypass',
665
+ 'πŸ”“ Premium content unblurring (client-side only; server-side blur requires premium login)',
666
+ 'πŸ”‘ Login support for full unblurred content access',
667
+ 'πŸ€– Anti-bot detection evasion',
668
+ 'πŸ“„ Full document content extraction with print styles for clean PDF'
669
+ ],
670
+ endpoints: {
671
+ download: 'POST /api/download (body: {url, filename?, email?, password?})',
672
+ health: 'GET /health'
673
+ },
674
+ note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
675
+ });
676
+ });
677
+
678
+ process.on('SIGTERM', () => {
679
+ console.log('SIGTERM received, shutting down gracefully...');
680
+ process.exit(0);
681
+ });
682
+
683
+ process.on('SIGINT', () => {
684
+ console.log('SIGINT received, shutting down gracefully...');
685
+ process.exit(0);
686
+ });
687
+
688
+ app.listen(port, () => {
689
+ console.log(`πŸš€ Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
690
+ console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
691
+ });
server.js CHANGED
@@ -12,7 +12,6 @@ app.use(express.json());
12
  */
13
  const bypassCookiesAndRestrictions = async (page) => {
14
  console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
15
-
16
  // Step 1: Set cookies before page load
17
  const preCookies = [
18
  { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
@@ -24,7 +23,6 @@ const bypassCookiesAndRestrictions = async (page) => {
24
  { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
25
  { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
26
  ];
27
-
28
  for (const cookie of preCookies) {
29
  try {
30
  await page.setCookie(cookie);
@@ -36,58 +34,42 @@ const bypassCookiesAndRestrictions = async (page) => {
36
  // Step 2: Inject CSS to hide cookie banners immediately
37
  await page.addStyleTag({
38
  content: `
39
- /* Hide all possible cookie banners */
40
- [id*="cookie" i]:not(img):not(input),
41
- [class*="cookie" i]:not(img):not(input),
42
- [data-testid*="cookie" i],
43
- [aria-label*="cookie" i],
44
- .gdpr-banner, .gdpr-popup, .gdpr-modal,
45
- .consent-banner, .consent-popup, .consent-modal,
46
- .privacy-banner, .privacy-popup, .privacy-modal,
47
- .cookie-law, .cookie-policy, .cookie-compliance,
48
- .onetrust-banner-sdk, #onetrust-consent-sdk,
49
- .cmp-banner, .cmp-popup, .cmp-modal,
50
- [class*="CookieBanner"], [class*="CookieNotice"],
51
- [class*="ConsentBanner"], [class*="ConsentManager"],
52
- .cc-banner, .cc-window, .cc-compliance,
53
- div[style*="position: fixed"]:has-text("cookie"),
54
- div[style*="position: fixed"]:has-text("consent"),
55
- .fixed:has-text("cookie"), .fixed:has-text("consent") {
56
- display: none !important;
57
- visibility: hidden !important;
58
- opacity: 0 !important;
59
- z-index: -9999 !important;
60
- pointer-events: none !important;
61
- }
62
-
63
- /* Remove blur and premium overlays */
64
- [class*="blur" i], [class*="premium" i],
65
- [class*="paywall" i], [class*="sample-preview-blur" i] {
66
- filter: none !important;
67
- backdrop-filter: none !important;
68
- opacity: 1 !important;
69
- visibility: visible !important;
70
- }
71
-
72
- /* Ensure document content is visible */
73
- .document-content, .page-content, [data-page] {
74
- filter: none !important;
75
- opacity: 1 !important;
76
- visibility: visible !important;
77
- pointer-events: auto !important;
78
- }
79
-
80
- /* Remove fixed overlays */
81
- .fixed-overlay, .sticky-overlay, .content-overlay {
82
- display: none !important;
83
- }
84
-
85
- /* Restore scrolling */
86
- html, body {
87
- overflow: auto !important;
88
- position: static !important;
89
- }
90
- `
91
  });
92
 
93
  // Step 3: Inject JavaScript to handle dynamic cookie banners
@@ -107,7 +89,6 @@ const bypassCookiesAndRestrictions = async (page) => {
107
  const text = element.textContent || '';
108
  const className = element.className || '';
109
  const id = element.id || '';
110
-
111
  // Check if this looks like a cookie banner
112
  if (
113
  text.toLowerCase().includes('cookie') ||
@@ -126,21 +107,16 @@ const bypassCookiesAndRestrictions = async (page) => {
126
  });
127
  });
128
  });
129
-
130
  observer.observe(document.body, { childList: true, subtree: true });
131
 
132
  // Set up periodic cleanup
133
  setInterval(() => {
134
  const cookieElements = document.querySelectorAll(`
135
- [id*="cookie" i]:not(img):not(input),
136
- [class*="cookie" i]:not(img):not(input),
137
- [data-testid*="cookie" i],
138
- .gdpr-banner, .consent-banner, .privacy-banner,
139
- .onetrust-banner-sdk, #onetrust-consent-sdk,
140
- .cmp-banner, .cc-banner
141
- `);
142
  cookieElements.forEach(el => el.remove());
143
-
144
  // Restore body scroll
145
  document.body.style.overflow = 'auto';
146
  document.documentElement.style.overflow = 'auto';
@@ -155,7 +131,6 @@ const bypassCookiesAndRestrictions = async (page) => {
155
  */
156
  const unblurContent = async (page) => {
157
  console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
158
-
159
  await page.evaluate(() => {
160
  // Function to remove all visual restrictions
161
  const removeRestrictions = () => {
@@ -177,7 +152,6 @@ const unblurContent = async (page) => {
177
  const removeBlur = (element = document) => {
178
  element.querySelectorAll("*").forEach(el => {
179
  const style = window.getComputedStyle(el);
180
-
181
  // Check for blur via filter, backdrop-filter, or class names
182
  if (
183
  style.filter?.includes("blur") ||
@@ -193,7 +167,6 @@ const unblurContent = async (page) => {
193
  el.classList.remove("blur", "blurred", "premium-blur");
194
  }
195
  }
196
-
197
  // Check parent elements for blur-inducing styles
198
  const parent = el.parentElement;
199
  if (parent) {
@@ -215,8 +188,7 @@ const unblurContent = async (page) => {
215
  document.querySelectorAll("div, section, aside").forEach(el => {
216
  const style = window.getComputedStyle(el);
217
  if (
218
- style.backgroundColor.includes("rgba") &&
219
- (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000) ||
220
  (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
221
  (el.className && el.className.toString().toLowerCase().includes("paywall"))
222
  ) {
@@ -231,9 +203,8 @@ const unblurContent = async (page) => {
231
 
232
  // Ensure document content is visible
233
  const contentSelectors = [
234
- '.document-content', '.page-content', '.content',
235
- '[data-page]', '[data-testid*="document"]', '[data-testid*="page"]',
236
- '.page', '.document-page', 'main', 'article'
237
  ];
238
  contentSelectors.forEach(selector => {
239
  document.querySelectorAll(selector).forEach(el => {
@@ -247,14 +218,12 @@ const unblurContent = async (page) => {
247
 
248
  // Remove overlay divs that might be blocking content
249
  const overlays = document.querySelectorAll(`
250
- [class*="overlay" i], [class*="modal" i], [class*="popup" i],
251
- [class*="banner" i], [style*="position: fixed"],
252
- [style*="position: absolute"][style*="z-index"]
253
- `);
254
  overlays.forEach(overlay => {
255
  const text = overlay.textContent || '';
256
- if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') ||
257
- text.includes('cookie') || text.includes('consent') || text.includes('login')) {
258
  overlay.remove();
259
  }
260
  });
@@ -278,57 +247,53 @@ const unblurContent = async (page) => {
278
  */
279
  const applyPrintStyles = async (page) => {
280
  console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
281
-
282
  await page.evaluate(() => {
283
  const style = document.createElement("style");
284
  style.id = "print-style-extension";
285
  style.innerHTML = `
286
- @page {
287
- size: A4 portrait;
288
- margin: 5mm;
289
- }
290
- @media print {
291
- html, body {
292
- margin: 0 !important;
293
- padding: 0 !important;
294
- overflow: visible !important;
295
- }
296
- header, footer, nav, aside, .no-print, .ads, .sidebar,
297
- .premium-banner, .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho,
298
- .Sidebar_sidebar-scrollable__kqeBZ, .HeaderWrapper_header-wrapper__mCmf3,
299
- .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
300
- .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
301
- .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper,
302
- #top-bar-wrapper, .Layout_sidebar-wrapper__unavM,
303
- .Layout_is-open__9DQr4 {
304
- display: none !important;
305
- }
306
- body {
307
- background: white !important;
308
- color: black !important;
309
- }
310
- * {
311
- box-shadow: none !important;
312
- background: transparent !important;
313
- }
314
- .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
315
- .Viewer_document-wrapper__XsO4j, .page-content {
316
- display: flex !important;
317
- flex-direction: column !important;
318
- width: 100% !important;
319
- max-width: 210mm !important;
320
- margin: 0 auto !important;
321
- }
322
- [data-page], .page, .document-page, img {
323
- page-break-after: always !important;
324
- page-break-inside: avoid !important;
325
- page-break-before: avoid !important;
326
- width: 100% !important;
327
- max-width: 100% !important;
328
- height: auto !important;
329
- }
330
- }
331
- `;
332
  document.head.appendChild(style);
333
  });
334
  };
@@ -340,9 +305,8 @@ const studocuDownloader = async (url, options = {}) => {
340
  let browser;
341
  try {
342
  console.log("πŸš€ Launching browser with stealth configuration...");
343
- // Replace this part in your server.js (around line 343)
344
  browser = await puppeteer.launch({
345
- headless: "new", // Use new headless mode
346
  args: [
347
  '--no-sandbox',
348
  '--disable-setuid-sandbox',
@@ -360,20 +324,9 @@ const studocuDownloader = async (url, options = {}) => {
360
  '--disable-web-security',
361
  '--disable-features=site-per-process',
362
  '--disable-blink-features=AutomationControlled',
363
- '--disable-extensions',
364
- '--single-process', // Important for containers
365
- '--disable-background-tasks',
366
- '--disable-default-apps',
367
- '--disable-sync',
368
- '--metrics-recording-only',
369
- '--no-default-browser-check',
370
- '--no-pings',
371
- '--password-store=basic',
372
- '--use-mock-keychain',
373
- '--disable-gpu-sandbox'
374
  ],
375
  timeout: 300000,
376
- executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable'
377
  });
378
 
379
  const page = await browser.newPage();
@@ -397,7 +350,6 @@ const studocuDownloader = async (url, options = {}) => {
397
  page.on('request', (req) => {
398
  const resourceType = req.resourceType();
399
  const reqUrl = req.url();
400
-
401
  // Block trackers, ads, and analytics
402
  if (
403
  reqUrl.includes('doubleclick') ||
@@ -410,7 +362,7 @@ const studocuDownloader = async (url, options = {}) => {
410
  reqUrl.includes('mixpanel') ||
411
  reqUrl.includes('onetrust') ||
412
  reqUrl.includes('cookielaw') ||
413
- resourceType === 'other' && reqUrl.includes('track')
414
  ) {
415
  req.abort();
416
  } else {
@@ -440,7 +392,6 @@ const studocuDownloader = async (url, options = {}) => {
440
  }
441
 
442
  console.log(`πŸ“„ Navigating to ${url}...`);
443
-
444
  // Navigate with retry logic
445
  let navigationSuccess = false;
446
  let attempts = 0;
@@ -467,9 +418,8 @@ const studocuDownloader = async (url, options = {}) => {
467
  // Wait for document content with multiple selectors
468
  console.log("⏳ Waiting for document content to load...");
469
  const contentSelectors = [
470
- '.document-content', '.page-content', '[data-page]',
471
- '[data-testid*="document"]', 'img[src*="document"]',
472
- 'img[src*="page"]', '.page', 'main img', 'article img'
473
  ];
474
  let contentFound = false;
475
  for (const selector of contentSelectors) {
@@ -482,6 +432,7 @@ const studocuDownloader = async (url, options = {}) => {
482
  console.log(`❌ Selector ${selector} not found, trying next...`);
483
  }
484
  }
 
485
  if (!contentFound) {
486
  console.log("⚠️ No specific content selector found, proceeding with page content...");
487
  }
@@ -490,7 +441,6 @@ const studocuDownloader = async (url, options = {}) => {
490
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
491
  await page.evaluate(async () => {
492
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
493
-
494
  let scrollHeight = document.body.scrollHeight;
495
  while (true) {
496
  let totalHeight = 0;
@@ -500,14 +450,11 @@ const studocuDownloader = async (url, options = {}) => {
500
  totalHeight += distance;
501
  await delay(500); // Increased delay for better loading
502
  }
503
-
504
  await delay(2000); // Extra wait after reaching bottom
505
-
506
  const newHeight = document.body.scrollHeight;
507
  if (newHeight === scrollHeight) break;
508
  scrollHeight = newHeight;
509
  }
510
-
511
  // Scroll to top
512
  window.scrollTo({ top: 0, behavior: "smooth" });
513
  await delay(1000);
@@ -538,8 +485,7 @@ const studocuDownloader = async (url, options = {}) => {
538
  await page.evaluate(() => {
539
  const getDocumentHeight = () => Math.max(
540
  document.body.scrollHeight, document.body.offsetHeight,
541
- document.documentElement.clientHeight, document.documentElement.scrollHeight,
542
- document.documentElement.offsetHeight
543
  );
544
  const height = getDocumentHeight();
545
  document.body.style.height = `${height}px !important`;
@@ -590,9 +536,10 @@ const studocuDownloader = async (url, options = {}) => {
590
  scale: 1,
591
  omitBackground: false
592
  });
593
- console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
594
 
 
595
  return pdfBuffer;
 
596
  } catch (error) {
597
  console.error("❌ Error during PDF generation:", error);
598
  if (error.message.includes('timeout')) {
@@ -616,14 +563,144 @@ const studocuDownloader = async (url, options = {}) => {
616
  }
617
  };
618
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
619
  // API Routes
 
 
620
  app.post('/api/download', async (req, res) => {
621
  const { url, filename, email, password } = req.body;
622
-
623
  if (!url) {
624
  return res.status(400).json({ error: 'URL is required.' });
625
  }
626
-
627
  if (!url.includes('studocu.com')) {
628
  return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
629
  }
@@ -634,17 +711,14 @@ app.post('/api/download', async (req, res) => {
634
  }
635
 
636
  console.log(`🎯 Processing request for: ${normalizedUrl}`);
637
-
638
  try {
639
  const startTime = Date.now();
640
  const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
641
  const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
642
-
643
  res.setHeader('Content-Type', 'application/pdf');
644
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
645
  res.setHeader('Content-Length', pdfBuffer.length);
646
  res.send(pdfBuffer);
647
-
648
  console.log(`πŸŽ‰ Request completed successfully in ${processingTime}s`);
649
  } catch (error) {
650
  console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
@@ -652,23 +726,55 @@ app.post('/api/download', async (req, res) => {
652
  }
653
  });
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  app.get('/health', (req, res) => {
656
- res.json({ status: 'healthy', timestamp: new Date().toISOString(), uptime: process.uptime() });
 
 
 
 
657
  });
658
 
659
  app.get('/', (req, res) => {
660
  res.json({
661
- message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
662
- version: '5.3',
663
  features: [
664
  'πŸͺ Advanced cookie banner bypass',
665
  'πŸ”“ Premium content unblurring (client-side only; server-side blur requires premium login)',
666
  'πŸ”‘ Login support for full unblurred content access',
667
  'πŸ€– Anti-bot detection evasion',
668
- 'πŸ“„ Full document content extraction with print styles for clean PDF'
 
669
  ],
670
  endpoints: {
671
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
 
672
  health: 'GET /health'
673
  },
674
  note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
@@ -686,6 +792,6 @@ process.on('SIGINT', () => {
686
  });
687
 
688
  app.listen(port, () => {
689
- console.log(`πŸš€ Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
690
- console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
691
  });
 
12
  */
13
  const bypassCookiesAndRestrictions = async (page) => {
14
  console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
 
15
  // Step 1: Set cookies before page load
16
  const preCookies = [
17
  { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
 
23
  { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
24
  { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
25
  ];
 
26
  for (const cookie of preCookies) {
27
  try {
28
  await page.setCookie(cookie);
 
34
  // Step 2: Inject CSS to hide cookie banners immediately
35
  await page.addStyleTag({
36
  content: `
37
+ /* Hide all possible cookie banners */
38
+ [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
39
+ .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
40
+ .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
41
+ [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance,
42
+ div[style*="position: fixed"]:has-text("cookie"), div[style*="position: fixed"]:has-text("consent"), .fixed:has-text("cookie"), .fixed:has-text("consent") {
43
+ display: none !important;
44
+ visibility: hidden !important;
45
+ opacity: 0 !important;
46
+ z-index: -9999 !important;
47
+ pointer-events: none !important;
48
+ }
49
+ /* Remove blur and premium overlays */
50
+ [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i] {
51
+ filter: none !important;
52
+ backdrop-filter: none !important;
53
+ opacity: 1 !important;
54
+ visibility: visible !important;
55
+ }
56
+ /* Ensure document content is visible */
57
+ .document-content, .page-content, [data-page] {
58
+ filter: none !important;
59
+ opacity: 1 !important;
60
+ visibility: visible !important;
61
+ pointer-events: auto !important;
62
+ }
63
+ /* Remove fixed overlays */
64
+ .fixed-overlay, .sticky-overlay, .content-overlay {
65
+ display: none !important;
66
+ }
67
+ /* Restore scrolling */
68
+ html, body {
69
+ overflow: auto !important;
70
+ position: static !important;
71
+ }
72
+ `
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  });
74
 
75
  // Step 3: Inject JavaScript to handle dynamic cookie banners
 
89
  const text = element.textContent || '';
90
  const className = element.className || '';
91
  const id = element.id || '';
 
92
  // Check if this looks like a cookie banner
93
  if (
94
  text.toLowerCase().includes('cookie') ||
 
107
  });
108
  });
109
  });
 
110
  observer.observe(document.body, { childList: true, subtree: true });
111
 
112
  // Set up periodic cleanup
113
  setInterval(() => {
114
  const cookieElements = document.querySelectorAll(`
115
+ [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
116
+ .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
117
+ .cmp-banner, .cc-banner
118
+ `);
 
 
 
119
  cookieElements.forEach(el => el.remove());
 
120
  // Restore body scroll
121
  document.body.style.overflow = 'auto';
122
  document.documentElement.style.overflow = 'auto';
 
131
  */
132
  const unblurContent = async (page) => {
133
  console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
 
134
  await page.evaluate(() => {
135
  // Function to remove all visual restrictions
136
  const removeRestrictions = () => {
 
152
  const removeBlur = (element = document) => {
153
  element.querySelectorAll("*").forEach(el => {
154
  const style = window.getComputedStyle(el);
 
155
  // Check for blur via filter, backdrop-filter, or class names
156
  if (
157
  style.filter?.includes("blur") ||
 
167
  el.classList.remove("blur", "blurred", "premium-blur");
168
  }
169
  }
 
170
  // Check parent elements for blur-inducing styles
171
  const parent = el.parentElement;
172
  if (parent) {
 
188
  document.querySelectorAll("div, section, aside").forEach(el => {
189
  const style = window.getComputedStyle(el);
190
  if (
191
+ (style.backgroundColor.includes("rgba") && (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000)) ||
 
192
  (el.className && el.className.toString().toLowerCase().includes("overlay")) ||
193
  (el.className && el.className.toString().toLowerCase().includes("paywall"))
194
  ) {
 
203
 
204
  // Ensure document content is visible
205
  const contentSelectors = [
206
+ '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
207
+ '[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
 
208
  ];
209
  contentSelectors.forEach(selector => {
210
  document.querySelectorAll(selector).forEach(el => {
 
218
 
219
  // Remove overlay divs that might be blocking content
220
  const overlays = document.querySelectorAll(`
221
+ [class*="overlay" i], [class*="modal" i], [class*="popup" i], [class*="banner" i],
222
+ [style*="position: fixed"], [style*="position: absolute"][style*="z-index"]
223
+ `);
 
224
  overlays.forEach(overlay => {
225
  const text = overlay.textContent || '';
226
+ if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') || text.includes('cookie') || text.includes('consent') || text.includes('login')) {
 
227
  overlay.remove();
228
  }
229
  });
 
247
  */
248
  const applyPrintStyles = async (page) => {
249
  console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
 
250
  await page.evaluate(() => {
251
  const style = document.createElement("style");
252
  style.id = "print-style-extension";
253
  style.innerHTML = `
254
+ @page {
255
+ size: A4 portrait;
256
+ margin: 5mm;
257
+ }
258
+ @media print {
259
+ html, body {
260
+ margin: 0 !important;
261
+ padding: 0 !important;
262
+ overflow: visible !important;
263
+ }
264
+ header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
265
+ .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
266
+ .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
267
+ .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
268
+ .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
269
+ .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
270
+ display: none !important;
271
+ }
272
+ body {
273
+ background: white !important;
274
+ color: black !important;
275
+ }
276
+ * {
277
+ box-shadow: none !important;
278
+ background: transparent !important;
279
+ }
280
+ .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ, .Viewer_document-wrapper__XsO4j, .page-content {
281
+ display: flex !important;
282
+ flex-direction: column !important;
283
+ width: 100% !important;
284
+ max-width: 210mm !important;
285
+ margin: 0 auto !important;
286
+ }
287
+ [data-page], .page, .document-page, img {
288
+ page-break-after: always !important;
289
+ page-break-inside: avoid !important;
290
+ page-break-before: avoid !important;
291
+ width: 100% !important;
292
+ max-width: 100% !important;
293
+ height: auto !important;
294
+ }
295
+ }
296
+ `;
 
 
 
297
  document.head.appendChild(style);
298
  });
299
  };
 
305
  let browser;
306
  try {
307
  console.log("πŸš€ Launching browser with stealth configuration...");
 
308
  browser = await puppeteer.launch({
309
+ headless: true,
310
  args: [
311
  '--no-sandbox',
312
  '--disable-setuid-sandbox',
 
324
  '--disable-web-security',
325
  '--disable-features=site-per-process',
326
  '--disable-blink-features=AutomationControlled',
327
+ '--disable-extensions'
 
 
 
 
 
 
 
 
 
 
328
  ],
329
  timeout: 300000,
 
330
  });
331
 
332
  const page = await browser.newPage();
 
350
  page.on('request', (req) => {
351
  const resourceType = req.resourceType();
352
  const reqUrl = req.url();
 
353
  // Block trackers, ads, and analytics
354
  if (
355
  reqUrl.includes('doubleclick') ||
 
362
  reqUrl.includes('mixpanel') ||
363
  reqUrl.includes('onetrust') ||
364
  reqUrl.includes('cookielaw') ||
365
+ (resourceType === 'other' && reqUrl.includes('track'))
366
  ) {
367
  req.abort();
368
  } else {
 
392
  }
393
 
394
  console.log(`πŸ“„ Navigating to ${url}...`);
 
395
  // Navigate with retry logic
396
  let navigationSuccess = false;
397
  let attempts = 0;
 
418
  // Wait for document content with multiple selectors
419
  console.log("⏳ Waiting for document content to load...");
420
  const contentSelectors = [
421
+ '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
422
+ 'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
 
423
  ];
424
  let contentFound = false;
425
  for (const selector of contentSelectors) {
 
432
  console.log(`❌ Selector ${selector} not found, trying next...`);
433
  }
434
  }
435
+
436
  if (!contentFound) {
437
  console.log("⚠️ No specific content selector found, proceeding with page content...");
438
  }
 
441
  console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
442
  await page.evaluate(async () => {
443
  const delay = (ms) => new Promise((res) => setTimeout(res, ms));
 
444
  let scrollHeight = document.body.scrollHeight;
445
  while (true) {
446
  let totalHeight = 0;
 
450
  totalHeight += distance;
451
  await delay(500); // Increased delay for better loading
452
  }
 
453
  await delay(2000); // Extra wait after reaching bottom
 
454
  const newHeight = document.body.scrollHeight;
455
  if (newHeight === scrollHeight) break;
456
  scrollHeight = newHeight;
457
  }
 
458
  // Scroll to top
459
  window.scrollTo({ top: 0, behavior: "smooth" });
460
  await delay(1000);
 
485
  await page.evaluate(() => {
486
  const getDocumentHeight = () => Math.max(
487
  document.body.scrollHeight, document.body.offsetHeight,
488
+ document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
 
489
  );
490
  const height = getDocumentHeight();
491
  document.body.style.height = `${height}px !important`;
 
536
  scale: 1,
537
  omitBackground: false
538
  });
 
539
 
540
+ console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
541
  return pdfBuffer;
542
+
543
  } catch (error) {
544
  console.error("❌ Error during PDF generation:", error);
545
  if (error.message.includes('timeout')) {
 
563
  }
564
  };
565
 
566
/**
 * Streams a StuDocu document to the client one page at a time.
 *
 * Launches headless Chromium, optionally logs in, opens the document, scrolls
 * through it so lazily loaded pages get a chance to render, then screenshots
 * every `[data-page]` element and writes it to `res` as newline-delimited JSON:
 * one `{ pageNumber, totalPages, imageData }` object per line, where
 * `imageData` is a base64 PNG data URL.
 *
 * The function owns the response: failures are reported to the client (a 500
 * JSON body if nothing was sent yet, otherwise an `{ error }` line appended to
 * the stream) and the response is always ended, so nothing is rethrown.
 *
 * @param {string} url - Document URL to open.
 * @param {{email?: string, password?: string}} options - Optional credentials;
 *   login is attempted only when both are provided.
 * @param {object} res - HTTP response to stream into.
 * @returns {Promise<void>}
 */
const studocuDownloaderStreamed = async (url, options, res) => {
    const { email, password } = options || {};
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Writes one NDJSON line. Screenshots are large, so when the socket buffer
    // is full we wait for 'drain' instead of piling data up in memory; 'close'
    // is awaited as well so a vanished client cannot leave us waiting forever.
    // Resolves to false once the client is gone.
    const writeLine = async (payload) => {
        if (res.writableEnded || res.destroyed) {
            return false;
        }
        if (res.write(JSON.stringify(payload) + '\n')) {
            return true;
        }
        await new Promise((resolve) => {
            const settle = () => {
                res.off('drain', settle);
                res.off('close', settle);
                resolve();
            };
            res.once('drain', settle);
            res.once('close', settle);
        });
        return !res.destroyed;
    };

    let browser;
    try {
        console.log("🚀 Launching browser for streaming with stealth configuration...");
        browser = await puppeteer.launch({
            headless: true,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-accelerated-2d-canvas',
                '--no-first-run',
                '--no-zygote',
                '--disable-gpu'
            ],
            timeout: 300000,
        });

        const page = await browser.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        // 794x1122 px viewport, as in the original implementation.
        await page.setViewport({ width: 794, height: 1122 });

        // Hide the automation flag before any page script runs.
        await page.evaluateOnNewDocument(() => {
            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
        });

        await bypassCookiesAndRestrictions(page);

        // Drop third-party images, stylesheets, fonts and misc requests;
        // anything whose URL mentions studocu.com is allowed through.
        await page.setRequestInterception(true);
        page.on('request', (req) => {
            if (['image', 'stylesheet', 'font', 'other'].includes(req.resourceType()) && !req.url().includes('studocu.com')) {
                req.abort();
            } else {
                req.continue();
            }
        });

        if (email && password) {
            console.log("🔑 Logging in for streaming...");
            await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded' });
            await page.waitForSelector('#email');
            await page.type('#email', email);
            await page.type('#password', password);
            // Start waiting for the navigation BEFORE clicking: if the click is
            // awaited first, a fast navigation can complete before
            // waitForNavigation is registered and the wait then times out.
            await Promise.all([
                page.waitForNavigation({ waitUntil: 'networkidle2' }),
                page.click('button[type="submit"]')
            ]);
            console.log("✅ Login successful for streaming.");
        }

        console.log(`📄 Navigating to ${url} for streaming...`);
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        await pause(5000);

        await unblurContent(page);

        console.log("⏳ Waiting for document pages to load...");
        await page.waitForSelector('[data-page]', { timeout: 30000 });

        console.log("📜 Scrolling to load all pages for streaming...");
        await page.evaluate(async () => {
            // Scroll 100 px every 100 ms until the scrolled distance reaches
            // the (possibly growing) document height.
            await new Promise(resolve => {
                let totalHeight = 0;
                const distance = 100;
                const timer = setInterval(() => {
                    const scrollHeight = document.body.scrollHeight;
                    window.scrollBy(0, distance);
                    totalHeight += distance;
                    if (totalHeight >= scrollHeight) {
                        clearInterval(timer);
                        resolve();
                    }
                }, 100);
            });
        });

        // Pages revealed by scrolling may carry their own blur/overlays.
        await unblurContent(page);
        await pause(5000);

        const pageElements = await page.$$('[data-page]');
        const totalPages = pageElements.length;
        console.log(`📄 Found ${totalPages} pages to stream.`);

        if (totalPages === 0) {
            throw new Error("No document pages found to stream. The content might be protected or not loaded correctly.");
        }

        // Set headers for streaming
        res.setHeader('Content-Type', 'application/json');
        res.setHeader('Transfer-Encoding', 'chunked');

        let clientConnected = true;
        for (let i = 0; i < totalPages && clientConnected; i++) {
            console.log(`🎨 Rendering page ${i + 1} of ${totalPages}...`);
            const imageData = await pageElements[i].screenshot({ type: 'png', encoding: 'base64' });

            // One JSON object per line (newline-delimited JSON).
            clientConnected = await writeLine({
                pageNumber: i + 1,
                totalPages: totalPages,
                imageData: `data:image/png;base64,${imageData}`
            });
        }

        if (clientConnected) {
            console.log("✅ All pages have been rendered and sent.");
        } else {
            // No point rendering further pages nobody will receive.
            console.log("Client disconnected; stopped streaming early.");
        }

    } catch (error) {
        console.error("❌ Error during streamed download:", error);
        const errorResponse = {
            error: `Failed to generate streamed PDF: ${error.message}`
        };
        if (!res.headersSent) {
            res.status(500).json(errorResponse);
        } else if (!res.writableEnded && !res.destroyed) {
            res.write(JSON.stringify(errorResponse) + '\n');
        }
    } finally {
        if (browser) {
            console.log("🔒 Closing browser for streaming...");
            try {
                await browser.close();
            } catch (closeError) {
                // A failed close must not prevent the response from being ended.
                console.error("Failed to close browser:", closeError);
            }
        }
        if (!res.writableEnded) {
            res.end(); // End the stream
        }
    }
};
694
+
695
+
696
  // API Routes
697
+
698
+ // Original endpoint for downloading the full PDF at once
699
  app.post('/api/download', async (req, res) => {
700
  const { url, filename, email, password } = req.body;
 
701
  if (!url) {
702
  return res.status(400).json({ error: 'URL is required.' });
703
  }
 
704
  if (!url.includes('studocu.com')) {
705
  return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
706
  }
 
711
  }
712
 
713
  console.log(`🎯 Processing request for: ${normalizedUrl}`);
 
714
  try {
715
  const startTime = Date.now();
716
  const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
717
  const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
 
718
  res.setHeader('Content-Type', 'application/pdf');
719
  res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
720
  res.setHeader('Content-Length', pdfBuffer.length);
721
  res.send(pdfBuffer);
 
722
  console.log(`πŸŽ‰ Request completed successfully in ${processingTime}s`);
723
  } catch (error) {
724
  console.error(`❌ Failed to process ${normalizedUrl}:`, error.message);
 
726
  }
727
  });
728
 
729
// Endpoint for streaming the document page by page.
//
// Request body (JSON): { url, email?, password? }
//   url      - StuDocu document URL; the scheme is optional and defaults to https.
//   email    - optional login e-mail forwarded to the downloader.
//   password - optional login password forwarded to the downloader.
//
// Responses:
//   400 JSON { error } when the url is missing, not a string, or not on studocu.com.
//   Otherwise the response body is written by studocuDownloaderStreamed, which
//   emits newline-delimited JSON objects and ends the response itself.
app.post('/api/download-stream', async (req, res) => {
    // req.body is undefined when no body parser matched the request.
    const { url, email, password } = req.body || {};

    // Reject non-string values up front: calling .trim()/.includes() on a
    // number or object would throw inside this async handler and leave the
    // request without any response.
    if (typeof url !== 'string' || url.trim() === '') {
        return res.status(400).json({ error: 'URL is required.' });
    }

    // Only a real http(s) scheme counts as "already has a scheme"; a bare
    // startsWith('http') check would also accept hosts such as "httpfoo.com".
    let normalizedUrl = url.trim();
    if (!/^https?:\/\//i.test(normalizedUrl)) {
        normalizedUrl = 'https://' + normalizedUrl;
    }

    // Validate the parsed hostname instead of searching the raw string, so a
    // URL that merely mentions "studocu.com" in its path or query string
    // cannot direct the headless browser at an arbitrary host.
    let hostname = '';
    try {
        hostname = new URL(normalizedUrl).hostname.toLowerCase();
    } catch (parseError) {
        hostname = '';
    }
    if (hostname !== 'studocu.com' && !hostname.endsWith('.studocu.com')) {
        return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
    }

    console.log(`🎯 Processing stream request for: ${normalizedUrl}`);
    try {
        await studocuDownloaderStreamed(normalizedUrl, { email, password }, res);
        console.log(`πŸŽ‰ Stream request completed for ${normalizedUrl}`);
    } catch (error) {
        console.error(`❌ Failed to process stream for ${normalizedUrl}:`, error.message);
        // The downloader normally reports errors and closes the response on
        // its own. If it rejects anyway (for example while closing the
        // browser), make sure the client is not left with a hanging request.
        if (!res.headersSent) {
            res.status(500).json({ error: `Failed to generate streamed PDF: ${error.message}` });
        } else if (!res.writableEnded) {
            res.end();
        }
    }
});
753
+
754
+
755
  app.get('/health', (req, res) => {
756
+ res.json({
757
+ status: 'healthy',
758
+ timestamp: new Date().toISOString(),
759
+ uptime: process.uptime()
760
+ });
761
  });
762
 
763
  app.get('/', (req, res) => {
764
  res.json({
765
+ message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles and Streaming',
766
+ version: '5.3.1',
767
  features: [
768
  'πŸͺ Advanced cookie banner bypass',
769
  'πŸ”“ Premium content unblurring (client-side only; server-side blur requires premium login)',
770
  'πŸ”‘ Login support for full unblurred content access',
771
  'πŸ€– Anti-bot detection evasion',
772
+ 'πŸ“„ Full document content extraction with print styles for clean PDF',
773
+ 'πŸ”„ Real-time page rendering and streaming to the frontend'
774
  ],
775
  endpoints: {
776
  download: 'POST /api/download (body: {url, filename?, email?, password?})',
777
+ download_stream: 'POST /api/download-stream (body: {url, email?, password?})',
778
  health: 'GET /health'
779
  },
780
  note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
 
792
  });
793
 
794
  app.listen(port, () => {
795
+ console.log(`πŸš€ Enhanced StuDocu Downloader v5.3.1 running on http://localhost:${port}`);
796
+ console.log(`✨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection, and real-time page streaming`);
797
  });