update
Browse files- .vscode/settings.json +3 -0
- index.html +287 -0
- server copy.js +691 -0
- server.js +269 -163
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"liveServer.settings.port": 5501
|
| 3 |
+
}
|
index.html
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>StuDocu Downloader</title>
|
| 8 |
+
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;600&display=swap" rel="stylesheet">
|
| 9 |
+
<style>
|
| 10 |
+
body {
|
| 11 |
+
font-family: 'Poppins', sans-serif;
|
| 12 |
+
background-color: #f0f2f5;
|
| 13 |
+
margin: 0;
|
| 14 |
+
display: flex;
|
| 15 |
+
justify-content: center;
|
| 16 |
+
align-items: center;
|
| 17 |
+
min-height: 100vh;
|
| 18 |
+
padding: 20px;
|
| 19 |
+
box-sizing: border-box;
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
.container {
|
| 23 |
+
background-color: #ffffff;
|
| 24 |
+
padding: 30px 40px;
|
| 25 |
+
border-radius: 12px;
|
| 26 |
+
box-shadow: 0 8px 30px rgba(0, 0, 0, 0.08);
|
| 27 |
+
width: 100%;
|
| 28 |
+
max-width: 650px;
|
| 29 |
+
text-align: center;
|
| 30 |
+
transition: all 0.3s ease;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.header .logo {
|
| 34 |
+
width: 40px;
|
| 35 |
+
height: 40px;
|
| 36 |
+
color: #007bff;
|
| 37 |
+
margin-bottom: 10px;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.header h1 {
|
| 41 |
+
color: #2c3e50;
|
| 42 |
+
margin: 0 0 10px;
|
| 43 |
+
font-weight: 600;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
.header p {
|
| 47 |
+
color: #7f8c8d;
|
| 48 |
+
margin-bottom: 30px;
|
| 49 |
+
font-size: 1rem;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.form-container {
|
| 53 |
+
display: flex;
|
| 54 |
+
margin-bottom: 20px;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
#studocu-url {
|
| 58 |
+
flex-grow: 1;
|
| 59 |
+
padding: 14px 18px;
|
| 60 |
+
border: 1px solid #dfe4ea;
|
| 61 |
+
border-radius: 8px 0 0 8px;
|
| 62 |
+
font-size: 16px;
|
| 63 |
+
outline: none;
|
| 64 |
+
transition: border-color 0.3s ease, box-shadow 0.3s ease;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
#studocu-url:focus {
|
| 68 |
+
border-color: #007bff;
|
| 69 |
+
box-shadow: 0 0 0 3px rgba(0, 123, 255, 0.15);
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
#download-btn {
|
| 73 |
+
padding: 14px 25px;
|
| 74 |
+
border: none;
|
| 75 |
+
background-color: #007bff;
|
| 76 |
+
color: white;
|
| 77 |
+
font-size: 16px;
|
| 78 |
+
font-weight: 600;
|
| 79 |
+
border-radius: 0 8px 8px 0;
|
| 80 |
+
cursor: pointer;
|
| 81 |
+
outline: none;
|
| 82 |
+
position: relative;
|
| 83 |
+
transition: background-color 0.3s ease;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
#download-btn:hover {
|
| 87 |
+
background-color: #0056b3;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
#download-btn:disabled {
|
| 91 |
+
background-color: #5a9eeb;
|
| 92 |
+
cursor: not-allowed;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
/* Loader animation */
|
| 96 |
+
.btn-loader {
|
| 97 |
+
display: none;
|
| 98 |
+
border: 3px solid #f3f3f3;
|
| 99 |
+
border-top: 3px solid #0056b3;
|
| 100 |
+
border-radius: 50%;
|
| 101 |
+
width: 20px;
|
| 102 |
+
height: 20px;
|
| 103 |
+
animation: spin 1s linear infinite;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
#download-btn.loading .btn-text {
|
| 107 |
+
display: none;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
#download-btn.loading .btn-loader {
|
| 111 |
+
display: block;
|
| 112 |
+
margin: 0 auto;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
@keyframes spin {
|
| 116 |
+
0% {
|
| 117 |
+
transform: rotate(0deg);
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
100% {
|
| 121 |
+
transform: rotate(360deg);
|
| 122 |
+
}
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.status-indicator {
|
| 126 |
+
padding: 12px;
|
| 127 |
+
border-radius: 8px;
|
| 128 |
+
margin-top: 20px;
|
| 129 |
+
font-size: 0.95rem;
|
| 130 |
+
display: block;
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
.status-indicator.success {
|
| 134 |
+
background-color: #d4edda;
|
| 135 |
+
color: #155724;
|
| 136 |
+
border: 1px solid #c3e6cb;
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
.status-indicator.error {
|
| 140 |
+
background-color: #f8d7da;
|
| 141 |
+
color: #721c24;
|
| 142 |
+
border: 1px solid #f5c6cb;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.status-indicator.info {
|
| 146 |
+
background-color: #cce5ff;
|
| 147 |
+
color: #004085;
|
| 148 |
+
border: 1px solid #b8daff;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
.footer {
|
| 153 |
+
margin-top: 30px;
|
| 154 |
+
font-size: 0.8rem;
|
| 155 |
+
color: #95a5a6;
|
| 156 |
+
}
|
| 157 |
+
</style>
|
| 158 |
+
</head>
|
| 159 |
+
|
| 160 |
+
<body>
|
| 161 |
+
<div class="container">
|
| 162 |
+
<div class="header">
|
| 163 |
+
<svg class="logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor">
|
| 164 |
+
<path
|
| 165 |
+
d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm1 14h-2v-2h2v2zm0-4h-2V7h2v5z" />
|
| 166 |
+
</svg>
|
| 167 |
+
<h1>StuDocu Document Downloader</h1>
|
| 168 |
+
<p>Paste a valid StuDocu document URL to generate and download a PDF.</p>
|
| 169 |
+
</div>
|
| 170 |
+
<div class="main-content">
|
| 171 |
+
<div class="form-container">
|
| 172 |
+
<input type="text" id="studocu-url" placeholder="https://www.studocu.com/en-us/document/...">
|
| 173 |
+
<button id="download-btn">
|
| 174 |
+
<span class="btn-text">Download</span>
|
| 175 |
+
<span class="btn-loader"></span>
|
| 176 |
+
</button>
|
| 177 |
+
</div>
|
| 178 |
+
<div class="status-indicator" id="status-indicator" style="display: none;">
|
| 179 |
+
<!-- Messages will be displayed here by JavaScript -->
|
| 180 |
+
</div>
|
| 181 |
+
</div>
|
| 182 |
+
<div class="footer">
|
| 183 |
+
<p>Powered by the Heart by Us</p>
|
| 184 |
+
</div>
|
| 185 |
+
</div>
|
| 186 |
+
<script>
/*
 * Page controller for the StuDocu downloader form.
 *
 * Validates the pasted URL, POSTs it as JSON to the download API, and saves the
 * returned PDF through a temporary object-URL link. Progress and errors are
 * reported in the #status-indicator element.
 */
document.addEventListener('DOMContentLoaded', () => {
    const downloadBtn = document.getElementById('download-btn');
    const urlInput = document.getElementById('studocu-url');
    const statusIndicator = document.getElementById('status-indicator');

    const API_ENDPOINT = 'https://devusman-test.hf.space/api/download';

    // Revoking the object URL in the same tick as click() can cancel the
    // download in some browsers, so the cleanup is deferred briefly.
    const REVOKE_DELAY_MS = 1000;

    downloadBtn.addEventListener('click', async () => {
        const url = urlInput.value.trim();

        // 1. Validate the input URL
        if (!url) {
            showStatus('Please paste a URL first.', 'error');
            return;
        }

        if (!url.includes('studocu.com')) {
            showStatus('Please provide a valid StuDocu URL.', 'error');
            return;
        }

        // 2. Update UI to show loading state
        setLoading(true);
        showStatus('Request sent. Please wait, this can take up to a minute...', 'info');

        try {
            // 3. Send the POST request to the API
            const response = await fetch(API_ENDPOINT, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({ url }),
            });

            // 4. Handle the response
            if (!response.ok) {
                const message = await readErrorMessage(response);
                showStatus(`Error: ${message}`, 'error');
                return;
            }

            // On success the response body is the PDF file itself.
            showStatus('Success! Your download will start now.', 'success');
            const blob = await response.blob();
            saveBlob(blob, 'studocu-document.pdf');
        } catch (error) {
            // Network errors or other unexpected exceptions
            console.error('Download failed:', error);
            showStatus('Failed to connect to the server. Please check your connection and try again.', 'error');
        } finally {
            // 5. Reset the UI from the loading state
            setLoading(false);
        }
    });

    /**
     * Extracts a human-readable message from a failed API response.
     * Falls back to the HTTP status when the body is not JSON (for example an
     * HTML error page from a proxy), so a server-side failure is not
     * misreported as a connection problem.
     * @param {Response} response - The non-OK fetch response.
     * @returns {Promise<string>} The message to show after "Error: ".
     */
    async function readErrorMessage(response) {
        try {
            const errorData = await response.json();
            return (errorData && errorData.error) || 'An unknown error occurred.';
        } catch (parseError) {
            console.warn('Error response was not valid JSON:', parseError);
            return `Server responded with status ${response.status}.`;
        }
    }

    /**
     * Triggers a browser download of the given blob via a hidden temporary link.
     * @param {Blob} blob - The file contents.
     * @param {string} filename - The suggested name for the saved file.
     */
    function saveBlob(blob, filename) {
        const downloadUrl = window.URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.style.display = 'none';
        link.href = downloadUrl;
        link.download = filename;
        document.body.appendChild(link);
        link.click();
        link.remove();

        // Clean up the temporary URL once the browser has started the download.
        setTimeout(() => window.URL.revokeObjectURL(downloadUrl), REVOKE_DELAY_MS);
    }

    /**
     * Updates the button and input field to reflect the loading state.
     * @param {boolean} isLoading - Whether the app is in a loading state.
     */
    function setLoading(isLoading) {
        downloadBtn.classList.toggle('loading', isLoading);
        downloadBtn.disabled = isLoading;
        urlInput.disabled = isLoading;
    }

    /**
     * Displays a status message to the user.
     * @param {string} message - The message to display.
     * @param {'info'|'success'|'error'} type - The type of message.
     */
    function showStatus(message, type) {
        statusIndicator.style.display = 'block';
        statusIndicator.textContent = message;
        statusIndicator.className = `status-indicator ${type}`;
    }
});
</script>
|
| 285 |
+
</body>
|
| 286 |
+
|
| 287 |
+
</html>
|
server copy.js
ADDED
|
@@ -0,0 +1,691 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const express = require('express');
|
| 2 |
+
const puppeteer = require('puppeteer');
|
| 3 |
+
const cors = require('cors');
|
| 4 |
+
const app = express();
|
| 5 |
+
const port = 7860;
|
| 6 |
+
|
| 7 |
+
app.use(cors());
|
| 8 |
+
app.use(express.json());
|
| 9 |
+
|
| 10 |
+
/**
 * Advanced cookie banner and content bypass for StuDocu.
 *
 * Performs three steps on the given Puppeteer page:
 *   1. Sets consent cookies for `.studocu.com`.
 *   2. Adds a style tag to the current document that hides cookie banners and
 *      neutralises blur/paywall styling.
 *   3. Registers a script for every future document that injects the same CSS
 *      and removes cookie banners added dynamically.
 *
 * @param {object} page - Puppeteer page; must provide `setCookie`,
 *   `addStyleTag` and `evaluateOnNewDocument`.
 * @returns {Promise<boolean>} Always resolves to `true`.
 */
const bypassCookiesAndRestrictions = async (page) => {
    console.log("🍪 Starting comprehensive cookie and restriction bypass...");

    // Step 1: Set cookies before page load
    const preCookies = [
        { name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
        { name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
        { name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
        { name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
        { name: 'user_consent', value: '1', domain: '.studocu.com' },
        { name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
        { name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
        { name: 'functional_consent', value: 'true', domain: '.studocu.com' },
    ];

    for (const cookie of preCookies) {
        try {
            await page.setCookie(cookie);
        } catch (e) {
            // Best effort: one rejected cookie must not abort the whole bypass.
            console.log(`Failed to set cookie ${cookie.name}:`, e.message);
        }
    }

    // Step 2: CSS that hides cookie banners and removes blur/paywall styling.
    // Only standard CSS selectors are allowed here: a single invalid selector
    // (such as the non-CSS `:has-text()`) makes the browser drop the whole rule.
    // Text-based banner detection is done by the MutationObserver in step 3.
    const bypassCss = `
        /* Hide all possible cookie banners */
        [id*="cookie" i]:not(img):not(input),
        [class*="cookie" i]:not(img):not(input),
        [data-testid*="cookie" i],
        [aria-label*="cookie" i],
        .gdpr-banner, .gdpr-popup, .gdpr-modal,
        .consent-banner, .consent-popup, .consent-modal,
        .privacy-banner, .privacy-popup, .privacy-modal,
        .cookie-law, .cookie-policy, .cookie-compliance,
        .onetrust-banner-sdk, #onetrust-consent-sdk,
        .cmp-banner, .cmp-popup, .cmp-modal,
        [class*="CookieBanner"], [class*="CookieNotice"],
        [class*="ConsentBanner"], [class*="ConsentManager"],
        .cc-banner, .cc-window, .cc-compliance {
            display: none !important;
            visibility: hidden !important;
            opacity: 0 !important;
            z-index: -9999 !important;
            pointer-events: none !important;
        }

        /* Remove blur and premium overlays */
        [class*="blur" i], [class*="premium" i],
        [class*="paywall" i], [class*="sample-preview-blur" i] {
            filter: none !important;
            backdrop-filter: none !important;
            opacity: 1 !important;
            visibility: visible !important;
        }

        /* Ensure document content is visible */
        .document-content, .page-content, [data-page] {
            filter: none !important;
            opacity: 1 !important;
            visibility: visible !important;
            pointer-events: auto !important;
        }

        /* Remove fixed overlays */
        .fixed-overlay, .sticky-overlay, .content-overlay {
            display: none !important;
        }

        /* Restore scrolling */
        html, body {
            overflow: auto !important;
            position: static !important;
        }
    `;

    // Applies to the document that is loaded right now.
    await page.addStyleTag({ content: bypassCss });

    // Step 3: Script executed in every new document. The callback runs in the
    // page context, so it cannot close over variables from this scope; the
    // CSS is therefore passed in as an argument.
    await page.evaluateOnNewDocument((cssText) => {
        // Override common cookie consent functions
        window.cookieConsent = { accepted: true };
        window.gtag = () => { };
        window.ga = () => { };
        window.dataLayer = [];

        // Heuristic: does this element look like a cookie/consent banner?
        const looksLikeCookieBanner = (element) => {
            const text = (element.textContent || '').toLowerCase();
            // `className` is an SVGAnimatedString on SVG elements, so only use
            // it when it is a plain string and otherwise read the attribute.
            const rawClass = typeof element.className === 'string'
                ? element.className
                : (element.getAttribute('class') || '');
            const className = rawClass.toLowerCase();
            const id = (element.id || '').toLowerCase();

            return (
                text.includes('cookie') ||
                text.includes('consent') ||
                text.includes('privacy policy') ||
                className.includes('cookie') ||
                className.includes('consent') ||
                className.includes('gdpr') ||
                id.includes('cookie') ||
                id.includes('consent')
            );
        };

        // Mutation observer to catch dynamically added cookie banners
        const observer = new MutationObserver((mutations) => {
            mutations.forEach((mutation) => {
                mutation.addedNodes.forEach((node) => {
                    if (node.nodeType !== 1) { // Element nodes only
                        return;
                    }
                    if (looksLikeCookieBanner(node)) {
                        console.log('Removing detected cookie banner:', node);
                        node.remove();
                    }
                });
            });
        });

        // This script runs before <body> exists, so DOM-dependent setup is
        // deferred until the document has been parsed.
        const start = () => {
            const style = document.createElement('style');
            style.textContent = cssText;
            (document.head || document.documentElement).appendChild(style);

            observer.observe(document.body || document.documentElement, { childList: true, subtree: true });
        };

        if (document.readyState === 'loading') {
            document.addEventListener('DOMContentLoaded', start, { once: true });
        } else {
            start();
        }

        // Set up periodic cleanup
        setInterval(() => {
            const cookieElements = document.querySelectorAll(`
                [id*="cookie" i]:not(img):not(input),
                [class*="cookie" i]:not(img):not(input),
                [data-testid*="cookie" i],
                .gdpr-banner, .consent-banner, .privacy-banner,
                .onetrust-banner-sdk, #onetrust-consent-sdk,
                .cmp-banner, .cc-banner
            `);
            cookieElements.forEach(el => el.remove());

            // Restore scrolling (guarded: either node may not exist yet)
            if (document.body) {
                document.body.style.overflow = 'auto';
            }
            if (document.documentElement) {
                document.documentElement.style.overflow = 'auto';
            }
        }, 1000);
    }, bypassCss);

    return true;
};
|
| 152 |
+
|
| 153 |
+
/**
 * Enhanced content unblurring and premium bypass (integrated from extension script).
 *
 * Runs a cleanup routine inside the page that removes ad/premium/paywall/overlay
 * elements, clears blur and reduced-opacity styling, and forces the document
 * content containers to be visible. The routine runs once immediately and then
 * every 2 seconds for 60 seconds.
 *
 * @param {object} page - Puppeteer page; must provide `evaluate`.
 * @returns {Promise<void>} Resolves once the in-page routine has been started.
 */
const unblurContent = async (page) => {
    console.log("🔓 Unblurring content and bypassing premium restrictions...");

    await page.evaluate(() => {
        // Inline styles only accept a priority through setProperty(); assigning
        // "none !important" to el.style.filter is an invalid value and is
        // silently ignored by the browser.
        const forceUnblurred = (target) => {
            target.style.setProperty('filter', 'none', 'important');
            target.style.setProperty('backdrop-filter', 'none', 'important');
            target.style.setProperty('opacity', '1', 'important');
        };

        // Lower-cased class string; toString() copes with non-string className values.
        const classText = (el) => (el.className ? el.className.toString().toLowerCase() : "");

        // True when the computed style blurs or fades the element.
        const isBlurredOrFaded = (style) => Boolean(
            style.filter?.includes("blur") ||
            style.backdropFilter?.includes("blur") ||
            parseFloat(style.opacity) < 1
        );

        // Function to remove all visual restrictions
        const removeRestrictions = () => {
            const removeBySelector = (selector) => {
                document.querySelectorAll(selector).forEach(el => el.remove());
            };

            // Remove ads by known class or ID
            ["#adbox", ".adsbox", ".ad-box", ".banner-ads", ".advert"].forEach(removeBySelector);

            // Remove premium banner container
            removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");

            // Enhanced blur removal
            const removeBlur = (element = document) => {
                element.querySelectorAll("*").forEach(el => {
                    const classes = classText(el);

                    // Check for blur via filter, backdrop-filter, opacity, or class names
                    if (
                        isBlurredOrFaded(window.getComputedStyle(el)) ||
                        classes.includes("blur") ||
                        classes.includes("premium")
                    ) {
                        forceUnblurred(el);
                        if (el.classList) {
                            el.classList.remove("blur", "blurred", "premium-blur");
                        }
                    }

                    // Check parent elements for blur-inducing styles
                    const parent = el.parentElement;
                    if (parent && isBlurredOrFaded(window.getComputedStyle(parent))) {
                        forceUnblurred(parent);
                    }
                });
            };

            // Remove dark overlays and paywall-like elements
            document.querySelectorAll("div, section, aside").forEach(el => {
                const style = window.getComputedStyle(el);
                const classes = classText(el);
                const translucentBackdrop =
                    style.backgroundColor.includes("rgba") &&
                    (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000);

                if (translucentBackdrop || classes.includes("overlay") || classes.includes("paywall")) {
                    el.remove();
                }
            });

            removeBlur();

            // Remove other restrictions
            removeBySelector('[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i]');

            // Ensure document content is visible
            const contentSelectors = [
                '.document-content', '.page-content', '.content',
                '[data-page]', '[data-testid*="document"]', '[data-testid*="page"]',
                '.page', '.document-page', 'main', 'article'
            ];
            contentSelectors.forEach(selector => {
                document.querySelectorAll(selector).forEach(el => {
                    el.style.setProperty('filter', 'none', 'important');
                    el.style.setProperty('opacity', '1', 'important');
                    el.style.setProperty('visibility', 'visible', 'important');
                    el.style.setProperty('display', 'block', 'important');
                    el.style.setProperty('pointer-events', 'auto', 'important');
                });
            });

            // Remove overlay divs that might be blocking content
            const overlays = document.querySelectorAll(`
                [class*="overlay" i], [class*="modal" i], [class*="popup" i],
                [class*="banner" i], [style*="position: fixed"],
                [style*="position: absolute"][style*="z-index"]
            `);
            const blockingWords = ['premium', 'unlock', 'subscribe', 'cookie', 'consent', 'login'];
            overlays.forEach(overlay => {
                const text = overlay.textContent || '';
                if (blockingWords.some(word => text.includes(word))) {
                    overlay.remove();
                }
            });
        };

        // Run immediately
        removeRestrictions();

        // Run periodically
        const intervalId = setInterval(removeRestrictions, 2000);

        // Clean up after 60 seconds
        setTimeout(() => {
            clearInterval(intervalId);
        }, 60000);
    });
};
|
| 275 |
+
|
| 276 |
+
/**
 * Apply print styles for clean PDF output (integrated from extension script with improvements).
 *
 * Installs a `<style id="print-style-extension">` element in the page's
 * `<head>` containing an A4 `@page` rule and `@media print` rules that hide
 * site chrome and lay the document pages out one per printed page. Calling it
 * again reuses the existing element instead of appending a duplicate.
 *
 * @param {object} page - Puppeteer page; must provide `evaluate`.
 * @returns {Promise<void>} Resolves once the style element is in place.
 */
const applyPrintStyles = async (page) => {
    console.log("🖨️ Applying print styles for clean PDF...");

    await page.evaluate(() => {
        const STYLE_ID = "print-style-extension";
        const printCss = `
            @page {
                size: A4 portrait;
                margin: 5mm;
            }
            @media print {
                html, body {
                    margin: 0 !important;
                    padding: 0 !important;
                    overflow: visible !important;
                }
                header, footer, nav, aside, .no-print, .ads, .sidebar,
                .premium-banner, .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho,
                .Sidebar_sidebar-scrollable__kqeBZ, .HeaderWrapper_header-wrapper__mCmf3,
                .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
                .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
                .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper,
                #top-bar-wrapper, .Layout_sidebar-wrapper__unavM,
                .Layout_is-open__9DQr4 {
                    display: none !important;
                }
                body {
                    background: white !important;
                    color: black !important;
                }
                * {
                    box-shadow: none !important;
                    background: transparent !important;
                }
                .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
                .Viewer_document-wrapper__XsO4j, .page-content {
                    display: flex !important;
                    flex-direction: column !important;
                    width: 100% !important;
                    max-width: 210mm !important;
                    margin: 0 auto !important;
                }
                [data-page], .page, .document-page, img {
                    page-break-after: always !important;
                    page-break-inside: avoid !important;
                    page-break-before: avoid !important;
                    width: 100% !important;
                    max-width: 100% !important;
                    height: auto !important;
                }
            }
        `;

        // Reuse the element from an earlier call so the id stays unique.
        let style = document.getElementById(STYLE_ID);
        if (!style) {
            style = document.createElement("style");
            style.id = STYLE_ID;
            document.head.appendChild(style);
        }
        style.textContent = printCss;
    });
};
|
| 335 |
+
|
| 336 |
+
/**
|
| 337 |
+
* Enhanced StuDocu downloader with comprehensive bypasses and login support
|
| 338 |
+
*/
|
| 339 |
+
const studocuDownloader = async (url, options = {}) => {
|
| 340 |
+
let browser;
|
| 341 |
+
try {
|
| 342 |
+
console.log("🚀 Launching browser with stealth configuration...");
|
| 343 |
+
// Replace this part in your server.js (around line 343)
|
| 344 |
+
browser = await puppeteer.launch({
|
| 345 |
+
headless: "new", // Use new headless mode
|
| 346 |
+
args: [
|
| 347 |
+
'--no-sandbox',
|
| 348 |
+
'--disable-setuid-sandbox',
|
| 349 |
+
'--disable-dev-shm-usage',
|
| 350 |
+
'--disable-accelerated-2d-canvas',
|
| 351 |
+
'--no-first-run',
|
| 352 |
+
'--no-zygote',
|
| 353 |
+
'--disable-gpu',
|
| 354 |
+
'--disable-features=VizDisplayCompositor',
|
| 355 |
+
'--disable-background-networking',
|
| 356 |
+
'--disable-background-timer-throttling',
|
| 357 |
+
'--disable-renderer-backgrounding',
|
| 358 |
+
'--disable-backgrounding-occluded-windows',
|
| 359 |
+
'--disable-ipc-flooding-protection',
|
| 360 |
+
'--disable-web-security',
|
| 361 |
+
'--disable-features=site-per-process',
|
| 362 |
+
'--disable-blink-features=AutomationControlled',
|
| 363 |
+
'--disable-extensions',
|
| 364 |
+
'--single-process', // Important for containers
|
| 365 |
+
'--disable-background-tasks',
|
| 366 |
+
'--disable-default-apps',
|
| 367 |
+
'--disable-sync',
|
| 368 |
+
'--metrics-recording-only',
|
| 369 |
+
'--no-default-browser-check',
|
| 370 |
+
'--no-pings',
|
| 371 |
+
'--password-store=basic',
|
| 372 |
+
'--use-mock-keychain',
|
| 373 |
+
'--disable-gpu-sandbox'
|
| 374 |
+
],
|
| 375 |
+
timeout: 300000,
|
| 376 |
+
executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable'
|
| 377 |
+
});
|
| 378 |
+
|
| 379 |
+
const page = await browser.newPage();
|
| 380 |
+
|
| 381 |
+
// Set realistic browser characteristics
|
| 382 |
+
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
|
| 383 |
+
await page.setViewport({ width: 794, height: 1122 });
|
| 384 |
+
|
| 385 |
+
// Hide webdriver property
|
| 386 |
+
await page.evaluateOnNewDocument(() => {
|
| 387 |
+
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
|
| 388 |
+
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
|
| 389 |
+
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
|
| 390 |
+
});
|
| 391 |
+
|
| 392 |
+
// Set up cookie and content bypass
|
| 393 |
+
await bypassCookiesAndRestrictions(page);
|
| 394 |
+
|
| 395 |
+
// Block unnecessary resources
|
| 396 |
+
await page.setRequestInterception(true);
|
| 397 |
+
page.on('request', (req) => {
|
| 398 |
+
const resourceType = req.resourceType();
|
| 399 |
+
const reqUrl = req.url();
|
| 400 |
+
|
| 401 |
+
// Block trackers, ads, and analytics
|
| 402 |
+
if (
|
| 403 |
+
reqUrl.includes('doubleclick') ||
|
| 404 |
+
reqUrl.includes('googletagmanager') ||
|
| 405 |
+
reqUrl.includes('facebook.com') ||
|
| 406 |
+
reqUrl.includes('twitter.com') ||
|
| 407 |
+
reqUrl.includes('analytics') ||
|
| 408 |
+
reqUrl.includes('gtm') ||
|
| 409 |
+
reqUrl.includes('hotjar') ||
|
| 410 |
+
reqUrl.includes('mixpanel') ||
|
| 411 |
+
reqUrl.includes('onetrust') ||
|
| 412 |
+
reqUrl.includes('cookielaw') ||
|
| 413 |
+
resourceType === 'other' && reqUrl.includes('track')
|
| 414 |
+
) {
|
| 415 |
+
req.abort();
|
| 416 |
+
} else {
|
| 417 |
+
req.continue();
|
| 418 |
+
}
|
| 419 |
+
});
|
| 420 |
+
|
| 421 |
+
// Login if credentials provided (for premium content)
|
| 422 |
+
if (options.email && options.password) {
|
| 423 |
+
console.log("π Logging in to StuDocu...");
|
| 424 |
+
await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 60000 });
|
| 425 |
+
await page.waitForSelector('#email', { timeout: 15000 });
|
| 426 |
+
await page.type('#email', options.email);
|
| 427 |
+
await page.type('#password', options.password);
|
| 428 |
+
await page.click('button[type="submit"]');
|
| 429 |
+
try {
|
| 430 |
+
await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
|
| 431 |
+
// Additional check for successful login
|
| 432 |
+
await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
|
| 433 |
+
console.log("β
Login successful.");
|
| 434 |
+
} catch (e) {
|
| 435 |
+
console.error("β Login failed:", e.message);
|
| 436 |
+
throw new Error("Login failed. Check credentials, if CAPTCHA is present, or try again.");
|
| 437 |
+
}
|
| 438 |
+
} else {
|
| 439 |
+
console.log("β οΈ No login credentials provided. Full unblurred content requires premium account.");
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
console.log(`π Navigating to ${url}...`);
|
| 443 |
+
|
| 444 |
+
// Navigate with retry logic
|
| 445 |
+
let navigationSuccess = false;
|
| 446 |
+
let attempts = 0;
|
| 447 |
+
const maxAttempts = 3;
|
| 448 |
+
while (!navigationSuccess && attempts < maxAttempts) {
|
| 449 |
+
try {
|
| 450 |
+
attempts++;
|
| 451 |
+
console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
|
| 452 |
+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
|
| 453 |
+
navigationSuccess = true;
|
| 454 |
+
} catch (e) {
|
| 455 |
+
console.log(`Navigation attempt ${attempts} failed:`, e.message);
|
| 456 |
+
if (attempts >= maxAttempts) throw e;
|
| 457 |
+
await new Promise(resolve => setTimeout(resolve, 5000));
|
| 458 |
+
}
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
// Wait for initial load
|
| 462 |
+
await new Promise(resolve => setTimeout(resolve, 5000));
|
| 463 |
+
|
| 464 |
+
// Apply content unblurring
|
| 465 |
+
await unblurContent(page);
|
| 466 |
+
|
| 467 |
+
// Wait for document content with multiple selectors
|
| 468 |
+
console.log("β³ Waiting for document content to load...");
|
| 469 |
+
const contentSelectors = [
|
| 470 |
+
'.document-content', '.page-content', '[data-page]',
|
| 471 |
+
'[data-testid*="document"]', 'img[src*="document"]',
|
| 472 |
+
'img[src*="page"]', '.page', 'main img', 'article img'
|
| 473 |
+
];
|
| 474 |
+
let contentFound = false;
|
| 475 |
+
for (const selector of contentSelectors) {
|
| 476 |
+
try {
|
| 477 |
+
await page.waitForSelector(selector, { timeout: 20000 });
|
| 478 |
+
console.log(`β
Found content with selector: ${selector}`);
|
| 479 |
+
contentFound = true;
|
| 480 |
+
break;
|
| 481 |
+
} catch (e) {
|
| 482 |
+
console.log(`β Selector ${selector} not found, trying next...`);
|
| 483 |
+
}
|
| 484 |
+
}
|
| 485 |
+
if (!contentFound) {
|
| 486 |
+
console.log("β οΈ No specific content selector found, proceeding with page content...");
|
| 487 |
+
}
|
| 488 |
+
|
| 489 |
+
// Enhanced scrolling to load all content with loop for stability
|
| 490 |
+
console.log("π Loading all document pages with enhanced slow scroll...");
|
| 491 |
+
await page.evaluate(async () => {
|
| 492 |
+
const delay = (ms) => new Promise((res) => setTimeout(res, ms));
|
| 493 |
+
|
| 494 |
+
let scrollHeight = document.body.scrollHeight;
|
| 495 |
+
while (true) {
|
| 496 |
+
let totalHeight = 0;
|
| 497 |
+
const distance = 300;
|
| 498 |
+
while (totalHeight < scrollHeight) {
|
| 499 |
+
window.scrollBy(0, distance);
|
| 500 |
+
totalHeight += distance;
|
| 501 |
+
await delay(500); // Increased delay for better loading
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
await delay(2000); // Extra wait after reaching bottom
|
| 505 |
+
|
| 506 |
+
const newHeight = document.body.scrollHeight;
|
| 507 |
+
if (newHeight === scrollHeight) break;
|
| 508 |
+
scrollHeight = newHeight;
|
| 509 |
+
}
|
| 510 |
+
|
| 511 |
+
// Scroll to top
|
| 512 |
+
window.scrollTo({ top: 0, behavior: "smooth" });
|
| 513 |
+
await delay(1000);
|
| 514 |
+
});
|
| 515 |
+
|
| 516 |
+
// Re-apply unblur after loading new content
|
| 517 |
+
await unblurContent(page);
|
| 518 |
+
|
| 519 |
+
// Wait for all images to load
|
| 520 |
+
console.log("πΌοΈ Waiting for all images to load...");
|
| 521 |
+
await page.evaluate(async () => {
|
| 522 |
+
const images = Array.from(document.querySelectorAll('img'));
|
| 523 |
+
await Promise.all(images.map(img => {
|
| 524 |
+
if (img.complete) return Promise.resolve();
|
| 525 |
+
return new Promise((resolve) => {
|
| 526 |
+
img.addEventListener('load', resolve);
|
| 527 |
+
img.addEventListener('error', resolve);
|
| 528 |
+
setTimeout(resolve, 15000);
|
| 529 |
+
});
|
| 530 |
+
}));
|
| 531 |
+
});
|
| 532 |
+
|
| 533 |
+
// Additional wait for any lazy loading
|
| 534 |
+
await new Promise(resolve => setTimeout(resolve, 10000));
|
| 535 |
+
|
| 536 |
+
// Set exact height to avoid extra blank pages
|
| 537 |
+
console.log("π Setting exact document height...");
|
| 538 |
+
await page.evaluate(() => {
|
| 539 |
+
const getDocumentHeight = () => Math.max(
|
| 540 |
+
document.body.scrollHeight, document.body.offsetHeight,
|
| 541 |
+
document.documentElement.clientHeight, document.documentElement.scrollHeight,
|
| 542 |
+
document.documentElement.offsetHeight
|
| 543 |
+
);
|
| 544 |
+
const height = getDocumentHeight();
|
| 545 |
+
document.body.style.height = `${height}px !important`;
|
| 546 |
+
document.documentElement.style.height = `${height}px !important`;
|
| 547 |
+
document.body.style.overflow = 'hidden !important';
|
| 548 |
+
});
|
| 549 |
+
|
| 550 |
+
// Final content verification
|
| 551 |
+
const contentCheck = await page.evaluate(() => {
|
| 552 |
+
const textContent = document.body.textContent || '';
|
| 553 |
+
const images = document.querySelectorAll('img');
|
| 554 |
+
const documentImages = Array.from(images).filter(img =>
|
| 555 |
+
img.src.includes('document') || img.src.includes('page') ||
|
| 556 |
+
img.alt.includes('document') || img.alt.includes('page')
|
| 557 |
+
);
|
| 558 |
+
return {
|
| 559 |
+
totalText: textContent.length,
|
| 560 |
+
totalImages: images.length,
|
| 561 |
+
documentImages: documentImages.length,
|
| 562 |
+
hasDocumentContent: documentImages.length > 0 || textContent.length > 1000,
|
| 563 |
+
sampleText: textContent.substring(0, 300)
|
| 564 |
+
};
|
| 565 |
+
});
|
| 566 |
+
console.log("π Content verification:", {
|
| 567 |
+
textLength: contentCheck.totalText,
|
| 568 |
+
images: contentCheck.totalImages,
|
| 569 |
+
documentImages: contentCheck.documentImages,
|
| 570 |
+
hasContent: contentCheck.hasDocumentContent
|
| 571 |
+
});
|
| 572 |
+
|
| 573 |
+
if (!contentCheck.hasDocumentContent) {
|
| 574 |
+
console.warn("β οΈ Warning: Limited document content detected. Use premium credentials for full access.");
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
// Apply print styles
|
| 578 |
+
await applyPrintStyles(page);
|
| 579 |
+
|
| 580 |
+
// Emulate print media
|
| 581 |
+
await page.emulateMediaType('print');
|
| 582 |
+
|
| 583 |
+
// Generate PDF
|
| 584 |
+
console.log("π Generating PDF...");
|
| 585 |
+
const pdfBuffer = await page.pdf({
|
| 586 |
+
printBackground: true,
|
| 587 |
+
preferCSSPageSize: true,
|
| 588 |
+
displayHeaderFooter: false,
|
| 589 |
+
timeout: 180000,
|
| 590 |
+
scale: 1,
|
| 591 |
+
omitBackground: false
|
| 592 |
+
});
|
| 593 |
+
console.log(`β
PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
|
| 594 |
+
|
| 595 |
+
return pdfBuffer;
|
| 596 |
+
} catch (error) {
|
| 597 |
+
console.error("β Error during PDF generation:", error);
|
| 598 |
+
if (error.message.includes('timeout')) {
|
| 599 |
+
throw new Error("Request timed out. The document may be taking too long to load. Please try again.");
|
| 600 |
+
} else if (error.message.includes('net::')) {
|
| 601 |
+
throw new Error("Network error. Please check the URL and your internet connection.");
|
| 602 |
+
} else if (error.message.includes('ERR_BLOCKED')) {
|
| 603 |
+
throw new Error("Access blocked. Try again or check if the document is publicly accessible.");
|
| 604 |
+
} else {
|
| 605 |
+
throw new Error(`Failed to generate PDF: ${error.message}`);
|
| 606 |
+
}
|
| 607 |
+
} finally {
|
| 608 |
+
if (browser) {
|
| 609 |
+
console.log("π Closing browser...");
|
| 610 |
+
try {
|
| 611 |
+
await browser.close();
|
| 612 |
+
} catch (e) {
|
| 613 |
+
console.log("Error closing browser:", e.message);
|
| 614 |
+
}
|
| 615 |
+
}
|
| 616 |
+
}
|
| 617 |
+
};
|
| 618 |
+
|
| 619 |
+
// API Routes
|
| 620 |
+
/**
 * Validate a user-supplied document URL and return it in normalized form.
 *
 * The URL is parsed instead of substring-matched, so values such as
 * `https://evil.test/?studocu.com` or `https://studocu.com.evil.test` are
 * rejected rather than being handed to the headless browser.
 *
 * @param {unknown} rawUrl - Value taken from the request body.
 * @returns {string|null} Absolute http(s) URL on studocu.com (or a subdomain),
 *   or null when the value is not an acceptable StuDocu URL.
 */
const parseStudocuUrl = (rawUrl) => {
  if (typeof rawUrl !== 'string') return null;

  const trimmed = rawUrl.trim();
  if (trimmed === '') return null;

  // Scheme-less input ("www.studocu.com/...") is assumed to be https.
  const candidate = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return null;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;

  const host = parsed.hostname.toLowerCase();
  if (host !== 'studocu.com' && !host.endsWith('.studocu.com')) return null;

  return parsed.href;
};

/**
 * Build a safe attachment file name from the optional `filename` body field.
 *
 * Only `[A-Za-z0-9._-]` survive, so the result can be placed unquoted in a
 * Content-Disposition header without any risk of header injection.
 *
 * @param {unknown} requested - Value taken from the request body.
 * @returns {string} A name ending in `.pdf`; `studocu-document.pdf` when the
 *   request supplied nothing usable.
 */
const buildPdfFilename = (requested) => {
  const fallback = 'studocu-document.pdf';
  if (typeof requested !== 'string') return fallback;

  const base = requested
    .trim()
    .replace(/\.pdf$/i, '')
    .replace(/[^A-Za-z0-9._-]+/g, '_')
    .replace(/^[._-]+|[._-]+$/g, '')
    .slice(0, 100);

  return base === '' ? fallback : `${base}.pdf`;
};

/**
 * POST /api/download
 *
 * Body: `{ url, filename?, email?, password? }`.
 * Responds with the generated PDF as an attachment, 400 when the URL is
 * missing or not a StuDocu URL, and 500 with `{ error }` when generation fails.
 */
app.post('/api/download', async (req, res) => {
  // req.body can be undefined when no body parser matched the request.
  const { url, filename, email, password } = req.body ?? {};

  if (!url) {
    return res.status(400).json({ error: 'URL is required.' });
  }

  const normalizedUrl = parseStudocuUrl(url);
  if (normalizedUrl === null) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  console.log(`π― Processing request for: ${normalizedUrl}`);

  try {
    const startTime = Date.now();
    const pdfData = await studocuDownloader(normalizedUrl, { filename, email, password });
    const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);

    // NOTE(review): newer Puppeteer releases may return a Uint8Array from
    // page.pdf(); wrap it (without copying) so res.send always gets a Buffer.
    const pdfBuffer = Buffer.isBuffer(pdfData)
      ? pdfData
      : Buffer.from(pdfData.buffer, pdfData.byteOffset, pdfData.byteLength);

    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', `attachment; filename=${buildPdfFilename(filename)}`);
    res.setHeader('Content-Length', pdfBuffer.length);
    res.send(pdfBuffer);

    console.log(`π Request completed successfully in ${processingTime}s`);
  } catch (error) {
    console.error(`β Failed to process ${normalizedUrl}:`, error.message);
    // If part of the PDF response already went out, a JSON error can no longer be sent.
    if (!res.headersSent) {
      res.status(500).json({ error: error.message });
    }
  }
});
|
| 654 |
+
|
| 655 |
+
// GET /health — liveness probe reporting the current time and process uptime.
app.get('/health', (_req, res) => {
  const timestamp = new Date().toISOString();
  const uptime = process.uptime();

  res.json({ status: 'healthy', timestamp, uptime });
});
|
| 658 |
+
|
| 659 |
+
// GET / — static service description: version, feature list and endpoints.
app.get('/', (_req, res) => {
  const features = [
    'πͺ Advanced cookie banner bypass',
    'π Premium content unblurring (client-side only; server-side blur requires premium login)',
    'π Login support for full unblurred content access',
    'π€ Anti-bot detection evasion',
    'π Full document content extraction with print styles for clean PDF'
  ];

  const endpoints = {
    download: 'POST /api/download (body: {url, filename?, email?, password?})',
    health: 'GET /health'
  };

  const description = {
    message: 'π Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
    version: '5.3',
    features,
    endpoints,
    note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
  };

  res.json(description);
});
|
| 677 |
+
|
| 678 |
+
// Start the HTTP server. The handle is kept so the signal handlers below can
// stop accepting connections instead of killing the process outright.
const httpServer = app.listen(port, () => {
  console.log(`π Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
  console.log(`β¨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
});

// How long in-flight requests get to finish before the process exits anyway.
const SHUTDOWN_GRACE_MS = 10000;

let shuttingDown = false;

/**
 * Shared SIGTERM/SIGINT handler.
 *
 * Stops accepting new connections, lets in-flight requests finish, and exits
 * with code 0 (the code the previous immediate-exit handlers used). A second
 * signal, or expiry of the grace period, exits immediately.
 *
 * @param {string} signal - Name of the received signal, used for logging.
 */
const shutdown = (signal) => {
  if (shuttingDown) {
    // Operator insisted (e.g. second Ctrl+C): do not wait any longer.
    process.exit(0);
  }
  shuttingDown = true;

  console.log(`${signal} received, shutting down gracefully...`);

  httpServer.close((err) => {
    if (err) {
      console.error('Error while closing HTTP server:', err.message);
    }
    process.exit(0);
  });

  // Idle keep-alive sockets would otherwise hold close() open (Node >= 18.2).
  httpServer.closeIdleConnections?.();

  // unref() so this timer alone never keeps the process alive.
  setTimeout(() => {
    console.error(`Shutdown grace period of ${SHUTDOWN_GRACE_MS}ms elapsed, forcing exit.`);
    process.exit(0);
  }, SHUTDOWN_GRACE_MS).unref();
};

process.on('SIGTERM', () => shutdown('SIGTERM'));
process.on('SIGINT', () => shutdown('SIGINT'));
|
server.js
CHANGED
|
@@ -12,7 +12,6 @@ app.use(express.json());
|
|
| 12 |
*/
|
| 13 |
const bypassCookiesAndRestrictions = async (page) => {
|
| 14 |
console.log("πͺ Starting comprehensive cookie and restriction bypass...");
|
| 15 |
-
|
| 16 |
// Step 1: Set cookies before page load
|
| 17 |
const preCookies = [
|
| 18 |
{ name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
|
|
@@ -24,7 +23,6 @@ const bypassCookiesAndRestrictions = async (page) => {
|
|
| 24 |
{ name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
|
| 25 |
{ name: 'functional_consent', value: 'true', domain: '.studocu.com' },
|
| 26 |
];
|
| 27 |
-
|
| 28 |
for (const cookie of preCookies) {
|
| 29 |
try {
|
| 30 |
await page.setCookie(cookie);
|
|
@@ -36,58 +34,42 @@ const bypassCookiesAndRestrictions = async (page) => {
|
|
| 36 |
// Step 2: Inject CSS to hide cookie banners immediately
|
| 37 |
await page.addStyleTag({
|
| 38 |
content: `
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
opacity: 1 !important;
|
| 76 |
-
visibility: visible !important;
|
| 77 |
-
pointer-events: auto !important;
|
| 78 |
-
}
|
| 79 |
-
|
| 80 |
-
/* Remove fixed overlays */
|
| 81 |
-
.fixed-overlay, .sticky-overlay, .content-overlay {
|
| 82 |
-
display: none !important;
|
| 83 |
-
}
|
| 84 |
-
|
| 85 |
-
/* Restore scrolling */
|
| 86 |
-
html, body {
|
| 87 |
-
overflow: auto !important;
|
| 88 |
-
position: static !important;
|
| 89 |
-
}
|
| 90 |
-
`
|
| 91 |
});
|
| 92 |
|
| 93 |
// Step 3: Inject JavaScript to handle dynamic cookie banners
|
|
@@ -107,7 +89,6 @@ const bypassCookiesAndRestrictions = async (page) => {
|
|
| 107 |
const text = element.textContent || '';
|
| 108 |
const className = element.className || '';
|
| 109 |
const id = element.id || '';
|
| 110 |
-
|
| 111 |
// Check if this looks like a cookie banner
|
| 112 |
if (
|
| 113 |
text.toLowerCase().includes('cookie') ||
|
|
@@ -126,21 +107,16 @@ const bypassCookiesAndRestrictions = async (page) => {
|
|
| 126 |
});
|
| 127 |
});
|
| 128 |
});
|
| 129 |
-
|
| 130 |
observer.observe(document.body, { childList: true, subtree: true });
|
| 131 |
|
| 132 |
// Set up periodic cleanup
|
| 133 |
setInterval(() => {
|
| 134 |
const cookieElements = document.querySelectorAll(`
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
.onetrust-banner-sdk, #onetrust-consent-sdk,
|
| 140 |
-
.cmp-banner, .cc-banner
|
| 141 |
-
`);
|
| 142 |
cookieElements.forEach(el => el.remove());
|
| 143 |
-
|
| 144 |
// Restore body scroll
|
| 145 |
document.body.style.overflow = 'auto';
|
| 146 |
document.documentElement.style.overflow = 'auto';
|
|
@@ -155,7 +131,6 @@ const bypassCookiesAndRestrictions = async (page) => {
|
|
| 155 |
*/
|
| 156 |
const unblurContent = async (page) => {
|
| 157 |
console.log("π Unblurring content and bypassing premium restrictions...");
|
| 158 |
-
|
| 159 |
await page.evaluate(() => {
|
| 160 |
// Function to remove all visual restrictions
|
| 161 |
const removeRestrictions = () => {
|
|
@@ -177,7 +152,6 @@ const unblurContent = async (page) => {
|
|
| 177 |
const removeBlur = (element = document) => {
|
| 178 |
element.querySelectorAll("*").forEach(el => {
|
| 179 |
const style = window.getComputedStyle(el);
|
| 180 |
-
|
| 181 |
// Check for blur via filter, backdrop-filter, or class names
|
| 182 |
if (
|
| 183 |
style.filter?.includes("blur") ||
|
|
@@ -193,7 +167,6 @@ const unblurContent = async (page) => {
|
|
| 193 |
el.classList.remove("blur", "blurred", "premium-blur");
|
| 194 |
}
|
| 195 |
}
|
| 196 |
-
|
| 197 |
// Check parent elements for blur-inducing styles
|
| 198 |
const parent = el.parentElement;
|
| 199 |
if (parent) {
|
|
@@ -215,8 +188,7 @@ const unblurContent = async (page) => {
|
|
| 215 |
document.querySelectorAll("div, section, aside").forEach(el => {
|
| 216 |
const style = window.getComputedStyle(el);
|
| 217 |
if (
|
| 218 |
-
style.backgroundColor.includes("rgba") &&
|
| 219 |
-
(style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000) ||
|
| 220 |
(el.className && el.className.toString().toLowerCase().includes("overlay")) ||
|
| 221 |
(el.className && el.className.toString().toLowerCase().includes("paywall"))
|
| 222 |
) {
|
|
@@ -231,9 +203,8 @@ const unblurContent = async (page) => {
|
|
| 231 |
|
| 232 |
// Ensure document content is visible
|
| 233 |
const contentSelectors = [
|
| 234 |
-
'.document-content', '.page-content', '.content',
|
| 235 |
-
'[data-page]', '
|
| 236 |
-
'.page', '.document-page', 'main', 'article'
|
| 237 |
];
|
| 238 |
contentSelectors.forEach(selector => {
|
| 239 |
document.querySelectorAll(selector).forEach(el => {
|
|
@@ -247,14 +218,12 @@ const unblurContent = async (page) => {
|
|
| 247 |
|
| 248 |
// Remove overlay divs that might be blocking content
|
| 249 |
const overlays = document.querySelectorAll(`
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
`);
|
| 254 |
overlays.forEach(overlay => {
|
| 255 |
const text = overlay.textContent || '';
|
| 256 |
-
if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') ||
|
| 257 |
-
text.includes('cookie') || text.includes('consent') || text.includes('login')) {
|
| 258 |
overlay.remove();
|
| 259 |
}
|
| 260 |
});
|
|
@@ -278,57 +247,53 @@ const unblurContent = async (page) => {
|
|
| 278 |
*/
|
| 279 |
const applyPrintStyles = async (page) => {
|
| 280 |
console.log("π¨οΈ Applying print styles for clean PDF...");
|
| 281 |
-
|
| 282 |
await page.evaluate(() => {
|
| 283 |
const style = document.createElement("style");
|
| 284 |
style.id = "print-style-extension";
|
| 285 |
style.innerHTML = `
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
}
|
| 330 |
-
}
|
| 331 |
-
`;
|
| 332 |
document.head.appendChild(style);
|
| 333 |
});
|
| 334 |
};
|
|
@@ -340,9 +305,8 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 340 |
let browser;
|
| 341 |
try {
|
| 342 |
console.log("π Launching browser with stealth configuration...");
|
| 343 |
-
// Replace this part in your server.js (around line 343)
|
| 344 |
browser = await puppeteer.launch({
|
| 345 |
-
headless:
|
| 346 |
args: [
|
| 347 |
'--no-sandbox',
|
| 348 |
'--disable-setuid-sandbox',
|
|
@@ -360,20 +324,9 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 360 |
'--disable-web-security',
|
| 361 |
'--disable-features=site-per-process',
|
| 362 |
'--disable-blink-features=AutomationControlled',
|
| 363 |
-
'--disable-extensions'
|
| 364 |
-
'--single-process', // Important for containers
|
| 365 |
-
'--disable-background-tasks',
|
| 366 |
-
'--disable-default-apps',
|
| 367 |
-
'--disable-sync',
|
| 368 |
-
'--metrics-recording-only',
|
| 369 |
-
'--no-default-browser-check',
|
| 370 |
-
'--no-pings',
|
| 371 |
-
'--password-store=basic',
|
| 372 |
-
'--use-mock-keychain',
|
| 373 |
-
'--disable-gpu-sandbox'
|
| 374 |
],
|
| 375 |
timeout: 300000,
|
| 376 |
-
executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/google-chrome-stable'
|
| 377 |
});
|
| 378 |
|
| 379 |
const page = await browser.newPage();
|
|
@@ -397,7 +350,6 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 397 |
page.on('request', (req) => {
|
| 398 |
const resourceType = req.resourceType();
|
| 399 |
const reqUrl = req.url();
|
| 400 |
-
|
| 401 |
// Block trackers, ads, and analytics
|
| 402 |
if (
|
| 403 |
reqUrl.includes('doubleclick') ||
|
|
@@ -410,7 +362,7 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 410 |
reqUrl.includes('mixpanel') ||
|
| 411 |
reqUrl.includes('onetrust') ||
|
| 412 |
reqUrl.includes('cookielaw') ||
|
| 413 |
-
resourceType === 'other' && reqUrl.includes('track')
|
| 414 |
) {
|
| 415 |
req.abort();
|
| 416 |
} else {
|
|
@@ -440,7 +392,6 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 440 |
}
|
| 441 |
|
| 442 |
console.log(`π Navigating to ${url}...`);
|
| 443 |
-
|
| 444 |
// Navigate with retry logic
|
| 445 |
let navigationSuccess = false;
|
| 446 |
let attempts = 0;
|
|
@@ -467,9 +418,8 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 467 |
// Wait for document content with multiple selectors
|
| 468 |
console.log("β³ Waiting for document content to load...");
|
| 469 |
const contentSelectors = [
|
| 470 |
-
'.document-content', '.page-content', '[data-page]',
|
| 471 |
-
'[
|
| 472 |
-
'img[src*="page"]', '.page', 'main img', 'article img'
|
| 473 |
];
|
| 474 |
let contentFound = false;
|
| 475 |
for (const selector of contentSelectors) {
|
|
@@ -482,6 +432,7 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 482 |
console.log(`β Selector ${selector} not found, trying next...`);
|
| 483 |
}
|
| 484 |
}
|
|
|
|
| 485 |
if (!contentFound) {
|
| 486 |
console.log("β οΈ No specific content selector found, proceeding with page content...");
|
| 487 |
}
|
|
@@ -490,7 +441,6 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 490 |
console.log("π Loading all document pages with enhanced slow scroll...");
|
| 491 |
await page.evaluate(async () => {
|
| 492 |
const delay = (ms) => new Promise((res) => setTimeout(res, ms));
|
| 493 |
-
|
| 494 |
let scrollHeight = document.body.scrollHeight;
|
| 495 |
while (true) {
|
| 496 |
let totalHeight = 0;
|
|
@@ -500,14 +450,11 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 500 |
totalHeight += distance;
|
| 501 |
await delay(500); // Increased delay for better loading
|
| 502 |
}
|
| 503 |
-
|
| 504 |
await delay(2000); // Extra wait after reaching bottom
|
| 505 |
-
|
| 506 |
const newHeight = document.body.scrollHeight;
|
| 507 |
if (newHeight === scrollHeight) break;
|
| 508 |
scrollHeight = newHeight;
|
| 509 |
}
|
| 510 |
-
|
| 511 |
// Scroll to top
|
| 512 |
window.scrollTo({ top: 0, behavior: "smooth" });
|
| 513 |
await delay(1000);
|
|
@@ -538,8 +485,7 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 538 |
await page.evaluate(() => {
|
| 539 |
const getDocumentHeight = () => Math.max(
|
| 540 |
document.body.scrollHeight, document.body.offsetHeight,
|
| 541 |
-
document.documentElement.clientHeight, document.documentElement.scrollHeight,
|
| 542 |
-
document.documentElement.offsetHeight
|
| 543 |
);
|
| 544 |
const height = getDocumentHeight();
|
| 545 |
document.body.style.height = `${height}px !important`;
|
|
@@ -590,9 +536,10 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 590 |
scale: 1,
|
| 591 |
omitBackground: false
|
| 592 |
});
|
| 593 |
-
console.log(`β
PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
|
| 594 |
|
|
|
|
| 595 |
return pdfBuffer;
|
|
|
|
| 596 |
} catch (error) {
|
| 597 |
console.error("β Error during PDF generation:", error);
|
| 598 |
if (error.message.includes('timeout')) {
|
|
@@ -616,14 +563,144 @@ const studocuDownloader = async (url, options = {}) => {
|
|
| 616 |
}
|
| 617 |
};
|
| 618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
// API Routes
|
|
|
|
|
|
|
| 620 |
app.post('/api/download', async (req, res) => {
|
| 621 |
const { url, filename, email, password } = req.body;
|
| 622 |
-
|
| 623 |
if (!url) {
|
| 624 |
return res.status(400).json({ error: 'URL is required.' });
|
| 625 |
}
|
| 626 |
-
|
| 627 |
if (!url.includes('studocu.com')) {
|
| 628 |
return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
|
| 629 |
}
|
|
@@ -634,17 +711,14 @@ app.post('/api/download', async (req, res) => {
|
|
| 634 |
}
|
| 635 |
|
| 636 |
console.log(`π― Processing request for: ${normalizedUrl}`);
|
| 637 |
-
|
| 638 |
try {
|
| 639 |
const startTime = Date.now();
|
| 640 |
const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
|
| 641 |
const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
|
| 642 |
-
|
| 643 |
res.setHeader('Content-Type', 'application/pdf');
|
| 644 |
res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
|
| 645 |
res.setHeader('Content-Length', pdfBuffer.length);
|
| 646 |
res.send(pdfBuffer);
|
| 647 |
-
|
| 648 |
console.log(`π Request completed successfully in ${processingTime}s`);
|
| 649 |
} catch (error) {
|
| 650 |
console.error(`β Failed to process ${normalizedUrl}:`, error.message);
|
|
@@ -652,23 +726,55 @@ app.post('/api/download', async (req, res) => {
|
|
| 652 |
}
|
| 653 |
});
|
| 654 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
app.get('/health', (req, res) => {
|
| 656 |
-
res.json({
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
});
|
| 658 |
|
| 659 |
app.get('/', (req, res) => {
|
| 660 |
res.json({
|
| 661 |
-
message: 'π Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles',
|
| 662 |
-
version: '5.3',
|
| 663 |
features: [
|
| 664 |
'πͺ Advanced cookie banner bypass',
|
| 665 |
'π Premium content unblurring (client-side only; server-side blur requires premium login)',
|
| 666 |
'π Login support for full unblurred content access',
|
| 667 |
'π€ Anti-bot detection evasion',
|
| 668 |
-
'π Full document content extraction with print styles for clean PDF'
|
|
|
|
| 669 |
],
|
| 670 |
endpoints: {
|
| 671 |
download: 'POST /api/download (body: {url, filename?, email?, password?})',
|
|
|
|
| 672 |
health: 'GET /health'
|
| 673 |
},
|
| 674 |
note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
|
|
@@ -686,6 +792,6 @@ process.on('SIGINT', () => {
|
|
| 686 |
});
|
| 687 |
|
| 688 |
app.listen(port, () => {
|
| 689 |
-
console.log(`π Enhanced StuDocu Downloader v5.3 running on http://localhost:${port}`);
|
| 690 |
-
console.log(`β¨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection`);
|
| 691 |
});
|
|
|
|
| 12 |
*/
|
| 13 |
const bypassCookiesAndRestrictions = async (page) => {
|
| 14 |
console.log("πͺ Starting comprehensive cookie and restriction bypass...");
|
|
|
|
| 15 |
// Step 1: Set cookies before page load
|
| 16 |
const preCookies = [
|
| 17 |
{ name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
|
|
|
|
| 23 |
{ name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
|
| 24 |
{ name: 'functional_consent', value: 'true', domain: '.studocu.com' },
|
| 25 |
];
|
|
|
|
| 26 |
for (const cookie of preCookies) {
|
| 27 |
try {
|
| 28 |
await page.setCookie(cookie);
|
|
|
|
| 34 |
// Step 2: Inject CSS to hide cookie banners immediately
|
| 35 |
await page.addStyleTag({
|
| 36 |
content: `
|
| 37 |
+
/* Hide all possible cookie banners */
|
| 38 |
+
[id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
|
| 39 |
+
.gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
|
| 40 |
+
.cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
|
| 41 |
+
[class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance,
|
| 42 |
+
div[style*="position: fixed"]:has-text("cookie"), div[style*="position: fixed"]:has-text("consent"), .fixed:has-text("cookie"), .fixed:has-text("consent") {
|
| 43 |
+
display: none !important;
|
| 44 |
+
visibility: hidden !important;
|
| 45 |
+
opacity: 0 !important;
|
| 46 |
+
z-index: -9999 !important;
|
| 47 |
+
pointer-events: none !important;
|
| 48 |
+
}
|
| 49 |
+
/* Remove blur and premium overlays */
|
| 50 |
+
[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i] {
|
| 51 |
+
filter: none !important;
|
| 52 |
+
backdrop-filter: none !important;
|
| 53 |
+
opacity: 1 !important;
|
| 54 |
+
visibility: visible !important;
|
| 55 |
+
}
|
| 56 |
+
/* Ensure document content is visible */
|
| 57 |
+
.document-content, .page-content, [data-page] {
|
| 58 |
+
filter: none !important;
|
| 59 |
+
opacity: 1 !important;
|
| 60 |
+
visibility: visible !important;
|
| 61 |
+
pointer-events: auto !important;
|
| 62 |
+
}
|
| 63 |
+
/* Remove fixed overlays */
|
| 64 |
+
.fixed-overlay, .sticky-overlay, .content-overlay {
|
| 65 |
+
display: none !important;
|
| 66 |
+
}
|
| 67 |
+
/* Restore scrolling */
|
| 68 |
+
html, body {
|
| 69 |
+
overflow: auto !important;
|
| 70 |
+
position: static !important;
|
| 71 |
+
}
|
| 72 |
+
`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
});
|
| 74 |
|
| 75 |
// Step 3: Inject JavaScript to handle dynamic cookie banners
|
|
|
|
| 89 |
const text = element.textContent || '';
|
| 90 |
const className = element.className || '';
|
| 91 |
const id = element.id || '';
|
|
|
|
| 92 |
// Check if this looks like a cookie banner
|
| 93 |
if (
|
| 94 |
text.toLowerCase().includes('cookie') ||
|
|
|
|
| 107 |
});
|
| 108 |
});
|
| 109 |
});
|
|
|
|
| 110 |
observer.observe(document.body, { childList: true, subtree: true });
|
| 111 |
|
| 112 |
// Set up periodic cleanup
|
| 113 |
setInterval(() => {
|
| 114 |
const cookieElements = document.querySelectorAll(`
|
| 115 |
+
[id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
|
| 116 |
+
.gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
|
| 117 |
+
.cmp-banner, .cc-banner
|
| 118 |
+
`);
|
|
|
|
|
|
|
|
|
|
| 119 |
cookieElements.forEach(el => el.remove());
|
|
|
|
| 120 |
// Restore body scroll
|
| 121 |
document.body.style.overflow = 'auto';
|
| 122 |
document.documentElement.style.overflow = 'auto';
|
|
|
|
| 131 |
*/
|
| 132 |
const unblurContent = async (page) => {
|
| 133 |
console.log("π Unblurring content and bypassing premium restrictions...");
|
|
|
|
| 134 |
await page.evaluate(() => {
|
| 135 |
// Function to remove all visual restrictions
|
| 136 |
const removeRestrictions = () => {
|
|
|
|
| 152 |
const removeBlur = (element = document) => {
|
| 153 |
element.querySelectorAll("*").forEach(el => {
|
| 154 |
const style = window.getComputedStyle(el);
|
|
|
|
| 155 |
// Check for blur via filter, backdrop-filter, or class names
|
| 156 |
if (
|
| 157 |
style.filter?.includes("blur") ||
|
|
|
|
| 167 |
el.classList.remove("blur", "blurred", "premium-blur");
|
| 168 |
}
|
| 169 |
}
|
|
|
|
| 170 |
// Check parent elements for blur-inducing styles
|
| 171 |
const parent = el.parentElement;
|
| 172 |
if (parent) {
|
|
|
|
| 188 |
document.querySelectorAll("div, section, aside").forEach(el => {
|
| 189 |
const style = window.getComputedStyle(el);
|
| 190 |
if (
|
| 191 |
+
(style.backgroundColor.includes("rgba") && (style.backgroundColor.includes("0.5") || parseFloat(style.zIndex) > 1000)) ||
|
|
|
|
| 192 |
(el.className && el.className.toString().toLowerCase().includes("overlay")) ||
|
| 193 |
(el.className && el.className.toString().toLowerCase().includes("paywall"))
|
| 194 |
) {
|
|
|
|
| 203 |
|
| 204 |
// Ensure document content is visible
|
| 205 |
const contentSelectors = [
|
| 206 |
+
'.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
|
| 207 |
+
'[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
|
|
|
|
| 208 |
];
|
| 209 |
contentSelectors.forEach(selector => {
|
| 210 |
document.querySelectorAll(selector).forEach(el => {
|
|
|
|
| 218 |
|
| 219 |
// Remove overlay divs that might be blocking content
|
| 220 |
const overlays = document.querySelectorAll(`
|
| 221 |
+
[class*="overlay" i], [class*="modal" i], [class*="popup" i], [class*="banner" i],
|
| 222 |
+
[style*="position: fixed"], [style*="position: absolute"][style*="z-index"]
|
| 223 |
+
`);
|
|
|
|
| 224 |
overlays.forEach(overlay => {
|
| 225 |
const text = overlay.textContent || '';
|
| 226 |
+
if (text.includes('premium') || text.includes('unlock') || text.includes('subscribe') || text.includes('cookie') || text.includes('consent') || text.includes('login')) {
|
|
|
|
| 227 |
overlay.remove();
|
| 228 |
}
|
| 229 |
});
|
|
|
|
| 247 |
*/
|
| 248 |
const applyPrintStyles = async (page) => {
  // Purpose: install a print stylesheet in the loaded document so the PDF
  // contains only the document pages (A4 portrait, 5mm margins).
  //
  // @param {object} page - Puppeteer page; only `page.evaluate` is used.
  // @returns {Promise<void>} Resolves once the stylesheet is in <head>.
  //
  // The call is idempotent: the <style> element is looked up by id and reused,
  // so repeated calls never leave duplicate ids or stacked copies of the rules.
  console.log("π¨οΈ Applying print styles for clean PDF...");

  await page.evaluate(() => {
    // Everything used here must be declared inside this callback: it is
    // executed in the browser context, not in the Node.js process.
    const styleId = "print-style-extension";
    const printCss = `
      @page {
        size: A4 portrait;
        margin: 5mm;
      }
      @media print {
        html, body {
          margin: 0 !important;
          padding: 0 !important;
          overflow: visible !important;
        }
        header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
        .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
        .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
        .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
        .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
        .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
          display: none !important;
        }
        body {
          background: white !important;
          color: black !important;
        }
        * {
          box-shadow: none !important;
          background: transparent !important;
        }
        .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ, .Viewer_document-wrapper__XsO4j, .page-content {
          display: flex !important;
          flex-direction: column !important;
          width: 100% !important;
          max-width: 210mm !important;
          margin: 0 auto !important;
        }
        [data-page], .page, .document-page, img {
          page-break-after: always !important;
          page-break-inside: avoid !important;
          page-break-before: avoid !important;
          width: 100% !important;
          max-width: 100% !important;
          height: auto !important;
        }
      }
    `;

    let style = document.getElementById(styleId);
    const isNewElement = !style;
    if (isNewElement) {
      style = document.createElement("style");
      style.id = styleId;
    }

    // textContent is sufficient for a stylesheet and avoids HTML parsing.
    style.textContent = printCss;

    if (isNewElement) {
      document.head.appendChild(style);
    }
  });
};
|
|
|
|
| 305 |
let browser;
|
| 306 |
try {
|
| 307 |
console.log("π Launching browser with stealth configuration...");
|
|
|
|
| 308 |
browser = await puppeteer.launch({
|
| 309 |
+
headless: true,
|
| 310 |
args: [
|
| 311 |
'--no-sandbox',
|
| 312 |
'--disable-setuid-sandbox',
|
|
|
|
| 324 |
'--disable-web-security',
|
| 325 |
'--disable-features=site-per-process',
|
| 326 |
'--disable-blink-features=AutomationControlled',
|
| 327 |
+
'--disable-extensions'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
],
|
| 329 |
timeout: 300000,
|
|
|
|
| 330 |
});
|
| 331 |
|
| 332 |
const page = await browser.newPage();
|
|
|
|
| 350 |
page.on('request', (req) => {
|
| 351 |
const resourceType = req.resourceType();
|
| 352 |
const reqUrl = req.url();
|
|
|
|
| 353 |
// Block trackers, ads, and analytics
|
| 354 |
if (
|
| 355 |
reqUrl.includes('doubleclick') ||
|
|
|
|
| 362 |
reqUrl.includes('mixpanel') ||
|
| 363 |
reqUrl.includes('onetrust') ||
|
| 364 |
reqUrl.includes('cookielaw') ||
|
| 365 |
+
(resourceType === 'other' && reqUrl.includes('track'))
|
| 366 |
) {
|
| 367 |
req.abort();
|
| 368 |
} else {
|
|
|
|
| 392 |
}
|
| 393 |
|
| 394 |
console.log(`π Navigating to ${url}...`);
|
|
|
|
| 395 |
// Navigate with retry logic
|
| 396 |
let navigationSuccess = false;
|
| 397 |
let attempts = 0;
|
|
|
|
| 418 |
// Wait for document content with multiple selectors
|
| 419 |
console.log("β³ Waiting for document content to load...");
|
| 420 |
const contentSelectors = [
|
| 421 |
+
'.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
|
| 422 |
+
'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
|
|
|
|
| 423 |
];
|
| 424 |
let contentFound = false;
|
| 425 |
for (const selector of contentSelectors) {
|
|
|
|
| 432 |
console.log(`β Selector ${selector} not found, trying next...`);
|
| 433 |
}
|
| 434 |
}
|
| 435 |
+
|
| 436 |
if (!contentFound) {
|
| 437 |
console.log("β οΈ No specific content selector found, proceeding with page content...");
|
| 438 |
}
|
|
|
|
| 441 |
console.log("π Loading all document pages with enhanced slow scroll...");
|
| 442 |
await page.evaluate(async () => {
|
| 443 |
const delay = (ms) => new Promise((res) => setTimeout(res, ms));
|
|
|
|
| 444 |
let scrollHeight = document.body.scrollHeight;
|
| 445 |
while (true) {
|
| 446 |
let totalHeight = 0;
|
|
|
|
| 450 |
totalHeight += distance;
|
| 451 |
await delay(500); // Increased delay for better loading
|
| 452 |
}
|
|
|
|
| 453 |
await delay(2000); // Extra wait after reaching bottom
|
|
|
|
| 454 |
const newHeight = document.body.scrollHeight;
|
| 455 |
if (newHeight === scrollHeight) break;
|
| 456 |
scrollHeight = newHeight;
|
| 457 |
}
|
|
|
|
| 458 |
// Scroll to top
|
| 459 |
window.scrollTo({ top: 0, behavior: "smooth" });
|
| 460 |
await delay(1000);
|
|
|
|
| 485 |
await page.evaluate(() => {
|
| 486 |
const getDocumentHeight = () => Math.max(
|
| 487 |
document.body.scrollHeight, document.body.offsetHeight,
|
| 488 |
+
document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
|
|
|
|
| 489 |
);
|
| 490 |
const height = getDocumentHeight();
|
| 491 |
document.body.style.height = `${height}px !important`;
|
|
|
|
| 536 |
scale: 1,
|
| 537 |
omitBackground: false
|
| 538 |
});
|
|
|
|
| 539 |
|
| 540 |
+
console.log(`β
PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
|
| 541 |
return pdfBuffer;
|
| 542 |
+
|
| 543 |
} catch (error) {
|
| 544 |
console.error("β Error during PDF generation:", error);
|
| 545 |
if (error.message.includes('timeout')) {
|
|
|
|
| 563 |
}
|
| 564 |
};
|
| 565 |
|
| 566 |
+
/**
 * NEW: StuDocu downloader with page-by-page progress streaming.
 *
 * Launches a headless browser, optionally logs in, opens `url`, scrolls to the
 * bottom so the page elements are rendered, then screenshots every
 * `[data-page]` element and writes one JSON object per line to `res`:
 *
 *   { pageNumber, totalPages, imageData }   // imageData is a PNG data URL
 *
 * On failure an `{ error }` object is sent instead: as a 500 JSON response
 * when no headers have been sent yet, otherwise as one more line of the
 * stream. The browser is closed and `res` is ended before the function
 * returns, whatever happened.
 *
 * @param {string} url - Document URL to open.
 * @param {{email?: string, password?: string}} options - Optional login
 *   credentials; the login step runs only when both are present.
 * @param {object} res - Express response that receives the progress lines.
 * @returns {Promise<void>} Errors are reported through `res`, not thrown.
 */
const studocuDownloaderStreamed = async (url, options, res) => {
  // Writes one newline-delimited JSON record. When the socket buffer is full
  // it waits for 'drain' (or 'close', so a vanished client cannot stall us).
  // Resolves to false once the response can no longer be written to.
  const writeLine = async (payload) => {
    if (res.writableEnded || res.destroyed) {
      return false;
    }
    const flushed = res.write(JSON.stringify(payload) + '\n');
    if (!flushed) {
      await new Promise((resolve) => {
        const settle = () => {
          res.off('drain', settle);
          res.off('close', settle);
          resolve();
        };
        res.once('drain', settle);
        res.once('close', settle);
      });
    }
    return !res.destroyed;
  };

  let browser;
  try {
    console.log("π Launching browser for streaming with stealth configuration...");
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ],
      timeout: 300000,
    });

    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 794, height: 1122 });

    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
    });

    await bypassCookiesAndRestrictions(page);

    // Drop images, stylesheets, fonts and "other" resources whose URL does not
    // mention studocu.com; everything else is let through.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'stylesheet', 'font', 'other'].includes(req.resourceType()) && !req.url().includes('studocu.com')) {
        req.abort();
      } else {
        req.continue();
      }
    });

    if (options.email && options.password) {
      console.log("π Logging in for streaming...");
      await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded' });
      await page.waitForSelector('#email');
      await page.type('#email', options.email);
      await page.type('#password', options.password);
      // Start waiting for the navigation before the click that triggers it;
      // awaiting the click first can miss a fast navigation and then time out.
      await Promise.all([
        page.waitForNavigation({ waitUntil: 'networkidle2' }),
        page.click('button[type="submit"]'),
      ]);
      console.log("✅ Login successful for streaming.");
    }

    console.log(`π Navigating to ${url} for streaming...`);
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    await new Promise(resolve => setTimeout(resolve, 5000));

    await unblurContent(page);

    console.log("β³ Waiting for document pages to load...");
    await page.waitForSelector('[data-page]', { timeout: 30000 });

    console.log("π Scrolling to load all pages for streaming...");
    await page.evaluate(async () => {
      // Scroll down 100px every 100ms until the scrolled distance reaches the
      // (re-read on every tick) body height.
      await new Promise(resolve => {
        let totalHeight = 0;
        const distance = 100;
        const timer = setInterval(() => {
          const scrollHeight = document.body.scrollHeight;
          window.scrollBy(0, distance);
          totalHeight += distance;
          if (totalHeight >= scrollHeight) {
            clearInterval(timer);
            resolve();
          }
        }, 100);
      });
    });

    await unblurContent(page);
    await new Promise(resolve => setTimeout(resolve, 5000));

    const pageElements = await page.$$('[data-page]');
    const totalPages = pageElements.length;
    console.log(`π Found ${totalPages} pages to stream.`);

    if (totalPages === 0) {
      throw new Error("No document pages found to stream. The content might be protected or not loaded correctly.");
    }

    // Set headers for streaming
    res.setHeader('Content-Type', 'application/json');
    res.setHeader('Transfer-Encoding', 'chunked');

    let sentPages = 0;
    for (let i = 0; i < totalPages; i++) {
      console.log(`π¨ Rendering page ${i + 1} of ${totalPages}...`);
      const pageElement = pageElements[i];
      const imageData = await pageElement.screenshot({ type: 'png', encoding: 'base64' });

      const progressUpdate = {
        pageNumber: i + 1,
        totalPages: totalPages,
        imageData: `data:image/png;base64,${imageData}`
      };

      // Send as a new line delimited JSON; stop rendering once nobody listens.
      const stillOpen = await writeLine(progressUpdate);
      if (!stillOpen) {
        console.log(`Client disconnected after page ${i + 1} of ${totalPages}; stopping stream.`);
        break;
      }
      sentPages += 1;
    }

    if (sentPages === totalPages) {
      console.log("✅ All pages have been rendered and sent.");
    }

  } catch (error) {
    console.error("β Error during streamed download:", error);
    const errorResponse = {
      error: `Failed to generate streamed PDF: ${error.message}`
    };
    if (!res.headersSent) {
      res.status(500).json(errorResponse);
    } else if (!res.writableEnded && !res.destroyed) {
      res.write(JSON.stringify(errorResponse) + '\n');
    }
  } finally {
    if (browser) {
      console.log("π Closing browser for streaming...");
      try {
        await browser.close();
      } catch (closeError) {
        // A failed close must not prevent the response from being ended below.
        console.error("Failed to close browser after streaming:", closeError);
      }
    }
    if (!res.writableEnded) {
      res.end(); // End the stream
    }
  }
};
|
| 694 |
+
|
| 695 |
+
|
| 696 |
// API Routes
|
| 697 |
+
|
| 698 |
+
// Original endpoint for downloading the full PDF at once
|
| 699 |
app.post('/api/download', async (req, res) => {
|
| 700 |
const { url, filename, email, password } = req.body;
|
|
|
|
| 701 |
if (!url) {
|
| 702 |
return res.status(400).json({ error: 'URL is required.' });
|
| 703 |
}
|
|
|
|
| 704 |
if (!url.includes('studocu.com')) {
|
| 705 |
return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
|
| 706 |
}
|
|
|
|
| 711 |
}
|
| 712 |
|
| 713 |
console.log(`π― Processing request for: ${normalizedUrl}`);
|
|
|
|
| 714 |
try {
|
| 715 |
const startTime = Date.now();
|
| 716 |
const pdfBuffer = await studocuDownloader(normalizedUrl, { filename, email, password });
|
| 717 |
const processingTime = ((Date.now() - startTime) / 1000).toFixed(2);
|
|
|
|
| 718 |
res.setHeader('Content-Type', 'application/pdf');
|
| 719 |
res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
|
| 720 |
res.setHeader('Content-Length', pdfBuffer.length);
|
| 721 |
res.send(pdfBuffer);
|
|
|
|
| 722 |
console.log(`π Request completed successfully in ${processingTime}s`);
|
| 723 |
} catch (error) {
|
| 724 |
console.error(`β Failed to process ${normalizedUrl}:`, error.message);
|
|
|
|
| 726 |
}
|
| 727 |
});
|
| 728 |
|
| 729 |
+
// NEW: Endpoint for streaming the document page by page.
// Body: { url, email?, password? }.
// Responds 400 when `url` is missing or does not point at studocu.com (or one
// of its subdomains); otherwise hands the response to the streaming downloader.
app.post('/api/download-stream', async (req, res) => {
  // A missing or unparsed body must produce a 400, not a destructuring TypeError.
  const { url, email, password } = req.body ?? {};
  if (!url) {
    return res.status(400).json({ error: 'URL is required.' });
  }

  // Parse the URL and compare the hostname. A substring test would also accept
  // e.g. "http://evil.test/?studocu.com" and let the server-side browser be
  // pointed at arbitrary hosts.
  const rawUrl = typeof url === 'string' ? url.trim() : '';
  const candidate = /^https?:\/\//i.test(rawUrl) ? rawUrl : `https://${rawUrl}`;
  let parsedUrl = null;
  try {
    parsedUrl = new URL(candidate);
  } catch {
    parsedUrl = null;
  }
  const host = parsedUrl ? parsedUrl.hostname.toLowerCase() : '';
  const isStudocuHost = host === 'studocu.com' || host.endsWith('.studocu.com');
  if (!isStudocuHost) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  // Navigate to exactly the URL that was validated.
  const normalizedUrl = parsedUrl.href;

  console.log(`π― Processing stream request for: ${normalizedUrl}`);
  try {
    await studocuDownloaderStreamed(normalizedUrl, { email, password }, res);
    console.log(`π Stream request completed for ${normalizedUrl}`);
  } catch (error) {
    console.error(`β Failed to process stream for ${normalizedUrl}:`, error.message);
    // The downloader normally reports errors and closes the response itself;
    // this is a safety net so the request can never be left hanging.
    if (!res.headersSent) {
      res.status(500).json({ error: `Failed to generate streamed PDF: ${error.message}` });
    } else if (!res.writableEnded) {
      res.end();
    }
  }
});
|
| 753 |
+
|
| 754 |
+
|
| 755 |
// Health check: reports a fixed status plus the current time and process uptime.
app.get('/health', (_req, res) => {
  const healthReport = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
  };
  res.json(healthReport);
});
|
| 762 |
|
| 763 |
app.get('/', (req, res) => {
|
| 764 |
res.json({
|
| 765 |
+
message: 'π Enhanced StuDocu Downloader API v5.3 - Advanced Bypass with Print Styles and Streaming',
|
| 766 |
+
version: '5.3.1',
|
| 767 |
features: [
|
| 768 |
'πͺ Advanced cookie banner bypass',
|
| 769 |
'π Premium content unblurring (client-side only; server-side blur requires premium login)',
|
| 770 |
'π Login support for full unblurred content access',
|
| 771 |
'π€ Anti-bot detection evasion',
|
| 772 |
+
'π Full document content extraction with print styles for clean PDF',
|
| 773 |
+
'π Real-time page rendering and streaming to the frontend'
|
| 774 |
],
|
| 775 |
endpoints: {
|
| 776 |
download: 'POST /api/download (body: {url, filename?, email?, password?})',
|
| 777 |
+
download_stream: 'POST /api/download-stream (body: {url, email?, password?})',
|
| 778 |
health: 'GET /health'
|
| 779 |
},
|
| 780 |
note: 'For full unblurred content, provide premium email and password. Blurring is often server-side, so CSS bypass may not suffice without login.'
|
|
|
|
| 792 |
});
|
| 793 |
|
| 794 |
// Start the HTTP server; once it is listening, log the address and feature list.
app.listen(port, function onListening() {
  const startupLines = [
    `π Enhanced StuDocu Downloader v5.3.1 running on http://localhost:${port}`,
    `β¨ Features: Advanced cookie bypass, content unblurring, login support, print styles, anti-detection, and real-time page streaming`,
  ];
  for (const line of startupLines) {
    console.log(line);
  }
});
|