File size: 7,016 Bytes
96a6d41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
"""
Show what data is stored in MongoDB collections
"""
import os
import sys
import json
import logging
from datetime import datetime, timedelta
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
from bson import ObjectId
# Configure the root logger: INFO and above, timestamped records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Logger named after this module; used by the functions below.
logger = logging.getLogger(__name__)
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that can serialize MongoDB documents.

    Adds support for the two non-JSON types handled here: ``ObjectId``
    (rendered via ``str``) and ``datetime`` (rendered as ISO 8601 text).
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Anything other than ``ObjectId`` or ``datetime`` is delegated to the
        base class, which raises ``TypeError`` for unsupported objects.
        """
        if isinstance(obj, ObjectId):
            encoded = str(obj)
        elif isinstance(obj, datetime):
            encoded = obj.isoformat()
        else:
            encoded = super().default(obj)
        return encoded
# Horizontal rule used around every section header.
_SEPARATOR = "=" * 80

# Field listings that are logged as-is under each collection's
# "Data Structure" heading. They are descriptive text only; nothing in this
# script validates stored documents against them.
_API_CALLS_SCHEMA = """
{
"_id": ObjectId, # MongoDB auto-generated ID
"endpoint": str, # API endpoint path (e.g., "/health", "/colorize")
"method": str, # HTTP method ("GET", "POST", etc.)
"status_code": int, # HTTP status code (200, 400, 500, etc.)
"timestamp": datetime, # UTC timestamp when API was called
"request_data": dict, # Request parameters/data
"response_data": dict, # Response data returned
"error": str or None, # Error message if any
"user_id": str or None, # User ID if authenticated
"ip_address": str or None # Client IP address
}
"""

_IMAGE_UPLOADS_SCHEMA = """
{
"_id": ObjectId, # MongoDB auto-generated ID
"image_id": str, # Unique image identifier (UUID)
"filename": str, # Original filename
"file_size": int, # File size in bytes
"content_type": str, # MIME type (e.g., "image/jpeg")
"uploaded_at": datetime, # UTC timestamp when image was uploaded
"user_id": str or None, # User ID if authenticated
"ip_address": str or None # Client IP address
}
"""

_COLORIZATIONS_SCHEMA = """
{
"_id": ObjectId, # MongoDB auto-generated ID
"result_id": str, # Unique result identifier (UUID)
"image_id": str or None, # Original image identifier
"prompt": str or None, # Text prompt used (if any)
"model_type": str or None, # Model type ("fastai", "pytorch", "sdxl", "gan")
"processing_time": float or None, # Time taken to process in seconds
"created_at": datetime, # UTC timestamp when colorization was created
"user_id": str or None, # User ID if authenticated
"ip_address": str or None # Client IP address
}
"""


def _report_collection(db, position, name, time_field, schema):
    """Log the schema text, document count and newest documents of a collection.

    Args:
        db: Database object the collection is looked up in (``db[name]``).
        position: 1-based section number shown in the header.
        name: Collection name.
        time_field: Field used to sort descending when picking samples.
        schema: Descriptive text logged under "Data Structure".

    Returns:
        A ``(collection, total)`` tuple so the caller can reuse both for the
        summary instead of querying the count again.
    """
    # Only sections after the first are preceded by a blank line.
    logger.info(_SEPARATOR if position == 1 else "\n" + _SEPARATOR)
    logger.info(f"COLLECTION {position}: {name}")
    logger.info(_SEPARATOR)
    logger.info("\n📋 Data Structure:")
    logger.info(schema)

    collection = db[name]
    total = collection.count_documents({})
    logger.info(f"\n📊 Total documents: {total}")
    if total:
        logger.info("\n📄 Sample Documents:")
        # Up to three documents, newest first according to ``time_field``.
        newest = collection.find().sort(time_field, -1).limit(3)
        for i, doc in enumerate(newest, 1):
            logger.info(f"\n Document {i}:")
            logger.info(f" {json.dumps(doc, cls=JSONEncoder, indent=4)}")
    return collection, total


def show_stored_data():
    """Show all data stored in MongoDB collections.

    Connects using the ``MONGODB_URI`` environment variable, selects the
    database named by ``MONGODB_DB_NAME`` (default ``"colorization_db"``), and
    logs, for ``api_calls``, ``image_uploads`` and ``colorizations``: a schema
    description, the document count and up to three of the newest documents.
    A summary with overall totals and counts for the last 24 hours follows.

    Returns:
        ``True`` when the whole report was produced; ``False`` when
        ``MONGODB_URI`` is unset, the server cannot be reached, or any other
        error occurs (the error is logged, not raised).
    """
    # Function-scope import: the file's top-level import only brings in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    mongodb_uri = os.getenv("MONGODB_URI")
    if not mongodb_uri:
        logger.error("❌ MONGODB_URI environment variable not set!")
        return False
    db_name = os.getenv("MONGODB_DB_NAME", "colorization_db")

    try:
        # The context manager closes the client on every exit path, including
        # exceptions raised part-way through the report.
        with MongoClient(mongodb_uri, serverSelectionTimeoutMS=5000) as client:
            # Force a round-trip so connection problems surface immediately.
            client.admin.command('ping')
            logger.info("✅ Connected to MongoDB\n")
            db = client[db_name]

            api_calls, api_total = _report_collection(
                db, 1, "api_calls", "timestamp", _API_CALLS_SCHEMA
            )
            image_uploads, upload_total = _report_collection(
                db, 2, "image_uploads", "uploaded_at", _IMAGE_UPLOADS_SCHEMA
            )
            colorizations, colorization_total = _report_collection(
                db, 3, "colorizations", "created_at", _COLORIZATIONS_SCHEMA
            )

            # Summary
            logger.info("\n" + _SEPARATOR)
            logger.info("SUMMARY")
            logger.info(_SEPARATOR)
            logger.info(f"\nDatabase: {db_name}")
            logger.info(f"Total API calls logged: {api_total}")
            logger.info(f"Total image uploads logged: {upload_total}")
            logger.info(f"Total colorizations logged: {colorization_total}")

            # Recent activity (last 24 hours). One shared cutoff keeps the
            # three counts on exactly the same window.
            since = datetime.now(timezone.utc) - timedelta(hours=24)
            recent_api = api_calls.count_documents(
                {"timestamp": {"$gte": since}}
            )
            recent_uploads = image_uploads.count_documents(
                {"uploaded_at": {"$gte": since}}
            )
            recent_colorizations = colorizations.count_documents(
                {"created_at": {"$gte": since}}
            )
            logger.info("\n📈 Activity in last 24 hours:")
            logger.info(f" API calls: {recent_api}")
            logger.info(f" Image uploads: {recent_uploads}")
            logger.info(f" Colorizations: {recent_colorizations}")
            return True
    except (ConnectionFailure, ServerSelectionTimeoutError) as e:
        logger.error(f"❌ Failed to connect to MongoDB: {e}")
        return False
    except Exception as e:
        # Top-level boundary for this script: log with traceback and report
        # failure through the return value.
        logger.exception(f"❌ Error: {e}")
        return False
if __name__ == "__main__":
    # Exit status 0 when the report completed, 1 otherwise.
    exit_code = 0 if show_stored_data() else 1
    sys.exit(exit_code)
|