text-guided-image-colorization

Running

App Files Community

LogicGoInfotechSpaces committed on Nov 14

Commit

0a1a3e1

1 Parent(s): 293fc40

Fix image colorization: Add PyTorch GAN colorizer fallback, update Dockerfile to use main_fastai, and add missing dependencies

Browse files

Files changed (4) hide show

Dockerfile +1 -1
app/main_fastai.py +68 -39
app/pytorch_colorizer.py +247 -0
requirements.txt +4 -1

Dockerfile CHANGED Viewed

@@ -63,4 +63,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
 ENTRYPOINT ["/entrypoint.sh"]
 # Run the application (port will be set via environment variable)
-CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}"]

 ENTRYPOINT ["/entrypoint.sh"]
 # Run the application (port will be set via environment variable)
+CMD ["sh", "-c", "uvicorn app.main_fastai:app --host 0.0.0.0 --port ${PORT:-7860}"]

app/main_fastai.py CHANGED Viewed

@@ -34,6 +34,7 @@ from fastai.vision.all import *
 from huggingface_hub import from_pretrained_fastai
 from app.config import settings
 # Configure logging
 logging.basicConfig(
@@ -94,30 +95,50 @@ app.mount("/uploads", StaticFiles(directory=str(UPLOAD_DIR)), name="uploads")
 # Initialize FastAI model
 learn = None
 model_load_error: Optional[str] = None
 @app.on_event("startup")
 async def startup_event():
-    """Load FastAI model on startup"""
-    global learn, model_load_error
     try:
-        model_id = os.getenv("MODEL_ID", "Hammad712/GAN-Colorization-Model")
-        logger.info("🔄 Loading FastAI GAN Colorization Model: %s", model_id)
         learn = from_pretrained_fastai(model_id)
-        logger.info("✅ Model loaded successfully!")
         model_load_error = None
     except Exception as e:
         error_msg = str(e)
-        logger.error("❌ Failed to load model: %s", error_msg)
         model_load_error = error_msg
         # Don't raise - allow health check to work
 @app.on_event("shutdown")
 async def shutdown_event():
     """Cleanup on shutdown"""
-    global learn
     if learn:
         del learn
     logger.info("Application shutdown")
 def _extract_bearer_token(authorization_header: str | None) -> str | None:
@@ -182,7 +203,8 @@ async def health_check():
     """Health check endpoint"""
     response = {
         "status": "healthy",
-        "model_loaded": learn is not None,
         "model_id": os.getenv("MODEL_ID", "Hammad712/GAN-Colorization-Model")
     }
     if model_load_error:
@@ -191,38 +213,45 @@ async def health_check():
 def colorize_pil(image: Image.Image) -> Image.Image:
     """Run model prediction and return colorized image"""
-    if learn is None:
-        raise RuntimeError("Model not loaded")
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    pred = learn.predict(image)
-    # Handle different return types from FastAI
-    if isinstance(pred, (list, tuple)):
-        colorized = pred[0] if len(pred) > 0 else image
-    else:
-        colorized = pred
-    # Ensure we have a PIL Image
-    if not isinstance(colorized, Image.Image):
-        if isinstance(colorized, torch.Tensor):
-            # Convert tensor to PIL
-            if colorized.dim() == 4:
-                colorized = colorized[0]
-            if colorized.dim() == 3:
-                colorized = colorized.permute(1, 2, 0).cpu()
-                if colorized.dtype in (torch.float32, torch.float16):
-                    colorized = torch.clamp(colorized, 0, 1)
-                    colorized = (colorized * 255).byte()
-                colorized = Image.fromarray(colorized.numpy(), 'RGB')
-            else:
-                raise ValueError(f"Unexpected tensor shape: {colorized.shape}")
         else:
-            raise ValueError(f"Unexpected prediction type: {type(colorized)}")
-    if colorized.mode != "RGB":
-        colorized = colorized.convert("RGB")
-    return colorized
 @app.post("/colorize")
 async def colorize_api(
@@ -233,7 +262,7 @@ async def colorize_api(
     Upload a black & white image -> returns colorized image.
     Requires Firebase authentication unless DISABLE_AUTH=true
     """
-    if learn is None:
         raise HTTPException(status_code=503, detail="Colorization model not loaded")
     if not file.content_type or not file.content_type.startswith("image/"):
@@ -270,7 +299,7 @@ def gradio_colorize(image):
     if image is None:
         return None
     try:
-        if learn is None:
             return None
         return colorize_pil(image)
     except Exception as e:

 from huggingface_hub import from_pretrained_fastai
 from app.config import settings
+from app.pytorch_colorizer import PyTorchColorizer
 # Configure logging
 logging.basicConfig(
 # Initialize FastAI model
 learn = None
+pytorch_colorizer = None
 model_load_error: Optional[str] = None
+model_type: str = "none"  # "fastai", "pytorch", or "none"
 @app.on_event("startup")
 async def startup_event():
+    """Load FastAI or PyTorch model on startup"""
+    global learn, pytorch_colorizer, model_load_error, model_type
+    model_id = os.getenv("MODEL_ID", "Hammad712/GAN-Colorization-Model")
+    # Try FastAI first
     try:
+        logger.info("🔄 Attempting to load FastAI GAN Colorization Model: %s", model_id)
         learn = from_pretrained_fastai(model_id)
+        logger.info("✅ FastAI model loaded successfully!")
+        model_type = "fastai"
         model_load_error = None
+        return
     except Exception as e:
         error_msg = str(e)
+        logger.warning("⚠️ FastAI model loading failed: %s. Trying PyTorch fallback...", error_msg)
+    # Fallback to PyTorch
+    try:
+        logger.info("🔄 Attempting to load PyTorch GAN Colorization Model: %s", model_id)
+        pytorch_colorizer = PyTorchColorizer(model_id=model_id, model_filename="generator.pt")
+        logger.info("✅ PyTorch model loaded successfully!")
+        model_type = "pytorch"
+        model_load_error = None
+    except Exception as e:
+        error_msg = str(e)
+        logger.error("❌ Failed to load both FastAI and PyTorch models: %s", error_msg)
         model_load_error = error_msg
+        model_type = "none"
         # Don't raise - allow health check to work
 @app.on_event("shutdown")
 async def shutdown_event():
     """Cleanup on shutdown"""
+    global learn, pytorch_colorizer
     if learn:
         del learn
+    if pytorch_colorizer:
+        del pytorch_colorizer
     logger.info("Application shutdown")
 def _extract_bearer_token(authorization_header: str | None) -> str | None:
     """Health check endpoint"""
     response = {
         "status": "healthy",
+        "model_loaded": (learn is not None) or (pytorch_colorizer is not None),
+        "model_type": model_type,
         "model_id": os.getenv("MODEL_ID", "Hammad712/GAN-Colorization-Model")
     }
     if model_load_error:
 def colorize_pil(image: Image.Image) -> Image.Image:
     """Run model prediction and return colorized image"""
+    # Try FastAI first
+    if learn is not None:
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        pred = learn.predict(image)
+        # Handle different return types from FastAI
+        if isinstance(pred, (list, tuple)):
+            colorized = pred[0] if len(pred) > 0 else image
         else:
+            colorized = pred
+        # Ensure we have a PIL Image
+        if not isinstance(colorized, Image.Image):
+            if isinstance(colorized, torch.Tensor):
+                # Convert tensor to PIL
+                if colorized.dim() == 4:
+                    colorized = colorized[0]
+                if colorized.dim() == 3:
+                    colorized = colorized.permute(1, 2, 0).cpu()
+                    if colorized.dtype in (torch.float32, torch.float16):
+                        colorized = torch.clamp(colorized, 0, 1)
+                        colorized = (colorized * 255).byte()
+                    colorized = Image.fromarray(colorized.numpy(), 'RGB')
+                else:
+                    raise ValueError(f"Unexpected tensor shape: {colorized.shape}")
+            else:
+                raise ValueError(f"Unexpected prediction type: {type(colorized)}")
+        if colorized.mode != "RGB":
+            colorized = colorized.convert("RGB")
+        return colorized
+    # Fallback to PyTorch
+    elif pytorch_colorizer is not None:
+        return pytorch_colorizer.colorize(image)
+    else:
+        raise RuntimeError("No colorization model loaded")
 @app.post("/colorize")
 async def colorize_api(
     Upload a black & white image -> returns colorized image.
     Requires Firebase authentication unless DISABLE_AUTH=true
     """
+    if learn is None and pytorch_colorizer is None:
         raise HTTPException(status_code=503, detail="Colorization model not loaded")
     if not file.content_type or not file.content_type.startswith("image/"):
     if image is None:
         return None
     try:
+        if learn is None and pytorch_colorizer is None:
             return None
         return colorize_pil(image)
     except Exception as e:

app/pytorch_colorizer.py ADDED Viewed

	@@ -0,0 +1,247 @@

+"""
+PyTorch GAN Colorization Model Loader
+Handles loading and inference for PyTorch GAN colorization models
+"""
+import functools
+import logging
+import os
+from typing import Tuple
+import torch
+import torch.nn as nn
+from PIL import Image
+from torchvision import transforms
+from huggingface_hub import hf_hub_download
+logger = logging.getLogger(__name__)
+class UNetGenerator(nn.Module):
+    """
+    U-Net Generator for Image Colorization
+    Common architecture for GAN-based colorization models
+    """
+    def __init__(self, input_nc=1, output_nc=3, num_downs=8, ngf=64, use_dropout=False):
+        super(UNetGenerator, self).__init__()
+        # Build U-Net
+        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None,
+                                            norm_layer=nn.BatchNorm2d, innermost=True)
+        for i in range(num_downs - 5):
+            unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None,
+                                                submodule=unet_block, norm_layer=nn.BatchNorm2d,
+                                                use_dropout=use_dropout)
+        unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None,
+                                            submodule=unet_block, norm_layer=nn.BatchNorm2d)
+        unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None,
+                                            submodule=unet_block, norm_layer=nn.BatchNorm2d)
+        unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None,
+                                            submodule=unet_block, norm_layer=nn.BatchNorm2d)
+        self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc,
+                                            submodule=unet_block, outermost=True,
+                                            norm_layer=nn.BatchNorm2d)
+    def forward(self, input):
+        return self.model(input)
+class UnetSkipConnectionBlock(nn.Module):
+    """Defines the Unet submodule with skip connection"""
+    def __init__(self, outer_nc, inner_nc, input_nc=None,
+                 submodule=None, outermost=False, innermost=False,
+                 norm_layer=nn.BatchNorm2d, use_dropout=False):
+        super(UnetSkipConnectionBlock, self).__init__()
+        self.outermost = outermost
+        if type(norm_layer) == functools.partial:
+            use_bias = norm_layer.func == nn.InstanceNorm2d
+        else:
+            use_bias = norm_layer == nn.InstanceNorm2d
+        if input_nc is None:
+            input_nc = outer_nc
+        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
+                             stride=2, padding=1, bias=use_bias)
+        downrelu = nn.LeakyReLU(0.2, True)
+        downnorm = norm_layer(inner_nc)
+        uprelu = nn.ReLU(True)
+        upnorm = norm_layer(outer_nc)
+        if outermost:
+            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+                                        kernel_size=4, stride=2,
+                                        padding=1)
+            down = [downconv]
+            up = [uprelu, upconv, nn.Tanh()]
+            model = down + [submodule] + up
+        elif innermost:
+            upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
+                                        kernel_size=4, stride=2,
+                                        padding=1, bias=use_bias)
+            down = [downrelu, downconv]
+            up = [uprelu, upconv, upnorm]
+            model = down + up
+        else:
+            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
+                                        kernel_size=4, stride=2,
+                                        padding=1, bias=use_bias)
+            down = [downrelu, downconv, downnorm]
+            up = [uprelu, upconv, upnorm]
+            if use_dropout:
+                model = down + [submodule] + up + [nn.Dropout(0.5)]
+            else:
+                model = down + [submodule] + up
+        self.model = nn.Sequential(*model)
+    def forward(self, x):
+        if self.outermost:
+            return self.model(x)
+        else:
+            return torch.cat([x, self.model(x)], 1)
+class PyTorchColorizer:
+    """PyTorch GAN Colorization Model"""
+    def __init__(self, model_id: str = "Hammad712/GAN-Colorization-Model", model_filename: str = "generator.pt"):
+        self.model_id = model_id
+        self.model_filename = model_filename
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = None
+        self.cache_dir = os.environ.get("HF_HOME", "/tmp/hf_cache")
+        logger.info(f"Loading PyTorch GAN colorization model: {model_id}/{model_filename}")
+        self._load_model()
+    def _load_model(self):
+        """Load the PyTorch model"""
+        try:
+            # Download model file
+            model_path = hf_hub_download(
+                repo_id=self.model_id,
+                filename=self.model_filename,
+                cache_dir=self.cache_dir
+            )
+            logger.info(f"Model downloaded to: {model_path}")
+            # Try loading the model file
+            # First, try loading as a complete model (if saved with torch.save(model, path))
+            try:
+                loaded_obj = torch.load(model_path, map_location=self.device)
+                # Check if it's already a model instance
+                if isinstance(loaded_obj, nn.Module):
+                    self.model = loaded_obj
+                    self.model.eval()
+                    self.model.to(self.device)
+                    logger.info("✅ Loaded complete model object")
+                    return
+                # Otherwise, it's likely a state_dict
+                state_dict = loaded_obj
+            except Exception as e:
+                logger.error(f"Failed to load model file: {e}")
+                raise
+            # Try different model architectures with state_dict
+            model_configs = [
+                {"input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 64},
+                {"input_nc": 1, "output_nc": 3, "num_downs": 7, "ngf": 64},
+                {"input_nc": 1, "output_nc": 3, "num_downs": 8, "ngf": 32},
+                {"input_nc": 1, "output_nc": 3, "num_downs": 6, "ngf": 64},
+            ]
+            loaded = False
+            for config in model_configs:
+                try:
+                    model = UNetGenerator(**config)
+                    # Try strict loading first
+                    try:
+                        model.load_state_dict(state_dict, strict=True)
+                        logger.info(f"✅ Successfully loaded model with strict matching: {config}")
+                    except:
+                        # If strict fails, try non-strict
+                        model.load_state_dict(state_dict, strict=False)
+                        logger.info(f"✅ Successfully loaded model with non-strict matching: {config}")
+                    model.eval()
+                    model.to(self.device)
+                    self.model = model
+                    loaded = True
+                    break
+                except Exception as e:
+                    logger.debug(f"Failed to load with config {config}: {e}")
+                    continue
+            if not loaded:
+                # Last resort: try with default config and non-strict loading
+                try:
+                    logger.warning("Attempting to load model with default config and non-strict matching")
+                    model = UNetGenerator(input_nc=1, output_nc=3, num_downs=8, ngf=64)
+                    model.load_state_dict(state_dict, strict=False)
+                    model.eval()
+                    model.to(self.device)
+                    self.model = model
+                    logger.info("✅ Model loaded with fallback method")
+                except Exception as e:
+                    logger.error(f"Failed to load model: {e}")
+                    raise RuntimeError(
+                        f"Could not load PyTorch model. Tried multiple architectures. "
+                        f"Last error: {e}. "
+                        f"The model architecture may not match the expected U-Net structure."
+                    )
+        except Exception as e:
+            logger.error(f"Error loading PyTorch model: {e}")
+            raise RuntimeError(f"Failed to load PyTorch colorization model: {e}")
+    def colorize(self, image: Image.Image) -> Image.Image:
+        """
+        Colorize a grayscale or color image
+        Args:
+            image: PIL Image (will be converted to grayscale if color)
+        Returns:
+            Colorized PIL Image
+        """
+        if self.model is None:
+            raise RuntimeError("Model not loaded")
+        original_size = image.size
+        # Convert to grayscale if needed
+        if image.mode != "L":
+            image = image.convert("L")
+        # Transform to tensor
+        transform = transforms.Compose([
+            transforms.Resize((256, 256)),  # Common size for GAN models
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize to [-1, 1]
+        ])
+        input_tensor = transform(image).unsqueeze(0).to(self.device)
+        # Run inference
+        with torch.no_grad():
+            output_tensor = self.model(input_tensor)
+        # Convert output back to PIL Image
+        # Output is typically in range [-1, 1] from Tanh activation
+        output_tensor = output_tensor.squeeze(0).cpu()
+        output_tensor = (output_tensor + 1) / 2.0  # Denormalize from [-1, 1] to [0, 1]
+        output_tensor = torch.clamp(output_tensor, 0, 1)
+        # Convert to numpy and then PIL
+        output_array = (output_tensor.permute(1, 2, 0).numpy() * 255).astype('uint8')
+        output_image = Image.fromarray(output_array, 'RGB')
+        # Resize back to original size
+        if output_image.size != original_size:
+            output_image = output_image.resize(original_size, Image.Resampling.LANCZOS)
+        return output_image

requirements.txt CHANGED Viewed

@@ -4,4 +4,7 @@ fastapi
 uvicorn
 gradio
 pillow
-firebase-admin

 uvicorn
 gradio
 pillow
+firebase-admin
+fastai
+huggingface_hub
+pydantic-settings