Spaces:
Sleeping
Sleeping
| import os | |
| import torch | |
| import pickle | |
| import joblib | |
| import torch.nn.functional as F | |
| from PIL import Image | |
| import gradio as gr | |
| from transformers import AutoModelForImageClassification | |
| from torch import nn | |
| from torchvision import transforms | |
| from huggingface_hub import hf_hub_download | |
# File names inside the Hugging Face model repository.
MODEL_PATH = "DeiT_Model_Parameter.pth"
ENCODER_PATH = "label_encoder.pkl"

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def load_label_encoder():
    """Download the label encoder from the Hugging Face Hub and load it.

    Returns:
        The object stored in ``ENCODER_PATH`` of the
        ``bobs24/DeiT-Classification-Apparel`` repository, as deserialised
        by ``joblib.load``.
    """
    # NOTE(review): joblib.load unpickles the downloaded file, which can run
    # arbitrary code. This is only safe while the repository is trusted.
    return joblib.load(
        hf_hub_download(
            repo_id="bobs24/DeiT-Classification-Apparel",
            filename=ENCODER_PATH,
        )
    )
| # Define the model class | |
class CustomModel(nn.Module):
    """Wrapper around a pretrained DeiT classifier that returns raw logits."""

    def __init__(self, num_classes):
        """Create the wrapped classifier.

        Args:
            num_classes: Number of labels passed to the model as
                ``num_labels``.
        """
        super().__init__()
        # ignore_mismatched_sizes is passed so that loading does not fail
        # when the requested head size differs from the checkpoint's.
        self.base_model = AutoModelForImageClassification.from_pretrained(
            "facebook/deit-base-patch16-224",
            num_labels=num_classes,
            ignore_mismatched_sizes=True,
        )

    def forward(self, x):
        """Run ``x`` through the base model and return its ``logits``."""
        outputs = self.base_model(x)
        return outputs.logits
def load_model():
    """Download the fine-tuned weights and build the ready-to-use model.

    The checkpoint named by ``MODEL_PATH`` is fetched from the
    ``bobs24/DeiT-Classification-Apparel`` repository, a ``CustomModel`` is
    sized from the label encoder's ``classes_``, and the weights are loaded
    into it on ``device``.

    Returns:
        A ``(model, label_encoder)`` tuple. The model is in eval mode and
        carries a ``device`` attribute set to the module-level ``device``.
    """
    weights_file = hf_hub_download(
        repo_id="bobs24/DeiT-Classification-Apparel",
        filename=MODEL_PATH,
    )
    label_encoder = load_label_encoder()

    model = CustomModel(num_classes=len(label_encoder.classes_)).to(device)

    # The checkpoint is only used as a state dict, so restrict unpickling to
    # tensors and plain containers instead of allowing arbitrary objects from
    # a downloaded file.
    state_dict = torch.load(weights_file, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    model.device = device
    model.eval()
    return model, label_encoder
# Build the model and label encoder once, at import time.
model, label_encoder = load_model()

# Input pipeline applied to every image before inference.
preprocess = transforms.Compose(
    [
        # An int size scales the shorter edge to 256 and keeps the aspect
        # ratio (it does not force a 256x256 square).
        transforms.Resize(256),
        # Take the central 224x224 region.
        transforms.CenterCrop(224),
        # Convert the PIL image to a tensor.
        transforms.ToTensor(),
        # Per-channel normalisation with the mean/std used for this model.
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)
| # Function to perform predictions and show probabilities | |
def predict(image):
    """Classify an image and describe the most likely class.

    Args:
        image: Array accepted by ``Image.fromarray``, or ``None`` when no
            image was supplied.

    Returns:
        ``"Please insert photo"`` when ``image`` is ``None``; otherwise a
        string with the predicted class name and its softmax probability
        formatted to four decimal places.
    """
    # Guard clause: nothing to classify.
    if image is None:
        return "Please insert photo"

    # Convert to RGB, run the preprocessing pipeline, add a batch dimension.
    rgb_image = Image.fromarray(image).convert("RGB")
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits = model(batch)
        class_probs = F.softmax(logits, dim=1)

    # Index and probability of the highest-scoring class.
    top_index = class_probs.argmax(dim=1).item()
    confidence = class_probs[0, top_index].item()

    # Map the numeric index back to its label.
    class_name = label_encoder.inverse_transform([top_index])[0]
    return f"Predicted class: {class_name}, Confidence: {confidence:.4f}"
# Gradio UI: the image is handed to predict() as a numpy array and the result
# is shown as text. live=True is kept from the original configuration.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy"),
    outputs="text",
    live=True,
)

# Start serving the interface.
iface.launch()