python pytorch

How can I convert images to 1-bit tensors and use them for training in PyTorch, to reduce RAM and GPU usage?


I wrote a training script using the PyTorch library, and I am training on .png images that are normally 24-32 bits per pixel. To reduce RAM and GPU usage, I converted the images to 1-bit (keeping their size fixed at 512x512). However, there was no change in the training time. I have included the code below; please review it and provide feedback.

The images were converted to 1-bit, but the training time didn't change. I don't want to change the size of the images, just reduce them to 1-bit to speed up the training. I’m not sure what to do to overcome the bottleneck. It seems like the code might not be converting to 1-bit correctly.

# -*- coding: utf-8 -*-
# setxkbmap tr

### Import all the Dependencies

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from PIL import Image

### Set all the Constants

# Number of samples per mini-batch handed to every DataLoader below.
BATCH_SIZE = 32
# Side length (in pixels) each image is resized to; also sizes the first Linear layer.
IMAGE_SIZE = 512
CHANNELS = 1  # 1-channel for binary images
# Upper bound on training epochs; early stopping may end training sooner.
EPOCHS = 500
# Number of output classes (width of the final Linear layer).
n_classes = 2
EARLY_STOPPING_PATIENCE = 5  # Number of epochs to wait for improvement before stopping

### Data Transformations

# Preprocessing applied to every image as it is loaded:
#   1. resize to IMAGE_SIZE x IMAGE_SIZE,
#   2. collapse to CHANNELS grayscale channel(s),
#   3. convert the PIL image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
        transforms.Grayscale(num_output_channels=CHANNELS),
        transforms.ToTensor(),
    ]
)

### Custom Dataset Class for Binary Images

class BinaryImageDataset(datasets.ImageFolder):
    """ImageFolder variant whose samples are binarised through PIL mode '1'.

    Each sample produced by the parent class (after its ``transform``) is
    round-tripped tensor -> PIL image -> mode '1' -> tensor.

    NOTE(review): the final ``ToTensor`` step is documented to return a
    floating-point tensor, so the pixel *values* are binary but the storage
    is presumably still float32 -- confirm with ``img.dtype``.
    """

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for the sample at *index*."""
        tensor_img, target = super().__getitem__(index)

        to_pil = transforms.ToPILImage()
        to_tensor = transforms.ToTensor()

        # Tensor -> PIL, reduce to 1-bit depth, then back to a tensor.
        one_bit_img = to_pil(tensor_img).convert('1')
        return to_tensor(one_bit_img), target

# Build the dataset object from the image directory
dataset = BinaryImageDataset(
    root="/home/han/Documents/04.09.2024/500_TrainAB_Binary",
    transform=transform,
)

# Report how many images and which classes were found
print(f"Number of images in dataset: {len(dataset)}")
print(f"Classes: {dataset.classes}")

# Inspect the first sample: tensor size and label
first_image, first_label = dataset[0]
print(f"Size of the first image (Tensor): {first_image.size()}")  # Prints: [C, H, W]

# Unpack the [C, H, W] dimensions
channels, height, width = first_image.shape
print(f"Channels: {channels}")
print(f"Height: {height}")
print(f"Width: {width}")

print(f"Label of the first image: {dataset.classes[first_label]}")

### Train, Test, Validation Data Split  

def get_dataset_partitions(dataset, train_split=0.8, val_split=0.1, test_split=0.1):
    """Randomly split *dataset* into train, validation and test subsets.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        train_split: Fraction of samples assigned to the training subset.
        val_split: Fraction of samples assigned to the validation subset.
        test_split: Fraction of samples assigned to the test subset.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of subsets. The train and
        validation sizes are ``int(fraction * len(dataset))``; the test
        subset receives every remaining sample, so no sample is dropped.

    Raises:
        ValueError: If a fraction is negative or the fractions do not sum
            to 1 (within floating-point tolerance).
    """
    import math  # function-scope import keeps this block self-contained

    fractions = (train_split, val_split, test_split)
    if any(fraction < 0 for fraction in fractions):
        raise ValueError(f"Split fractions must be non-negative, got {fractions}")
    # Exact `== 1` wrongly rejects valid inputs such as 0.7 + 0.2 + 0.1,
    # which evaluates to 0.9999999999999999 in binary floating point.
    if not math.isclose(sum(fractions), 1.0, abs_tol=1e-9):
        raise ValueError(f"Split fractions must sum to 1, got {fractions}")

    ds_size = len(dataset)
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    # The remainder goes to the test split so the sizes always add up.
    test_size = ds_size - train_size - val_size

    # A single three-way split avoids wrapping a Subset inside another Subset.
    train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])

    return train_ds, val_ds, test_ds

# Partition the dataset using the function's default split fractions
train_ds, val_ds, test_ds = get_dataset_partitions(dataset)

print(f"Size of Data is: {len(dataset)}")
print(f"Size of Training Data: {len(train_ds)}")
print(f"Size of Validation Data: {len(val_ds)}")
print(f"Size of Testing Data: {len(test_ds)}")

### Data Loaders

# Settings shared by all three loaders; only the training loader shuffles.
_common_loader_args = {"batch_size": BATCH_SIZE, "num_workers": 4}

train_loader = DataLoader(train_ds, shuffle=True, **_common_loader_args)
val_loader = DataLoader(val_ds, shuffle=False, **_common_loader_args)
test_loader = DataLoader(test_ds, shuffle=False, **_common_loader_args)

### Building the Model

class CNNModel(nn.Module):
    """Five-stage convolutional classifier for square images.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), with
    32, 64, 128, 256 and 512 output channels, so the spatial side is halved
    five times. The pooled features are flattened and passed through
    Linear(512) -> ReLU -> Dropout(0.5) -> Linear(num_classes). The output is
    raw class scores; no softmax is applied.

    Args:
        in_channels: Channels of the input image. Defaults to the
            module-level ``CHANNELS``.
        image_size: Side length of the square input. Defaults to the
            module-level ``IMAGE_SIZE``.
        num_classes: Number of output scores. Defaults to the module-level
            ``n_classes``.

    Raises:
        ValueError: If ``image_size`` is too small to survive five poolings.
    """

    # Output channels of the successive convolution stages.
    _STAGE_WIDTHS = (32, 64, 128, 256, 512)

    def __init__(self, in_channels=None, image_size=None, num_classes=None):
        super().__init__()

        # Fall back to the script's constants so `CNNModel()` keeps working.
        if in_channels is None:
            in_channels = CHANNELS
        if image_size is None:
            image_size = IMAGE_SIZE
        if num_classes is None:
            num_classes = n_classes

        # Every stage halves the side, i.e. divides by 2 ** number_of_stages.
        pooled_side = image_size // (2 ** len(self._STAGE_WIDTHS))
        if pooled_side < 1:
            raise ValueError(
                f"image_size={image_size} is too small for "
                f"{len(self._STAGE_WIDTHS)} pooling stages"
            )

        layers = []
        previous_width = in_channels
        for width in self._STAGE_WIDTHS:
            layers.append(nn.Conv2d(previous_width, width, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            previous_width = width

        layers.extend([
            nn.Flatten(),
            nn.Linear(previous_width * pooled_side * pooled_side, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ])

        # Same attribute name and layer order as before, so state_dict keys
        # ("model.0.weight", ...) stay compatible with saved checkpoints.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores for a batch of images *x*."""
        return self.model(x)

# Instantiate the network
model = CNNModel()

### Loss and Optimizer

# Cross-entropy on the raw class scores; Adam updates every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

### Training the Model with Early Stopping

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience):
    """Train *model* with early stopping on validation accuracy.

    Args:
        model: The ``nn.Module`` to train; it is moved to CUDA when available.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs to run.
        patience: Stop after this many consecutive epochs without a new best
            validation accuracy.

    Returns:
        The same ``model`` object, with the weights from the epoch that
        achieved the best validation accuracy loaded back in.
    """
    import copy  # function-scope import: needed to snapshot the best weights

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # state_dict() returns references to the live parameter tensors, so a
    # deep copy is required; otherwise the "best" weights silently track the
    # latest ones and restoring them at the end does nothing.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        corrects = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_count = labels.size(0)
            # Weight the batch loss by its size so the epoch value is a
            # per-sample average even when the last batch is smaller.
            running_loss += loss.item() * batch_count
            _, preds = torch.max(outputs, 1)
            # Accumulate plain Python ints so an empty loader cannot crash
            # on `int.double()` afterwards.
            corrects += (preds == labels).sum().item()
            total += batch_count

        epoch_loss = running_loss / total if total else 0.0
        epoch_acc = corrects / total if total else 0.0
        print(f"Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        # Validation phase
        model.eval()
        val_corrects = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                val_corrects += (preds == labels).sum().item()
                val_total += labels.size(0)

        # An empty validation set counts as accuracy 0 (never an improvement).
        val_acc = val_corrects / val_total if val_total else 0.0
        print(f"Validation Accuracy: {val_acc:.4f}")

        # Check for improvement
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            epochs_without_improvement = 0  # Reset the counter
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            # Report the stall length, not the epoch index, as the count.
            print(
                f"Early stopping triggered at epoch {epoch} after "
                f"{epochs_without_improvement} epochs without improvement."
            )
            break

    print(f"Best Validation Accuracy: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model

# Run training; the returned model is rebound to the same name.
model = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=EPOCHS,
    patience=EARLY_STOPPING_PATIENCE,
)

### Testing the Model

def test_model(model, test_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    corrects = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == labels.data)
            total += labels.size(0)

    test_acc = corrects.double() / total
    print(f"Test Accuracy: {test_acc:.4f}")

# Evaluate the trained model on the held-out test split.
test_model(model=model, test_loader=test_loader)


Solution

  • As far as I know, PyTorch does not support 1-bit tensors. The smallest data type available is torch.uint8, which stores 8 bits per value. This might be the reason why the code is not converting to 1-bit.

    You can try converting to torch.uint8, which still reduces memory and GPU usage compared to torch.float32.

    To see the datatype after transformation, try

    print(img.dtype)
    

    which will most likely show torch.float32, the default dtype.