Train a Font Identifier using ResNet18

Browse files

Files changed (5) hide show

README.md +3 -2
arrange_train_test_images.py +2 -2
gen_sample_data.py +3 -1
requirements.txt +5 -0
train_font_identifier.py +122 -0

README.md CHANGED Viewed

@@ -7,5 +7,6 @@ Follow along:
 - [On Threads.net](https://www.threads.net/@gaborcselle/post/CzZJpJCpxTz)
 - [On Twitter](https://twitter.com/gabor/status/1722300841691103467)
-Generate sample images (note this will work only on Mac): [gen_sample_data.py]
-Arrange test images into test and train: [arrange_train_test_images.py]

 - [On Threads.net](https://www.threads.net/@gaborcselle/post/CzZJpJCpxTz)
 - [On Twitter](https://twitter.com/gabor/status/1722300841691103467)
+Generate sample images (note this will work only on Mac): [gen_sample_data.py](gen_sample_data.py)
+Arrange the sample images into train and test sets: [arrange_train_test_images.py](arrange_train_test_images.py)
+Train a ResNet18 on the data: [train_font_identifier.py](train_font_identifier.py)

arrange_train_test_images.py CHANGED Viewed

@@ -29,10 +29,10 @@ for font in fonts:
     train_files = font_files[:int(0.8 * len(font_files))]
     test_files = font_files[int(0.8 * len(font_files)):]
-    # Moving training files
     for train_file in train_files:
         shutil.move(os.path.join(source_dir, train_file), font_train_dir)
-    # Moving test files
     for test_file in test_files:
         shutil.move(os.path.join(source_dir, test_file), font_test_dir)

     train_files = font_files[:int(0.8 * len(font_files))]
     test_files = font_files[int(0.8 * len(font_files)):]
+    # Move training files
     for train_file in train_files:
         shutil.move(os.path.join(source_dir, train_file), font_train_dir)
+    # Move test files
     for test_file in test_files:
         shutil.move(os.path.join(source_dir, test_file), font_test_dir)

gen_sample_data.py CHANGED Viewed

@@ -7,6 +7,8 @@ import nltk
 from nltk.corpus import brown
 import random
 # Download the necessary data from nltk
 nltk.download('brown')
@@ -55,7 +57,7 @@ for font_dir in font_dirs:
             # Counter for the image filename
             j = 0
-            for i in range(10):  # Generate 50 images per font - reduced to 10 for now to make things faster
                 prose_sample = random_prose_text(all_brown_words)
                 for text in [prose_sample]:

 from nltk.corpus import brown
 import random
+IMAGES_PER_FONT = 50
 # Download the necessary data from nltk
 nltk.download('brown')
             # Counter for the image filename
             j = 0
+            for i in range(IMAGES_PER_FONT):  # Generate IMAGES_PER_FONT images per font
                 prose_sample = random_prose_text(all_brown_words)
                 for text in [prose_sample]:

requirements.txt CHANGED Viewed

	@@ -1 +1,6 @@

1	Pillow==9.5.0

+nltk==3.8.1
 Pillow==9.5.0
+torch==2.0.0
+torchaudio==2.0.1
+torchvision==0.15.1
+tqdm==4.65.0

train_font_identifier.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import copy
+import os
+import time
+import torch
+import torch.optim as optim
+from torch.optim import lr_scheduler
+from torchvision import datasets, models, transforms
+from tqdm import tqdm
+# Directory with organized font images
+data_dir = './train_test_images'
+# Define transformations for the image data
+data_transforms = {
+    'train': transforms.Compose([
+        transforms.Resize((224, 224)),  # Resize to the input size expected by the model
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet standards
+    ]),
+    'test': transforms.Compose([
+        transforms.Resize((224, 224)),  # Resize to the input size expected by the model
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+    ]),
+}
+# Create datasets
+image_datasets = {
+    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
+    for x in ['train', 'test']
+}
+# Create dataloaders
+dataloaders = {
+    'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=4),
+    'test': torch.utils.data.DataLoader(image_datasets['test'], batch_size=4)
+}
+# Define the model
+model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
+# Define the loss function
+criterion = torch.nn.CrossEntropyLoss()
+# Optimizer (you can replace 'model.parameters()' with specific parameters to optimize if needed)
+optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
+# Decay LR by a factor of 0.1 every 7 epochs
+exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
+# Number of epochs to train for
+num_epochs = 25
+def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
+    since = time.time()
+    best_model_wts = copy.deepcopy(model.state_dict())
+    best_acc = 0.0
+    for epoch in range(num_epochs):
+        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
+        print('-' * 10)
+        # Each epoch has a training and validation phase
+        for phase in ['train', 'test']:
+            if phase == 'train':
+                model.train()  # Set model to training mode
+            else:
+                model.eval()   # Set model to evaluate mode
+            running_loss = 0.0
+            running_corrects = 0
+            # Iterate over data.
+            # Here we wrap the dataloader with tqdm for a progress bar
+            for inputs, labels in tqdm(dataloaders[phase], desc=f"Epoch {epoch} - {phase}"):
+                # Zero the parameter gradients
+                optimizer.zero_grad()
+                # Forward
+                # Track history if only in train
+                with torch.set_grad_enabled(phase == 'train'):
+                    outputs = model(inputs)
+                    _, preds = torch.max(outputs, 1)
+                    loss = criterion(outputs, labels)
+                    # Backward + optimize only if in training phase
+                    if phase == 'train':
+                        loss.backward()
+                        optimizer.step()
+                # Statistics
+                running_loss += loss.item() * inputs.size(0)
+                running_corrects += torch.sum(preds == labels.data)
+            if phase == 'train':
+                scheduler.step()
+            epoch_loss = running_loss / len(image_datasets[phase])
+            epoch_acc = running_corrects.double() / len(image_datasets[phase])
+            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
+                phase, epoch_loss, epoch_acc))
+            # Deep copy the model
+            if phase == 'test' and epoch_acc > best_acc:
+                best_acc = epoch_acc
+                best_model_wts = copy.deepcopy(model.state_dict())
+        print()
+    time_elapsed = time.time() - since
+    print('Training complete in {:.0f}m {:.0f}s'.format(
+        time_elapsed // 60, time_elapsed % 60))
+    print('Best test Acc: {:4f}'.format(best_acc))
+    # Load best model weights
+    model.load_state_dict(best_model_wts)
+    return model
+# Train the model
+model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=num_epochs)