Quick Start#

First, pytorch-pfn-extras organizes training code written in PyTorch around its Trainer/Evaluator classes.

In addition, it provides the following interfaces for training PyTorch models.

  1. Addition of extensions for analysis and visualization

  2. Runtime changes

  3. Addition of custom training steps

  4. Custom data handling

Step 1: Use Trainer#

First, pass the Model and Optimizer you want to train to the Trainer.

quick_start_trainer.py#
import pytorch_pfn_extras as ppe
import torch


class Model(torch.nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear = torch.nn.Linear(in_features=64, out_features=2)
        self.criterion = torch.nn.NLLLoss()

    def forward(self, x, target):
        y = self.linear(x).log_softmax(dim=1)
        loss = self.criterion(y, target)
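        # The Trainer expects forward to return a dict of outputs;
        # in this example the "loss" entry is what gets backpropagated.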
        return {"loss": loss}


model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

device = "cuda:0"  # or any other PyTorch device ('cpu', etc.) or a PPE runtime name
epochs = 3
# Create a trainer with the defined model, optimizer, and other parameters
trainer = ppe.engine.create_trainer(
    models=model,
    optimizers=optimizer,
    max_epochs=epochs,
    evaluator=ppe.engine.create_evaluator(
        models=model,
        device=device,
    ),
    device=device,
)

# Send the model to the device (GPU) for computation
ppe.to(model, device=device)

batch_size = 10
# Create 10 batches of random training data with dimension (batch_size x 64)
training_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]
# Create 10 batches of random validation data with dimension (batch_size x 64)
validation_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]

# Start the training and validation of the model
trainer.run(train_loader=training_data, val_loader=validation_data)

print("Finish training!")

Step 2: Collect Logs#

Next, collect logs of the training progress.

quick_start_log.py#
import pytorch_pfn_extras as ppe
import torch


class Model(torch.nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear = torch.nn.Linear(in_features=64, out_features=2)
        self.criterion = torch.nn.NLLLoss()

    def forward(self, x, target):
        y = self.linear(x).log_softmax(dim=1)
        loss = self.criterion(y, target)
        return {"loss": loss}


model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

device = "cuda:0"
epochs = 3
trainer = ppe.engine.create_trainer(
    models=model,
    optimizers=optimizer,
    max_epochs=epochs,
    evaluator=ppe.engine.create_evaluator(
        models=model,
        device=device,
        options={
            "eval_report_keys": [
                "loss"
            ],  # Report the loss value to the LogReport extension.
        },
    ),
    device=device,
    options={
        "train_report_keys": [
            "loss"
        ],  # Report the loss value to the LogReport extension.
    },
)

trainer.extend(
    ppe.training.extensions.LogReport()
)  # An extension that collects the values reported during training.

ppe.to(model, device=device)

batch_size = 10
training_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]
validation_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]

trainer.run(train_loader=training_data, val_loader=validation_data)

print("Finish training!")

The collected training logs are written to ./result/log.
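Values can also be reported from arbitrary code with ppe.reporting.report, and the logging frequency can be changed via the LogReport trigger. A minimal sketch, assuming the same Model as above (the accuracy metric is illustrative and not part of the original example):

    def forward(self, x, target):
        y = self.linear(x).log_softmax(dim=1)
        loss = self.criterion(y, target)
        # Report an extra metric; LogReport aggregates it alongside the
        # values listed in train_report_keys/eval_report_keys.
        accuracy = (y.argmax(dim=1) == target).float().mean()
        ppe.reporting.report({"accuracy": accuracy.item()})
        return {"loss": loss}

# Write a log entry every 10 iterations instead of once per epoch.
trainer.extend(ppe.training.extensions.LogReport(trigger=(10, "iteration")))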

Step 3: Display Progress#

Next, display the progress of the training as it runs.

quick_start_progress.py#
import pytorch_pfn_extras as ppe
import torch


class Model(torch.nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear = torch.nn.Linear(in_features=64, out_features=2)
        self.criterion = torch.nn.NLLLoss()

    def forward(self, x, target):
        y = self.linear(x).log_softmax(dim=1)
        loss = self.criterion(y, target)
        return {"loss": loss}


model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

device = "cuda:0"
epochs = 3
trainer = ppe.engine.create_trainer(
    models=model,
    optimizers=optimizer,
    max_epochs=epochs,
    evaluator=ppe.engine.create_evaluator(
        models=model,
        device=device,
        options={
            "eval_report_keys": ["loss"],
        },
    ),
    device=device,
    options={
        "train_report_keys": ["loss"],
    },
)

trainer.extend(ppe.training.extensions.LogReport())
trainer.extend(ppe.training.extensions.ProgressBar())
trainer.extend(
    ppe.training.extensions.PrintReport(  # Displays the collected logs interactively.
        [
            "epoch",  # epoch, iteration, elapsed_time are automatically collected by LogReport.
            "iteration",
            "elapsed_time",
            "train/loss",  # The parameters specified by train_report_keys are collected under keys with the 'train/' prefix.
            "val/loss",  # The parameters specified by eval_report_keys are collected under keys with the 'val/' prefix.
        ],
    )
)

ppe.to(model, device=device)

batch_size = 10
training_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]
validation_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]

trainer.run(train_loader=training_data, val_loader=validation_data)

print("Finish training!")

Step 4: Save Model#

Finally, save the trained model.

quick_start_save.py#
import pytorch_pfn_extras as ppe
import torch


class Model(torch.nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.linear = torch.nn.Linear(in_features=64, out_features=2)
        self.criterion = torch.nn.NLLLoss()

    def forward(self, x, target):
        y = self.linear(x).log_softmax(dim=1)
        loss = self.criterion(y, target)
        return {"loss": loss}


model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)


device = "cuda:0"
epochs = 3
trainer = ppe.engine.create_trainer(
    models=model,
    optimizers=optimizer,
    max_epochs=epochs,
    evaluator=ppe.engine.create_evaluator(
        models=model,
        device=device,
        options={
            "eval_report_keys": ["loss"],
        },
    ),
    device=device,
    options={
        "train_report_keys": ["loss"],
    },
)

trainer.extend(ppe.training.extensions.LogReport())
trainer.extend(ppe.training.extensions.ProgressBar())
trainer.extend(
    ppe.training.extensions.PrintReport(  # Displays the collected logs interactively.
        [
            "epoch",  # epoch, iteration, elapsed_time are automatically collected by LogReport.
            "iteration",
            "elapsed_time",
            "train/loss",  # The parameters specified by train_report_keys are collected under keys with the 'train/' prefix.
            "val/loss",  # The parameters specified by eval_report_keys are collected under keys with the 'val/' prefix.
        ],
    )
)
trainer.extend(
    ppe.training.extensions.snapshot(target=model)
)  # Save the model parameters after each epoch.

ppe.to(model, device=device)

batch_size = 10
training_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]
validation_data = [
    {
        "x": torch.rand((batch_size, 64)),
        "target": torch.ones((batch_size,), dtype=torch.long),
    }
    for _ in range(10)
]

trainer.run(train_loader=training_data, val_loader=validation_data)

print("Finish training!")

The model parameters are saved under ./result with a file name that includes the time at which they were saved.

Snapshots are generated using state_dict(). Please refer to the official PyTorch docs for how to load the model.
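A minimal sketch for loading the saved parameters back into a model (the file name below is a placeholder; substitute the actual snapshot name generated under ./result):

# "snapshot_example" is a hypothetical file name; use the real one from ./result.
# snapshot(target=model) stores model.state_dict(), so it can be loaded directly.
state_dict = torch.load("result/snapshot_example", map_location=device)
model.load_state_dict(state_dict)

Calling snapshot() without a target saves the whole trainer state instead, and passing autoload=True restores the latest snapshot automatically when trainer.run starts, which makes it possible to resume interrupted training.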