Saving dan Loading¶
Menyimpan dan memuat model adalah bagian penting dari workflow machine learning. PyTorch menyediakan beberapa cara untuk melakukan ini.
Menyimpan State Dict (Disarankan)¶
import torch
import torch.nn as nn
# Model
model = nn.Sequential(
    nn.Linear(10, 32),
    nn.ReLU(),
    nn.Linear(32, 2),
)

# Save only the state dict (the learned parameters), not the module object
torch.save(model.state_dict(), 'model_weights.pth')

# Load the state dict: an instance with the same architecture must be
# created first, then the saved parameters are copied into it.
model_loaded = nn.Sequential(
    nn.Linear(10, 32),
    nn.ReLU(),
    nn.Linear(32, 2),
)
# weights_only=True restricts unpickling to tensors and plain containers,
# so a tampered file cannot execute arbitrary code on PyTorch versions
# where this is not yet the default.
model_loaded.load_state_dict(torch.load('model_weights.pth', weights_only=True))
model_loaded.eval()  # Switch to evaluation mode
Menyimpan Seluruh Model¶
import torch
import torch.nn as nn
model = nn.Linear(10, 5)

# Save the entire model object (the module itself is pickled)
torch.save(model, 'full_model.pth')

# Load the entire model.
# Since PyTorch 2.6, torch.load defaults to weights_only=True, which refuses
# to unpickle a full nn.Module, so weights_only=False must be explicit here.
# SECURITY: this runs the regular pickle machinery and can execute arbitrary
# code; only load files that come from a trusted source.
model_loaded = torch.load('full_model.pth', weights_only=False)
model_loaded.eval()
Peringatan
Menyimpan seluruh model menggunakan pickle, sehingga file yang dihasilkan bergantung pada definisi class (beserta lokasi modulnya) yang harus tersedia saat loading. Untuk portabilitas yang lebih baik, disarankan menyimpan state_dict saja.
Checkpoint untuk Training¶
import torch
import torch.nn as nn
import torch.optim as optim
# Components whose state is needed to resume training
model = nn.Linear(10, 5)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10)
epoch = 50
loss = 0.5

# Save the checkpoint: one dict bundling every piece of training state
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
    'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')

# Load the checkpoint.
# The dict holds only tensors, numbers and plain containers, so the
# restricted unpickler (weights_only=True) is sufficient and avoids running
# arbitrary pickle code on PyTorch versions where it is not the default.
checkpoint = torch.load('checkpoint.pth', weights_only=True)

# Fresh objects must be built with the same configuration before their
# state dicts can be restored into them.
model_loaded = nn.Linear(10, 5)
optimizer_loaded = optim.Adam(model_loaded.parameters(), lr=0.001)
scheduler_loaded = optim.lr_scheduler.StepLR(optimizer_loaded, step_size=10)

model_loaded.load_state_dict(checkpoint['model_state_dict'])
optimizer_loaded.load_state_dict(checkpoint['optimizer_state_dict'])
scheduler_loaded.load_state_dict(checkpoint['scheduler_state_dict'])
epoch_loaded = checkpoint['epoch']
loss_loaded = checkpoint['loss']
print(f"Resumed from epoch {epoch_loaded}, loss {loss_loaded}")
Save Best Model¶
import torch
class ModelCheckpoint:
    """Save a model's state dict whenever a monitored metric improves.

    Args:
        filepath: Destination passed to ``torch.save``.
        monitor: Name of the tracked metric; used only in the log message.
        mode: ``'min'`` if lower values are better, ``'max'`` if higher
            values are better.

    Raises:
        ValueError: If ``mode`` is neither ``'min'`` nor ``'max'``.
    """

    def __init__(self, filepath, monitor='val_loss', mode='min'):
        # Reject unknown modes up front: previously anything other than
        # 'min' was silently treated as 'max', so a typo inverted the
        # comparison without any warning.
        if mode not in ('min', 'max'):
            raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")
        self.filepath = filepath
        self.monitor = monitor
        self.mode = mode
        # Start from the worst possible value so the first call always saves.
        self.best = float('inf') if mode == 'min' else float('-inf')

    def __call__(self, current_value, model):
        """Save ``model`` if ``current_value`` beats the best seen so far.

        Args:
            current_value: Latest value of the monitored metric.
            model: Object exposing ``state_dict()`` whose weights are saved.

        Returns:
            True if a new best was recorded and saved, False otherwise.
        """
        if self.mode == 'min':
            is_best = current_value < self.best
        else:
            is_best = current_value > self.best

        if not is_best:
            return False

        self.best = current_value
        torch.save(model.state_dict(), self.filepath)
        print(f"Saved best model with {self.monitor}: {current_value:.4f}")
        return True
# Usage: monitor validation accuracy and save the weights each time it
# reaches a new maximum. num_epochs, train_and_validate and model are not
# defined in this snippet; they are expected to come from the surrounding
# training code.
checkpoint = ModelCheckpoint(filepath='best_model.pth', monitor='val_acc', mode='max')

for epoch in range(num_epochs):
    val_acc = train_and_validate()
    checkpoint(val_acc, model)
Saving untuk Inference¶
import torch
import torch.nn as nn
class MyModel(nn.Module):
    """Two-layer MLP: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of each input sample (default 10).
        hidden_dim: Width of the hidden layer (default 32).
        output_dim: Size of each output sample (default 2).
    """

    def __init__(self, input_dim=10, hidden_dim=32, output_dim=2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        return self.fc2(x)


# A single config dict drives both construction and what gets saved, so the
# stored configuration can never drift from the actual architecture.
model_config = {'input_dim': 10, 'hidden_dim': 32, 'output_dim': 2}
model = MyModel(**model_config)
model.eval()

# Save for inference: weights plus the config needed to rebuild the model
torch.save({
    'model_state_dict': model.state_dict(),
    'model_config': model_config,
}, 'inference_model.pth')

# Load for inference.
# The file holds only tensors and plain containers, so the restricted
# unpickler (weights_only=True) is sufficient.
checkpoint = torch.load('inference_model.pth', weights_only=True)
config = checkpoint['model_config']
model = MyModel(**config)  # Rebuild the architecture from the saved config
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Inference: no_grad disables gradient tracking for the forward pass
with torch.no_grad():
    x = torch.randn(5, 10)
    output = model(x)
    print(output)
Menyimpan di Device Berbeda¶
Save dari GPU, Load ke CPU¶
import torch
# Save a model that lives on the GPU
# (model is expected to be defined by the surrounding code)
torch.save(model.state_dict(), 'model.pth')

# Load onto the CPU: map_location remaps the stored tensors to the given
# device, and weights_only=True restricts unpickling to tensors and plain
# containers.
device = torch.device('cpu')
model.load_state_dict(torch.load('model.pth', map_location=device, weights_only=True))
Load ke GPU Tertentu¶
import torch
# Load onto GPU 0 by passing the device string directly
# (model is expected to be defined by the surrounding code).
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load('model.pth', map_location='cuda:0', weights_only=True))

# Or use a torch.device object for the same effect
device = torch.device('cuda:0')
model.load_state_dict(torch.load('model.pth', map_location=device, weights_only=True))
# Move the module's own parameters to the target device as well
model.to(device)
TorchScript untuk Production¶
import torch
import torch.nn as nn
class MyModel(nn.Module):
    """Single linear layer (10 -> 5) followed by a ReLU activation."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 5)

    def forward(self, x):
        projected = self.fc(x)
        return torch.relu(projected)
# Instantiate the model and switch it to evaluation mode before tracing
model = MyModel()
model.eval()

# Trace the model by running it once on an example input
example_input = torch.randn(1, 10)
traced_model = torch.jit.trace(model, example_input)

# Save the traced module to disk
torch.jit.save(traced_model, 'model_traced.pt')

# Load it back (no class definition required)
loaded = torch.jit.load('model_traced.pt')
output = loaded(example_input)
print(output)
ONNX Export¶
import torch
import torch.nn as nn
# Small MLP (10 -> 32 -> 2), put in evaluation mode before export
layers = [nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2)]
model = nn.Sequential(*layers)
model.eval()

# Dummy input passed to the exporter as the example argument
dummy_input = torch.randn(1, 10)

# Export to ONNX; dimension 0 of both the input and the output is declared
# dynamic under the name 'batch_size'.
dynamic_axes = {
    'input': {0: 'batch_size'},
    'output': {0: 'batch_size'},
}
torch.onnx.export(
    model,
    dummy_input,
    'model.onnx',
    input_names=['input'],
    output_names=['output'],
    dynamic_axes=dynamic_axes,
)
Training Loop dengan Checkpoint¶
import torch
import torch.nn as nn
import torch.optim as optim
import os
def train_with_checkpoint(model, train_loader, val_loader, criterion,
                          optimizer, scheduler, num_epochs, checkpoint_dir='checkpoints'):
    """Train ``model`` with per-epoch checkpointing and automatic resume.

    After every epoch the full training state is written to
    ``<checkpoint_dir>/latest.pth``; whenever the mean validation loss
    improves, the model weights are also written to
    ``<checkpoint_dir>/best.pth``. If ``latest.pth`` already exists when the
    function is called, training resumes from the epoch after the saved one.

    Args:
        model: Module to train; called as ``model(X)``.
        train_loader: Iterable yielding ``(X, y)`` training batches.
        val_loader: Iterable yielding ``(X, y)`` validation batches; must
            support ``len()`` and be non-empty.
        criterion: Callable ``criterion(outputs, y)`` returning the loss.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Total number of epochs (not the number of extra epochs
            after a resume).
        checkpoint_dir: Directory for ``latest.pth`` and ``best.pth``;
            created if missing.

    Returns:
        The same ``model`` object, after training.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)
    latest_path = os.path.join(checkpoint_dir, 'latest.pth')
    best_path = os.path.join(checkpoint_dir, 'best.pth')

    start_epoch = 0
    best_val_loss = float('inf')

    # Resume from the latest checkpoint if one exists
    if os.path.exists(latest_path):
        checkpoint = torch.load(latest_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_loss = checkpoint['best_val_loss']
        print(f"Resumed from epoch {start_epoch}")

    for epoch in range(start_epoch, num_epochs):
        # Training
        model.train()
        for X, y in train_loader:
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

        # Validation: mean loss over the validation batches
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X, y in val_loader:
                outputs = model(X)
                val_loss += criterion(outputs, y).item()
        val_loss /= len(val_loader)

        # Only ReduceLROnPlateau consumes a metric. For every other
        # scheduler a positional argument is the deprecated ``epoch``
        # parameter, so passing the loss would corrupt the LR schedule.
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)
        else:
            scheduler.step()

        # Update the running best BEFORE writing latest.pth, so a resumed
        # run sees the true best and cannot overwrite best.pth with a worse
        # model.
        improved = val_loss < best_val_loss
        if improved:
            best_val_loss = val_loss

        # Save the full training state for resuming
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_val_loss': best_val_loss,
        }
        torch.save(checkpoint, latest_path)

        # Save the best model weights
        if improved:
            torch.save(model.state_dict(), best_path)

        print(f"Epoch {epoch+1}/{num_epochs}, Val Loss: {val_loss:.4f}")

    return model
Latihan¶
Implementasikan checkpoint yang menyimpan top-3 model terbaik
Buat fungsi untuk mengekspor model ke ONNX dan memvalidasinya
Implementasikan resume training dari checkpoint
Buat script untuk konversi model antar device (CPU/GPU)