Neural Network Module¶

torch.nn.Module adalah base class untuk semua neural network di PyTorch. Modul ini menyediakan cara terstruktur untuk membangun dan mengorganisir model.

Mendefinisikan Model¶

Dengan nn.Sequential¶

import torch
import torch.nn as nn

# Sequential: urutan layer linear
model = nn.Sequential(
    nn.Linear(10, 32),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1)
)

# Forward pass
x = torch.randn(5, 10)  # batch_size=5, features=10
output = model(x)
print(f"Output shape: {output.shape}")  # torch.Size([5, 1])

Dengan Custom Class¶

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(4, 3)
        self.linear2 = nn.Linear(3, 1)
    
    def forward(self, x):
        x = self.linear1(x)
        x = F.relu(x)
        x = self.linear2(x)
        return x

net = Net()
print(net)

# Forward pass
x = torch.randn(4)
output = net(x)
print(f"Output: {output}")

Layer Umum¶

Linear Layer¶

import torch.nn as nn

# Fully connected layer
linear = nn.Linear(in_features=10, out_features=5)

# Dengan bias=False
linear_no_bias = nn.Linear(10, 5, bias=False)

Activation Functions¶

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(3)

# Sebagai module
relu = nn.ReLU()
sigmoid = nn.Sigmoid()
tanh = nn.Tanh()
softmax = nn.Softmax(dim=-1)

# Sebagai functional
y = F.relu(x)
y = F.sigmoid(x)
y = F.tanh(x)
y = F.softmax(x, dim=-1)

# Variasi ReLU
leaky_relu = nn.LeakyReLU(negative_slope=0.01)
elu = nn.ELU(alpha=1.0)
gelu = nn.GELU()

Convolutional Layers¶

import torch.nn as nn

# 2D Convolution
conv2d = nn.Conv2d(
    in_channels=3,       # RGB
    out_channels=16,
    kernel_size=3,
    stride=1,
    padding=1
)

# 1D Convolution
conv1d = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3)

Pooling Layers¶

import torch.nn as nn

# Max pooling
maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

# Average pooling
avgpool = nn.AvgPool2d(kernel_size=2, stride=2)

# Adaptive pooling (output size tetap)
adaptive_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

Normalization Layers¶

import torch.nn as nn

# Batch Normalization
bn = nn.BatchNorm2d(num_features=64)

# Layer Normalization
ln = nn.LayerNorm(normalized_shape=64)

# Instance Normalization
in_norm = nn.InstanceNorm2d(num_features=64)

Dropout¶

import torch.nn as nn

dropout = nn.Dropout(p=0.5)

# Dalam model
class NetWithDropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(100, 50)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(50, 10)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

Embedding¶

import torch
import torch.nn as nn

# Embedding layer
embedding = nn.Embedding(num_embeddings=1000, embedding_dim=64)

# Lookup
indices = torch.tensor([1, 5, 10])
embedded = embedding(indices)
print(f"Embedded shape: {embedded.shape}")  # torch.Size([3, 64])

RNN, LSTM, GRU¶

import torch.nn as nn

# LSTM
lstm = nn.LSTM(
    input_size=10,
    hidden_size=20,
    num_layers=2,
    batch_first=True,
    dropout=0.1,
    bidirectional=True
)

# GRU
gru = nn.GRU(input_size=10, hidden_size=20, num_layers=2, batch_first=True)

Contoh Model¶

MLP Classifier¶

import torch
import torch.nn as nn
import torch.nn.functional as F

class MLPClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

model = MLPClassifier(input_dim=784, hidden_dim=256, num_classes=10)
print(model)

CNN untuk Gambar¶

import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        # Input: (batch, 3, 32, 32)
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  # (batch, 32, 16, 16)
        x = self.pool(F.relu(self.bn2(self.conv2(x))))  # (batch, 64, 8, 8)
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

Parameter dan Akses¶

import torch.nn as nn

model = nn.Sequential(
    nn.Linear(10, 20),
    nn.ReLU(),
    nn.Linear(20, 5)
)

# Lihat semua parameters
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

# Total parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total: {total_params}, Trainable: {trainable_params}")

# Akses layer spesifik
print(model[0].weight.shape)  # torch.Size([20, 10])

Train dan Eval Mode¶

import torch.nn as nn

model = nn.Sequential(
    nn.Linear(10, 20),
    nn.Dropout(0.5),
    nn.Linear(20, 5)
)

# Training mode (dropout aktif)
model.train()
print(f"Training: {model.training}")

# Evaluation mode (dropout non-aktif)
model.eval()
print(f"Training: {model.training}")

Penting

Selalu panggil model.eval() sebelum inference dan model.train() sebelum training.

Move ke Device¶

import torch
import torch.nn as nn

model = nn.Linear(10, 5)

# Pindah ke GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Input juga harus di device yang sama
x = torch.randn(3, 10).to(device)
output = model(x)

Latihan¶

Buat model MLP dengan 3 hidden layers dan dropout
Implementasikan CNN sederhana untuk MNIST
Buat model dengan residual connection
Hitung jumlah parameter trainable dalam model