为什么验证损失是常数?
pytorch 525
原文标题 :Why is the validation loss constant?
我正在尝试在我制作的自定义数据集上使用来自 Aladdin Persson 的 unet 模型。问题是“在训练期间,训练损失正在减少,而验证损失是恒定的。而且我只是无法弄清楚问题所在。我在训练集中有 368 张图片,在验证集中有 51 张图片。[橙色是验证损失,蓝色是训练][1]我还发布了我的训练代码以及我在验证集上检查准确性的部分。
这部分是train_fn。
for batch_idx, (data, targets) in enumerate(loop):
#img = data.cpu().squeeze(0).permute(1,2,0).numpy()
#plt.imshow(img)
data = data.to(device=DEVICE)
targets = targets.float().unsqueeze(1).to(device=DEVICE)
# forward
with torch.cuda.amp.autocast():
predictions = model(data)
loss = loss_fn(predictions, targets)
# backward
optimizer.zero_grad()
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
running_loss += loss.item()
# update tqdm loop
loop.set_postfix(loss=loss.item())
train_loss = running_loss/len(loader)
train_losses.append(train_loss)
epochs.append(epoch)
scheduler.step()
还有训练部分
for epoch in range(1,NUM_EPOCHS):
train_fn(train_loader, model, optimizer, loss_fn, scaler, epoch, scheduler)
#save model
checkpoint = {
"state_dict": model.state_dict(),
"optimizer":optimizer.state_dict(),
}
save_checkpoint(checkpoint)
# check accuracy
val_loss = check_accuracy(epoch, val_loader, model, loss_fn, device=DEVICE)
val_losses.append(val_loss)
# print some examples to a folder
save_predictions_as_imgs(
val_loader, model, folder="saved_images/", device=DEVICE
)
plt.plot(epochs, train_losses)
plt.plot(epochs, val_losses)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss function')
plt.show()
和 check_accuracy
def check_accuracy(epoch ,loader, model, loss_fn, device="cuda"):
try:
val_losses
except NameError:
val_losses = []
num_correct = 0
num_pixels = 0
dice_score = 0
running_loss = 0
idx = 1
model.eval()
with torch.no_grad():
for x, y in loader:
# if idx <= 10:
# grid_data = make_grid(x)
# grid_mask = make_grid(y)
# f, axarr_val = plt.subplots(2,1)
# plt.title('Validation transform')
# axarr_val[0].imshow(grid_data.permute(1,2,0).numpy())
# axarr_val[1].imshow(grid_mask.permute(1,2,0).numpy())
# plt.savefig("transformacije/validation/fig" + str(epoch+1) + str(idx) + ".png")
# plt.close(f)
# idx = idx+1
x = x.to(device)
y = y.to(device).unsqueeze(1)
preds = torch.sigmoid(model(x))
preds = (preds > 0.5).float()
num_correct += (preds == y).sum()
num_pixels += torch.numel(preds)
dice_score += (2 * (preds * y).sum()) / (
(preds + y).sum() + 1e-8
)
loss = loss_fn(preds, y)
running_loss += loss.item()
val_loss = running_loss/len(loader)
print(
f"Got {num_correct}/{num_pixels} with acc {num_correct/num_pixels*100:.2f}"
)
print(f"Dice score: {dice_score/len(loader)}")
print(f"Validation Loss: {val_loss}")
model.train()
return val_loss
如果您能提供帮助,我将不胜感激。谢谢。[1]:https://i.stack.imgur.com/tRh89.png