如何在pytorch中加载提前停止计数器

乘风 2年前 pytorch 225

原文标题 ：How to load early stopping counter in pytorch

import numpy as np
import torch

class EarlyStopping:
    """Stop training early once the validation loss stops improving.

    Call the instance once per validation pass. It checkpoints the supplied
    models on every improvement and sets ``early_stop`` to True after
    ``patience`` consecutive calls without improvement.

    The bookkeeping can be exported with ``state_dict()`` and restored with
    ``load_state_dict()`` so that an interrupted run resumes with the same
    counter instead of starting over.
    """

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): How many non-improving validation calls to
                            tolerate before requesting a stop.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Prefix under which checkpoints are written; every
                            entry of ``models_dict`` is saved to
                            ``<path>/<name>.pkl``.
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` exists in every version.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, models_dict):
        """Record one validation result.

        Args:
            val_loss: Validation loss of the current epoch (lower is better).
            models_dict (dict): Maps a file stem to the object handed to
                ``torch.save`` when the loss improves.
        """
        score = -val_loss

        first_call = self.best_score is None
        # NaN is the only value that is not equal to itself. A NaN loss must
        # never become the reference score, otherwise every later comparison
        # is False and the counter would stop advancing.
        not_nan = score == score
        if not_nan and (first_call or score >= self.best_score + self.delta):
            self.best_score = score
            self.save_checkpoint(val_loss, models_dict)
            # The very first call leaves the counter untouched so that a
            # counter restored from a checkpoint survives it.
            if not first_call:
                self.counter = 0
            return

        self.counter += 1
        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, models_dict):
        """Save every object in ``models_dict`` and remember ``val_loss`` as the new minimum."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        for file_name, model in models_dict.items():
            torch.save(model, self.path + "/" + file_name + ".pkl")
        self.val_loss_min = val_loss

    def state_dict(self):
        """Return the resumable bookkeeping as a plain dict."""
        return {
            'counter': self.counter,
            'best_score': self.best_score,
            'early_stop': self.early_stop,
            'val_loss_min': self.val_loss_min,
        }

    def load_state_dict(self, state):
        """Restore bookkeeping produced by ``state_dict()``.

        Keys missing from ``state`` keep their current value.
        """
        self.counter = state.get('counter', self.counter)
        self.best_score = state.get('best_score', self.best_score)
        self.early_stop = state.get('early_stop', self.early_stop)
        self.val_loss_min = state.get('val_loss_min', self.val_loss_min)

上面的代码是我正在使用的 EarlyStopping 类。我正在尝试使用 UNet 训练一个图像分割模型。问题是我无法让运行时（runtime）整天保持运行。使用下面的代码，我可以在每个 epoch 结束时保存模型本身，但每当我重置运行时，提前停止计数器都会被重新初始化为 1。每个 epoch 之后都会生成一个 pkl 文件，但我找不到如何从该处继续训练的答案。有没有办法让计数器从我中断时的数值继续计数？

def save(ckpt_dir, net, optim, epoch, early_stopping=None):
    """Write a training checkpoint to ``<ckpt_dir>/model_epoch<epoch>.pth``.

    Args:
        ckpt_dir (str): Directory for checkpoints; created if missing.
        net: Model whose ``state_dict()`` is stored under ``'net'``.
        optim: Optimizer whose ``state_dict()`` is stored under ``'optim'``.
        epoch (int): Epoch number embedded in the file name.
        early_stopping: Optional early-stopping tracker. When given, its
            ``counter``, ``best_score`` and ``val_loss_min`` attributes are
            stored under ``'es_counter'``, ``'es_best_score'`` and
            ``'es_val_loss_min'`` so the patience count survives a restart.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(ckpt_dir, exist_ok=True)

    checkpoint = {'net': net.state_dict(), 'optim': optim.state_dict()}
    if early_stopping is not None:
        best_score = early_stopping.best_score
        checkpoint['es_counter'] = int(early_stopping.counter)
        # Stored as plain floats so the file does not depend on whether the
        # loss was a tensor, a NumPy scalar or a Python number.
        checkpoint['es_best_score'] = None if best_score is None else float(best_score)
        checkpoint['es_val_loss_min'] = float(early_stopping.val_loss_min)

    torch.save(checkpoint, "%s/model_epoch%d.pth" % (ckpt_dir, epoch))


def _checkpoint_epoch(file_name):
    """Return the epoch encoded in ``model_epoch<N>.pth``, or None for any other name."""
    prefix, suffix = 'model_epoch', '.pth'
    if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
        return None
    digits = file_name[len(prefix):-len(suffix)]
    return int(digits) if digits.isdecimal() else None


def load(ckpt_dir, net, optim, early_stopping=None):
    """Restore the most recent checkpoint found in ``ckpt_dir``.

    Args:
        ckpt_dir (str): Directory containing ``model_epoch<N>.pth`` files.
        net: Model that receives the stored ``'net'`` state.
        optim: Optimizer that receives the stored ``'optim'`` state.
        early_stopping: Optional early-stopping tracker. When given, its
            ``counter``, ``best_score`` and ``val_loss_min`` attributes are
            overwritten from the ``'es_counter'``, ``'es_best_score'`` and
            ``'es_val_loss_min'`` checkpoint entries that are present.

    Returns:
        tuple: ``(net, optim, epoch)``. ``epoch`` is 0 when the directory is
        missing or holds no checkpoint file.
    """
    if not os.path.isdir(ckpt_dir):
        return net, optim, 0

    ckpt_lst = os.listdir(ckpt_dir)
    print(ckpt_lst)

    # Only files following the naming scheme count as checkpoints; unrelated
    # files previously crashed the digit-based sort.
    by_epoch = {}
    for file_name in ckpt_lst:
        file_epoch = _checkpoint_epoch(file_name)
        if file_epoch is not None:
            by_epoch[file_epoch] = file_name
    if not by_epoch:
        return net, optim, 0

    epoch = max(by_epoch)
    dict_model = torch.load('%s/%s' % (ckpt_dir, by_epoch[epoch]))
    print(dict_model.keys())

    net.load_state_dict(dict_model['net'])
    optim.load_state_dict(dict_model['optim'])

    if early_stopping is not None:
        restorable = (
            ('es_counter', 'counter'),
            ('es_best_score', 'best_score'),
            ('es_val_loss_min', 'val_loss_min'),
        )
        for key, attr in restorable:
            # Older checkpoints lack these keys; leave the tracker as it is.
            if key in dict_model:
                setattr(early_stopping, attr, dict_model[key])

    return net, optim, epoch

原文链接：https://stackoverflow.com//questions/71891964/how-to-load-early-stopping-counter-in-pytorch

我来回复

asymptote 评论
您可以在 save 函数中将计数器的值与模型状态一起保存：
```
# Store the early-stopping counter alongside the model and optimizer state.
torch.save({'net': net.state_dict(), 'optim': optim.state_dict(), 'es_counter': early_stopping.counter},
           "%s/model_epoch%d.pth" % (ckpt_dir, epoch))
```
这里，early_stopping是类EarlyStopping的对象。现在，您可以在加载函数中加载计数器值和模型状态：
```
es_counter = model_dict['es_counter']
#...

return net, optim, epoch, es_counter
```
现在您可以使用计数器值来更新主函数中的early_stopping对象。
2年前 0条评论