Python-Tensorflow Image Digit CAPTCHA Recognition

This project builds a CNN with Tensorflow to recognize digit CAPTCHAs;
Current accuracy: around 92%;
Training time: about 5 hours (a pre-trained model is bundled with the project, so the run below takes far less than 5 hours);
Tensorflow version: 1.13.1
Experimental data: digit CAPTCHA images
Sample data:

(sample CAPTCHA images shown in the original post)

import os
import random
from PIL import Image
import numpy as np
import tensorflow as tf
from datetime import datetime

Parameter configuration

The meaning of each parameter is documented in the code comments below;

class Config(object):
    width = 160  # width of the CAPTCHA image
    height = 60  # height of the CAPTCHA image
    char_num = 4  # number of characters per CAPTCHA
    characters = range(10)  # candidate characters: the digits 0-9

    test_folder = '/home/kesci/input/captcha2599/captcha/test'
    train_folder = '/home/kesci/input/captcha2599/captcha/train'
    validation_folder = '/home/kesci/input/captcha2599/captcha/validation'
    saver_folder = 'checkpoints'

    alpha = 1e-3  # learning rate
    Epoch = 100  # number of training epochs
    batch_size = 64  # batch size
    keep_prob = 0.5  # dropout keep probability
    print_per_batch = 20  # report metrics every N batches
    save_per_batch = 20  # save interval (not used in the code below)

读取验证码

验证码图片示例:

0478Python-Tensorflow图像数字验证码识别

read_data(): returns the image data (as a numpy.array) and the label vector derived from the file name;

label2vec(): converts the label string into a one-hot vector, for example:

    label = '1327'

    label_vec = [0,1,0,0,0,0,0,0,0,0,
                 0,0,0,1,0,0,0,0,0,0,
                 0,0,1,0,0,0,0,0,0,0,
                 0,0,0,0,0,0,0,1,0,0]

load_data(): loads every image under a folder and returns the image data, the labels, and the number of images. A short decoding sketch follows the class below.

class ReadData:

    def __init__(self):
        self.test_img = os.listdir(Config.test_folder)
        self.train_img = os.listdir(Config.train_folder)
        self.sample_num = len(self.train_img)

    def read_data(self, path):
        img = Image.open(path).convert('L')  # open the image and convert to grayscale
        image_array = np.array(img)
        image_data = image_array.flatten() / 255.0  # flatten and scale to [0, 1]
        # the label is the file name without its extension, e.g. '.../1327.jpg' -> '1327'
        label = os.path.splitext(os.path.split(path)[1])[0]
        label_vec = self.label2vec(label)
        return image_data, label_vec

    @staticmethod
    def label2vec(label):
        """
        将验证码标签转为40维的向量。
        :param label: 1327
        :return:
            [0,1,0,0,0,0,0,0,0,0,
            0,0,0,1,0,0,0,0,0,0,
            0,0,1,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,1,0,0]
        """
        label_vec = np.zeros(Config.char_num * len(Config.characters))
        for i, num in enumerate(label):
            idx = i * len(Config.characters) + int(num)
            label_vec[idx] = 1
        return label_vec

    def load_data(self, folder):
        """
        加载样本数据
        :param folder: 图片存放文件夹
        :return:
            data:图片数据
            label:图片标签
            size:图片数量
        """
        if os.path.exists(folder):
            path_list = os.listdir(folder)
            size = len(path_list)
            data = np.zeros([size, Config.height * Config.width])
            label = np.zeros([size, Config.char_num * len(Config.characters)])
            for i, img_path in enumerate(path_list):
                path = '%s/%s' % (folder, img_path)
                data[i], label[i] = self.read_data(path)
            return data, label, size
        else:
            raise IOError('No such directory, please check again.')
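
As a quick sanity check of the encoding, here is a short sketch of the inverse operation. vec2label is a hypothetical helper that is not part of the original code; it simply takes the argmax of each group of 10 positions:

def vec2label(label_vec, num_classes=len(Config.characters)):
    # hypothetical inverse of label2vec: one argmax per group of 10 positions
    digits = np.reshape(label_vec, (-1, num_classes)).argmax(axis=1)
    return ''.join(str(d) for d in digits)

# the vector for '1327' from the docstring above decodes back to '1327'
print(vec2label(ReadData.label2vec('1327')))  # -> 1327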

Model definition

The model uses three convolutional layers, each with 5x5 filters and followed by 2x2 max pooling; batch normalization is applied to the input, and to reduce overfitting a dropout layer follows the fully-connected layer. The 60x160 grayscale image is eventually reduced to an 8x20x64 feature map, which is flattened and passed through the fully-connected layers. The overall structure is roughly as follows:

(model structure diagram)
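
As a side note (not in the original post), the flatten size 20 * 8 * 64 used in the fully-connected layer below can be traced by halving the input height and width three times with ceil rounding, which is what SAME-padded 2x2 max pooling does:

import math

h, w = Config.height, Config.width  # 60 x 160 input
for layer, channels in enumerate([32, 64, 64], start=1):
    # each SAME-padded 2x2 max pool halves H and W, rounding up
    h, w = math.ceil(h / 2), math.ceil(w / 2)
    print('after conv/pool {}: {} x {} x {}'.format(layer, h, w, channels))
# after conv/pool 1: 30 x 80 x 32
# after conv/pool 2: 15 x 40 x 64
# after conv/pool 3: 8 x 20 x 64  -> flattened into 20 * 8 * 64 = 10240 features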

class CNN:
    def __init__(self):
        self.input_x = tf.placeholder(
            tf.float32, [None, Config.width * Config.height], name='input_x')
        self.input_y = tf.placeholder(
            tf.float32, [None, Config.char_num * len(Config.characters)], name='input_y')
        self.keep_prob = tf.placeholder("float")
        self.training = tf.placeholder(tf.bool, name='is_training')

        self.CNN_model()

    @staticmethod
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    @staticmethod
    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    @staticmethod
    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    @staticmethod
    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


    def CNN_model(self):
        x_image = tf.reshape(self.input_x,
                             [-1, Config.height, Config.width, 1], name='x_image')
        # batch normalization on the input
        x_norm = tf.layers.batch_normalization(x_image,
                                               training=self.training, momentum=0.9)

        # Convolution layer 1: 60x160x1 -> conv -> 60x160x32 -> pool -> 30x80x32
        w_cv1 = self.weight_variable([5, 5, 1, 32])
        b_cv1 = self.bias_variable([32])
        h_cv1 = tf.nn.relu(self.conv2d(x_norm, w_cv1) + b_cv1)
        h_mp1 = self.max_pool_2x2(h_cv1)

        # Convolution layer 2: 30x80x32 -> conv -> 30x80x64 -> pool -> 15x40x64
        w_cv2 = self.weight_variable([5, 5, 32, 64])
        b_cv2 = self.bias_variable([64])
        h_cv2 = tf.nn.relu(self.conv2d(h_mp1, w_cv2) + b_cv2)
        h_mp2 = self.max_pool_2x2(h_cv2)

        # Convolution layer 3: 15x40x64 -> conv -> 15x40x64 -> pool -> 8x20x64
        w_cv3 = self.weight_variable([5, 5, 64, 64])
        b_cv3 = self.bias_variable([64])
        h_cv3 = tf.nn.relu(self.conv2d(h_mp2, w_cv3) + b_cv3)
        h_mp3 = self.max_pool_2x2(h_cv3)

        # Fully-connected layer: 8*20*64 = 10240 features -> 128
        W_fc1 = self.weight_variable([20 * 8 * 64, 128])
        b_fc1 = self.bias_variable([128])
        h_mp3_flat = tf.reshape(h_mp3, [-1, 20 * 8 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_mp3_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, self.keep_prob)

        # Output layer: 128 -> 40 logits (4 digits x 10 classes)
        W_fc2 = self.weight_variable([128, Config.char_num * len(Config.characters)])
        b_fc2 = self.bias_variable([Config.char_num * len(Config.characters)])
        output = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2)

        # multi-label sigmoid cross-entropy over the 40 output logits
        self.loss = (tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_y, logits=output)))
        # reshape to [batch, 4, 10] and take the argmax per digit position
        predict = tf.reshape(output, [-1, Config.char_num,
                                      len(Config.characters)], name='predict')
        labels = tf.reshape(self.input_y, [-1, Config.char_num,
                                           len(Config.characters)], name='labels')

        self.predict_max_idx = tf.argmax(predict, axis=2, name='predict_max_idx')
        labels_max_idx = tf.argmax(labels, axis=2, name='labels_max_idx')
        predict_correct_vec = tf.equal(self.predict_max_idx, labels_max_idx)

        # run the batch-norm moving-average updates together with the train step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = tf.train.AdamOptimizer(
                Config.alpha).minimize(self.loss)
        # per-digit accuracy (fraction of correctly predicted digit positions)
        self.accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32))

Training and evaluation

next_batch(): a generator that yields the data batch by batch;
feed_data(): builds the feed dict that "feeds" data into the model;
    x: image data;
    y: image labels;
    keep_prob: dropout keep probability;
evaluate(): evaluates the model, used for the validation and test sets;
run_model(): training & evaluation.

class Run:

    def __init__(self):
        read = ReadData()
        self.test_x, self.test_y, self.test_num = read.load_data(folder=Config.test_folder)
        self.train_x, self.train_y, self.train_num = read.load_data(folder=Config.train_folder)
        self.val_x, self.val_y, self.val_num = read.load_data(folder=Config.validation_folder)

        print('Images for train :{}, for validation : {}, for test : {}' \
              .format(self.train_num, self.val_num, self.test_num))

        self.run_model()

    @staticmethod
    def next_batch(x, y, length):
        if length % Config.batch_size == 0:
            times = int(length / Config.batch_size)
        else:
            times = int(length / Config.batch_size) + 1

        start_id = 0
        for _ in range(times):
            end_id = min(start_id + Config.batch_size, length)
            batch_data = x[start_id:end_id]
            batch_label = y[start_id:end_id]
            start_id = end_id
            yield batch_data, batch_label

    @staticmethod
    def feed_data(x, y, keep_prob, is_training=True):
        feed_dict = {model.input_x: x,
                     model.input_y: y,
                     model.keep_prob: keep_prob,
                     model.training: is_training}
        return feed_dict

    def evaluate(self, sess, val_x, val_y, val_size):
        total_loss = 0.
        total_acc = 0.

        for x_, y_ in self.next_batch(val_x, val_y, val_size):
            length = len(y_)
            feed_dict = self.feed_data(x_, y_, 1.0, False)
            val_acc, val_loss = sess.run([model.accuracy, model.loss], feed_dict=feed_dict)
            total_acc += val_acc * length
            total_loss += val_loss * length
        return total_acc / val_size, total_loss / val_size

    def run_model(self):

        saver = tf.train.Saver(max_to_keep=1)
        if not os.path.exists(Config.saver_folder):
            os.mkdir(Config.saver_folder)
        # local path where the best model checkpoint is saved
        save_path = os.path.join(Config.saver_folder, 'best_validation')

        total_batch = 0
        best_acc = 0
        last_improved_step = 0
        require_steps = 100  # early-stopping patience, in batches
        flag = False
        start_time = datetime.now()

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # To train from scratch, comment out the following line.
        saver.restore(sess=sess, save_path=save_path)

        for epoch in range(Config.Epoch):
            print('Epoch : {}'.format(epoch + 1))
            for x, y in self.next_batch(self.train_x, self.train_y, self.train_num):
                feed_dict = self.feed_data(x, y, Config.keep_prob, True)
                sess.run(model.train_step, feed_dict=feed_dict)

                if total_batch % Config.print_per_batch == 0:
                    # report accuracy and loss on the current training batch and on the validation set
                    feed_dict[model.keep_prob] = 1.0
                    feed_dict[model.training] = False
                    train_accuracy, train_loss = sess.run([model.accuracy, model.loss],
                                                          feed_dict=feed_dict)
                    val_acc, val_loss = self.evaluate(sess, self.val_x, self.val_y, self.val_num)

                    if val_acc > best_acc:
                        # record the best result so far
                        best_acc = val_acc
                        last_improved_step = total_batch
                        # save the model checkpoint
                        saver.save(sess=sess, save_path=save_path)
                        improved = '*'
                    else:
                        improved = ''

                    msg = 'Step {:5}, train_acc:{:8.2%}, train_loss:{:6.2f},' \
                          ' val_acc:{:8.2%}, val_loss:{:6.2f}, improved:{:3}'
                    print(msg.format(total_batch, train_accuracy, train_loss, val_acc, val_loss, improved))

                if total_batch - last_improved_step > require_steps:
                    flag = True
                    break

                total_batch += 1
            if flag:
                print('No improvement for over {} steps, auto-stopping....'.format(require_steps))
                break
        end_time = datetime.now()
        time_diff = (end_time - start_time).seconds
        print('Time Usage : {:.2f} hours'.format(time_diff / 3600.0))
        # report accuracy and loss on the test set
        test_acc, test_loss = self.evaluate(sess, self.test_x, self.test_y, self.test_num)

        print("Test accuracy:{:8.2%}, loss:{:6.2f}".format(test_acc, test_loss))
        sess.close()
model = CNN()
Run()

WARNING:tensorflow:From :36: batch_normalization (from tensorflow.python.layers.normalization) is deprecated and will be removed in a future version.
Instructions for updating: Use keras.layers.batch_normalization instead.
WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating: Colocations handled automatically by placer.
WARNING:tensorflow:From :61: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Images for train :10000, for validation : 1000, for test : 1000
WARNING:tensorflow:From /opt/conda/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating: Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoints/best_validation
Epoch : 1
Step     0, train_acc:  99.22%, train_loss:  0.02, val_acc:  91.07%, val_loss:  0.08, improved:*
Step    20, train_acc:  98.44%, train_loss:  0.02, val_acc:  90.42%, val_loss:  0.07, improved:
Step    40, train_acc:  99.22%, train_loss:  0.02, val_acc:  91.25%, val_loss:  0.07, improved:*
Step    60, train_acc:  99.22%, train_loss:  0.02, val_acc:  90.93%, val_loss:  0.07, improved:
Step    80, train_acc:  98.83%, train_loss:  0.02, val_acc:  91.05%, val_loss:  0.07, improved:
Step   100, train_acc:  98.44%, train_loss:  0.02, val_acc:  91.20%, val_loss:  0.07, improved:
Step   120, train_acc:  99.22%, train_loss:  0.02, val_acc:  90.90%, val_loss:  0.08, improved:
Step   140, train_acc:  99.22%, train_loss:  0.02, val_acc:  90.88%, val_loss:  0.07, improved:
No improvement for over 100 steps, auto-stopping....
Time Usage : 0.30 hours
Test accuracy:  91.68%, loss:  0.07
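
After training, the saved checkpoint can be used for inference on a single image. The following is a minimal sketch, not part of the original post: it rebuilds the graph, restores checkpoints/best_validation, and decodes predict_max_idx into a digit string (the image path is hypothetical):

def predict_image(img_path):
    """Restore the saved checkpoint and decode one 160x60 CAPTCHA image."""
    img = Image.open(img_path).convert('L')
    image_data = np.array(img).flatten() / 255.0

    tf.reset_default_graph()
    model = CNN()  # rebuild the graph so the checkpoint variable names line up
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(Config.saver_folder, 'best_validation'))
        idx = sess.run(model.predict_max_idx,
                       feed_dict={model.input_x: [image_data],
                                  model.keep_prob: 1.0,
                                  model.training: False})
    return ''.join(str(d) for d in idx[0])

# print(predict_image('some_captcha.png'))  # e.g. '0478'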

If this post helped, likes and bookmarks are welcome!
