[TensorFlow Deep Learning, Advanced] Hands-On 2: Recognizing and Cracking Numeric CAPTCHAs with a CNN

Date: 2022-06-24

Based on a reference blog post, with small modifications. The CNN part follows the structure from Hands-On 6 of our introductory series; if CNNs are still unclear to you, it is worth reviewing that post first. Dataset for this post.

Code

import os
# Work around the "duplicate OpenMP runtime" error seen on some conda/macOS setups
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import tensorflow as tf
import numpy as np
from PIL import Image
import random
import time
 
# Directory containing the captcha images
CAPTCHA_IMAGE_PATH = './1CNN/Number_app/datas/train/'
# Captcha image width
CAPTCHA_IMAGE_WIDHT = 160
# Captcha image height
CAPTCHA_IMAGE_HEIGHT = 60
 
CHAR_SET_LEN = 10
CAPTCHA_LEN = 4
 
# 80% of the captcha images go into the training set
TRAIN_IMAGE_PERCENT = 0.8
# File names of the captcha images used for training
TRAINING_IMAGE_NAME = []
# File names of the captcha images used for validation
VALIDATION_IMAGE_NAME = []

# Directory where the trained model is saved
MODEL_SAVE_PATH = './1CNN/Number_app/models/'

def get_image_file_name(imgPath=CAPTCHA_IMAGE_PATH):
    # Collect the captcha file names; the first 4 characters of each name are
    # expected to be the captcha digits (they are used as the label below)
    fileName = []
    for filePath in os.listdir(imgPath):
        fileName.append(filePath)
    return fileName
    
    
# Convert a captcha string into the 40-dimensional label vector used for training.
# For example, if the captcha is '0296', the corresponding label is
# [1 0 0 0 0 0 0 0 0 0
#  0 0 1 0 0 0 0 0 0 0
#  0 0 0 0 0 0 0 0 0 1
#  0 0 0 0 0 0 1 0 0 0]
def name2label(name):
    label = np.zeros(CAPTCHA_LEN * CHAR_SET_LEN)
    for i, c in enumerate(name):
        idx = i*CHAR_SET_LEN + ord(c) - ord('0')
        label[idx] = 1
    return label
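
# Quick sanity check (illustrative, not required for training): for '0296' the ones
# land at indices 0, 12, 29 and 36, i.e. position i with digit d maps to index i*10 + d.
assert np.nonzero(name2label('0296'))[0].tolist() == [0, 12, 29, 36]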
    

# Load the captcha image data and the corresponding labels
def get_datas_and_labels(fileName, filePath=CAPTCHA_IMAGE_PATH):
    train_images = []
    train_labels = []
    for e in fileName:
        pathName = os.path.join(filePath, e)
        img = Image.open(pathName)
        # Convert to grayscale
        img = img.convert("L")
        image_array = np.array(img)
        # Scale pixel values to [0, 1]
        image_data = image_array/255
        # The first 4 characters of the file name are the captcha digits
        image_label = name2label(e[0:CAPTCHA_LEN])
        train_images.append(image_data)
        train_labels.append(image_label)
    train_images = np.array(train_images)
    train_labels = np.array(train_labels)
    return train_images, train_labels

# Input data: grayscale images of size 60x160
x = tf.placeholder("float", shape=[None, 60, 160], name="x")
# Training labels: 40-dimensional vectors (4 digits x 10 classes)
y_ = tf.placeholder("float", shape=[None, 40], name="y_")

x_image = tf.reshape(x, [-1, 60, 160, 1])
# Layer 1: convolutional layer
conv1_weights = tf.get_variable("conv1_weights", [5, 5, 1, 16], initializer=tf.truncated_normal_initializer(stddev=0.1)) # 5x5 filters, input depth 1, output depth 16
conv1_biases = tf.get_variable("conv1_biases", [16], initializer=tf.constant_initializer(0.0))

conv1 = tf.nn.conv2d(x_image, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') # stride 1, zero padding
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases)) # ReLU activation

# Layer 2: max pooling layer
# 2x2 pooling window, stride 2, zero padding
pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 3: convolutional layer
conv2_weights = tf.get_variable("conv2_weights", [5, 5, 16, 32], initializer=tf.truncated_normal_initializer(stddev=0.1)) # 5x5 filters, input depth 16, output depth 32
conv2_biases = tf.get_variable("conv2_biases", [32], initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') # stride 1, zero padding
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

# Layer 4: max pooling layer
# 2x2 pooling window, stride 2, zero padding
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 5: convolutional layer
conv3_weights = tf.get_variable("conv3_weights", [5, 5, 32, 64], initializer=tf.truncated_normal_initializer(stddev=0.1)) # 5x5 filters, input depth 32, output depth 64
conv3_biases = tf.get_variable("conv3_biases", [64], initializer=tf.constant_initializer(0.0))
conv3 = tf.nn.conv2d(pool2, conv3_weights, strides=[1, 1, 1, 1], padding='SAME') # stride 1, zero padding
relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_biases))

# Layer 6: max pooling layer
# 2x2 pooling window, stride 2, zero padding
pool3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Layer 7: fully connected layer
# After three 2x2 poolings the 60x160 input becomes 8x20 with 64 channels,
# so the flattened feature vector has 8*20*64 = 10240 elements
fc1_weights = tf.get_variable("fc1_weights", [8 * 20 * 64, 1024], initializer=tf.truncated_normal_initializer(stddev=0.1))
fc1_baises = tf.get_variable("fc1_baises", [1024], initializer=tf.constant_initializer(0.1))
pool3_vector = tf.reshape(pool3, [-1, 8 * 20 * 64])
fc1 = tf.nn.relu(tf.matmul(pool3_vector, fc1_weights) + fc1_baises)
  
# Dropout layer to reduce overfitting
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
fc1_dropout = tf.nn.dropout(fc1, keep_prob)

# Layer 8: fully connected layer
fc2_weights = tf.get_variable("fc2_weights", [1024, 40], initializer=tf.truncated_normal_initializer(stddev=0.1)) # 1024 hidden units, 40 output units (4 digits x 10 classes)
fc2_biases = tf.get_variable("fc2_biases", [40], initializer=tf.constant_initializer(0.1))
fc2 = tf.matmul(fc1_dropout, fc2_weights) + fc2_biases

loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=fc2))
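
# Sigmoid cross-entropy treats the 40 outputs as independent binary targets, which
# works here because each label vector contains exactly four ones (one per digit
# position). A per-position softmax loss is a common alternative; a minimal sketch
# follows (not used by the training step below):
alt_logits = tf.reshape(fc2, [-1, CAPTCHA_LEN, CHAR_SET_LEN])
alt_labels = tf.reshape(y_, [-1, CAPTCHA_LEN, CHAR_SET_LEN])
alt_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=alt_labels, logits=alt_logits))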



predict = tf.reshape(fc2, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='predict')
labels = tf.reshape(y_, [-1, CAPTCHA_LEN, CHAR_SET_LEN], name='labels')
# Predicted digit index for each of the 4 positions
# Note the name of predict_max_idx: it is needed when restoring the model for testing
predict_max_idx = tf.argmax(predict, axis=2, name='predict_max_idx')
labels_max_idx = tf.argmax(labels, axis=2, name='labels_max_idx')
predict_correct_vec = tf.equal(predict_max_idx, labels_max_idx)
accuracy = tf.reduce_mean(tf.cast(predict_correct_vec, tf.float32))
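# Note: `accuracy` above is per-character accuracy. A whole-captcha accuracy
# (all 4 digits correct) can be derived from the same comparison; a minimal
# sketch, not used elsewhere in this script:
captcha_correct = tf.reduce_all(predict_correct_vec, axis=1)
captcha_accuracy = tf.reduce_mean(tf.cast(captcha_correct, tf.float32))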


train_step = tf.train.AdamOptimizer(0.002).minimize(loss)

if __name__ == '__main__':    
    image_filename_list = get_image_file_name(CAPTCHA_IMAGE_PATH)
    np.random.shuffle(image_filename_list) # shuffle before splitting; on Windows this line must not be omitted
    lens = len(image_filename_list)
    seq = TRAIN_IMAGE_PERCENT
    lens1 = int(seq*lens)
    train_filename_list = image_filename_list[:lens1]
    test_filename_list = image_filename_list[lens1:]
    print(lens,lens1)

    train_images, train_labels = get_datas_and_labels(fileName=train_filename_list)
    print(train_filename_list[0],train_images.shape,
    np.argmax(train_labels[0,:10]),np.argmax(train_labels[0,10:20]),np.argmax(train_labels[0,20:30]),np.argmax(train_labels[0,30:]))

    with tf.Session() as sess:

        # Start training
        srun = sess.run
        srun(tf.global_variables_initializer())
        saver = tf.train.Saver()
        
        for i in range(3001):  
            # Take the next batch of 100 images (7960 should match the training-set size)
            start_step = i*100 % 7960
            stop_step = start_step+100

            batch_x, batch_y = train_images[start_step:stop_step], train_labels[start_step:stop_step]
            srun(train_step, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.8}) # train with keep_prob = 0.8 (20% dropout)
            if i%10 == 0:  
                loss_val = srun(loss,{x: batch_x, y_: batch_y, keep_prob: 1.0})
                acc_val = sess.run(accuracy, feed_dict={x: train_images[:200], y_: train_labels[:200], keep_prob: 1.0}) # accuracy on the first 200 training images
                print(i,loss_val,acc_val)
                if acc_val > 0.6:
                    saver.save(sess, MODEL_SAVE_PATH+"crack_captcha.model", global_step=i)

Training output

0 7.2514677 0.0975
10 0.3490238 0.09375
20 0.32853785 0.11125
30 0.3260678 0.0975
...
150 0.32142633 0.14
160 0.3182468 0.145
170 0.30834803 0.24
180 0.2962727 0.3125
190 0.29101092 0.33
200 0.2805277 0.3525
210 0.26561457 0.385
220 0.2575577 0.40375
230 0.24210992 0.43375
240 0.23269977 0.47625
250 0.22842664 0.4925
260 0.21917553 0.5425
270 0.22002003 0.52875
280 0.20558219 0.54375
290 0.21356806 0.54125
300 0.2039287 0.56
310 0.20335045 0.55625
320 0.19401187 0.5975
330 0.19049801 0.65625
...
1480 0.0026436544 0.9925
1490 0.0020101557 0.99375
...
1680 0.0016069501 1.0
1690 0.0010696264 0.99875
1700 0.0016226814 0.99125
1710 0.0021925105 0.99375
1720 0.0012222779 0.99875

Result analysis: after roughly 1,500 training steps, the per-character accuracy on 200 training images reaches about 99% (see the log above). This shows that the network structure handles the task well and can serve as the basis for cracking numeric captchas.
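
As a next step, the snippet below is a minimal standalone inference sketch: it restores a saved checkpoint and decodes a single captcha image. It assumes the tensor names defined above ('x', 'keep_prob', 'predict_max_idx'), a checkpoint written under MODEL_SAVE_PATH, and that it runs in a fresh process; the image path in the usage example is a placeholder.

import numpy as np
import tensorflow as tf
from PIL import Image

MODEL_SAVE_PATH = './1CNN/Number_app/models/'

def predict_captcha(image_path):
    # Preprocess exactly as during training: grayscale, scale to [0, 1]
    img = Image.open(image_path).convert("L")
    image_data = np.array(img) / 255
    with tf.Session() as sess:
        # Restore the latest checkpoint together with its graph definition
        ckpt = tf.train.latest_checkpoint(MODEL_SAVE_PATH)
        saver = tf.train.import_meta_graph(ckpt + '.meta')
        saver.restore(sess, ckpt)
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name("x:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        predict_max_idx = graph.get_tensor_by_name("predict_max_idx:0")
        # predict_max_idx yields the digit index for each of the 4 positions
        digits = sess.run(predict_max_idx,
                          feed_dict={x: image_data[np.newaxis, :, :], keep_prob: 1.0})
        return ''.join(str(d) for d in digits[0])

# Example usage (the file path is a placeholder):
# print(predict_captcha('./1CNN/Number_app/datas/test/0296.png'))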