diff --git a/1_sample_loader.py b/1_sample_loader.py index 63087a5..b90106c 100644 --- a/1_sample_loader.py +++ b/1_sample_loader.py @@ -1,5 +1,4 @@ #-*- coding: utf-8 -*- - import os import cv2 import numpy as np @@ -12,8 +11,8 @@ import digit_detector.region_proposal as rp N_IMAGES = None -DIR = '../datasets/svhn/train' -ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json" +DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train' +ANNOTATION_FILE = "C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/train/digitStruct.json" NEG_OVERLAP_THD = 0.05 POS_OVERLAP_THD = 0.6 PATCH_SIZE = (32,32) @@ -24,7 +23,7 @@ files = file_io.list_files(directory=DIR, pattern="*.png", recursive_option=False, n_files_to_sample=N_IMAGES, random_order=False) n_files = len(files) n_train_files = int(n_files * 0.8) - print n_train_files + print(n_train_files) extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator()) train_samples, train_labels = extractor.extract_patch(files[:n_train_files], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD) @@ -32,8 +31,8 @@ extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator()) validation_samples, validation_labels = extractor.extract_patch(files[n_train_files:], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD) - print train_samples.shape, train_labels.shape - print validation_samples.shape, validation_labels.shape + print(train_samples.shape, train_labels.shape) + print(validation_samples.shape, validation_labels.shape) # show.plot_images(samples, labels.reshape(-1,).tolist()) diff --git a/2_train.py b/2_train.py index 882be3f..ca51fb8 100644 --- a/2_train.py +++ b/2_train.py @@ -6,32 +6,36 @@ import digit_detector.preprocess as preproc import digit_detector.train as train_ -DIR = '../datasets/svhn' +# DIR = '/datasets/svhn' NB_FILTERS = 32 NB_EPOCH = 5 +BATCHSIZE = 128 
DETECTOR_FILE = 'detector_model.hdf5' RECOGNIZER_FILE = 'recognize_model.hdf5' if __name__ == "__main__": - images_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "images") - labels_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "labels") + images_train = file_io.FileHDF5().read("train.hdf5", "images") + labels_train = file_io.FileHDF5().read("train.hdf5", "labels") - images_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "images") - labels_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "labels") + images_val = file_io.FileHDF5().read("val.hdf5", "images") + labels_val = file_io.FileHDF5().read("val.hdf5", "labels") # Train detector X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 2) - print "mean value of the train images : {}".format(mean_value) # 107.524 - print "Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (457723, 32, 32, 1), (113430, 32, 32, 1) - train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=2, save_file=DETECTOR_FILE) - + print("mean value of the train images : {}".format(mean_value)) # 108.784 + print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (1279733, 32, 32, 1), (317081, 32, 32, 1) + train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=2, save_file=DETECTOR_FILE) + # loss: 0.0784 - accuracy: 0.9744 - val_loss: 0.0997 - val_accuracy: 0.9724 + # Test score: 0.09970200061798096 + # Test accuracy: 0.9724171161651611 + # Train recognizer X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 10) - print "mean value of the train images : {}".format(mean_value) # 112.833 - print "Train image 
shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (116913, 32, 32, 1), (29456, 32, 32, 1) - train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=10, save_file=RECOGNIZER_FILE) - # acc: 0.9541 - val_loss: 0.2125 - val_acc: 0.9452 - - + print("mean value of the train images : {}".format(mean_value)) # 115.503 + print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (267234, 32, 32, 1), (67359, 32, 32, 1) + train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=10, save_file=RECOGNIZER_FILE) + # loss: loss: 0.1070 - accuracy: 0.9685 - val_loss: 0.2196 - val_accuracy: 0.9532 + # Test score: 0.21958307921886444 + # Test accuracy: 0.9531614184379578 \ No newline at end of file diff --git a/3_detect.py b/3_detect.py index 646576d..0c24d45 100644 --- a/3_detect.py +++ b/3_detect.py @@ -13,8 +13,8 @@ detect_model = "detector_model.hdf5" recognize_model = "recognize_model.hdf5" -mean_value_for_detector = 107.524 -mean_value_for_recognizer = 112.833 +mean_value_for_detector = 108.784 +mean_value_for_recognizer = 115.503 model_input_shape = (32,32,1) DIR = '../datasets/svhn/train' diff --git a/4_evaluate.py b/4_evaluate.py index cedc338..e2faa2c 100644 --- a/4_evaluate.py +++ b/4_evaluate.py @@ -11,19 +11,18 @@ import digit_detector.file_io as file_io import digit_detector.preprocess as preproc import digit_detector.annotation as ann -import digit_detector.evaluate as eval +import digit_detector.evaluate as eva import digit_detector.classify as cls - model_filename = "detector_model.hdf5" model_input_shape = (32,32,1) -DIR = '../datasets/svhn/train' -ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json" +DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train' +ANNOTATION_FILE = 
"C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/test/digitStruct.json" detect_model = "detector_model.hdf5" recognize_model = "recognize_model.hdf5" -mean_value_for_detector = 107.524 -mean_value_for_recognizer = 112.833 +mean_value_for_detector = 108.784 +mean_value_for_recognizer = 115.503 if __name__ == "__main__": @@ -43,9 +42,9 @@ det = detect.DigitSpotter(detector, recognizer, proposer) # 3. Evaluate average precision - evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator()) + evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator()) recall, precision, f1_score = evaluator.run(img_files) - # recall value : 0.513115508514, precision value : 0.714285714286, f1_score : 0.597214783074 + # recall value : 0.025614754098360656, precision value : 0.0328515111695138, f1_score : 0.02878526194588371 # 4. Evaluate MSER detector = cls.TrueBinaryClassifier(input_shape=model_input_shape) @@ -53,12 +52,6 @@ # Todo : detector, recognizer 를 none type 으로 det = detect.DigitSpotter(detector, recognizer, proposer) - evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator()) + evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator()) recall, precision, f1_score = evaluator.run(img_files, do_nms=False) - #recall value : 0.630004601933, precision value : 0.0452547023239, f1_score : 0.0844436220084 - - - - - - + # recall value : 0.08504098360655737, precision value : 0.001661811374398094, f1_score : 0.0032599196802922145 \ No newline at end of file diff --git a/conf/svhn.json b/conf/svhn.json index e0e64a9..03007c7 100644 --- a/conf/svhn.json +++ b/conf/svhn.json @@ -1,22 +1,22 @@ { "dataset": { - "pos_data_dir": "../datasets/svhn/train", + "pos_data_dir": "/datasets/svhn/train", "pos_format": "*.png", - "test_data_dir": "../datasets/svhn/test", - "extra_data_dir": "../datasets/svhn/extra", - "easy_data_dir": "../datasets/svhn/easy_test", - "annotation_file": "../datasets/svhn/train/digitStruct.json", - "annotation_file_test": 
"../datasets/svhn/test/digitStruct.json", - "neg_data_dir": "../datasets/houses", + "test_data_dir": "/datasets/svhn/test", + "extra_data_dir": "/datasets/svhn/extra", + "easy_data_dir": "/datasets/svhn/easy_test", + "annotation_file": "/datasets/svhn/train/digitStruct.json", + "annotation_file_test": "/datasets/svhn/test/digitStruct.json", + "neg_data_dir": "/datasets/houses", "neg_format": "*.jpg", - "neg_data_from_train": "../datasets/svhn/train/negative_images", + "neg_data_from_train": "/datasets/svhn/train/negative_images", "neg_data_from_train_format": "*.png" }, "extractor": { - "output_file": "../datasets/output/svhn_features.hdf5", + "output_file": "/datasets/output/svhn_features.hdf5", "padding": 0, "sampling_ratio_for_positive_images": 1.0, "sampling_ratio_for_negative_images": 1.0, @@ -37,10 +37,10 @@ "algorithm": "ConvNet", "parameters": { - "model_file": "models/detector_model.hdf5", + "model_file": "/models/detector_model.hdf5", "mean_value": 84.9649 }, - "output_file": "models/detector_model.hdf5" + "output_file": "/models/detector_model.hdf5" }, "detector": @@ -50,7 +50,7 @@ "pyramid_scale": 0.9, "window_dim": [32, 16], "min_probability": 0.7, - "output_file": "../datasets/output/svhn_detector.pkl" + "output_file": "/datasets/output/svhn_detector.pkl" }, "hard_negative_mine": diff --git a/detector_model.hdf5 b/detector_model.hdf5 index 5a843b2..9c0cfe5 100644 Binary files a/detector_model.hdf5 and b/detector_model.hdf5 differ diff --git a/digit_detector/__pycache__/detect.cpython-39.pyc b/digit_detector/__pycache__/detect.cpython-39.pyc new file mode 100644 index 0000000..23c9482 Binary files /dev/null and b/digit_detector/__pycache__/detect.cpython-39.pyc differ diff --git a/digit_detector/__pycache__/evaluate.cpython-39.pyc b/digit_detector/__pycache__/evaluate.cpython-39.pyc new file mode 100644 index 0000000..2123232 Binary files /dev/null and b/digit_detector/__pycache__/evaluate.cpython-39.pyc differ diff --git a/digit_detector/annotation.py 
b/digit_detector/annotation.py index 644a3a6..ff492ed 100644 --- a/digit_detector/annotation.py +++ b/digit_detector/annotation.py @@ -1,10 +1,8 @@ #-*- coding: utf-8 -*- - -import file_io +from digit_detector import file_io import os import numpy as np - class Annotation: def __init__(self, annotation_file): diff --git a/digit_detector/classify.py b/digit_detector/classify.py index 56c4702..912e0fa 100644 --- a/digit_detector/classify.py +++ b/digit_detector/classify.py @@ -1,5 +1,4 @@ #-*- coding: utf-8 -*- - from abc import ABCMeta, abstractmethod import keras import numpy as np @@ -29,8 +28,9 @@ def predict_proba(self, patches): probs (N, n_classes) """ patches_preprocessed = self._preprocessor.run(patches) - probs = self._model.predict_proba(patches_preprocessed, verbose=0) - return probs + predict_x = self._model.predict(patches_preprocessed) + + return predict_x class TrueBinaryClassifier(Classifier): """Classifier always predict true """ diff --git a/digit_detector/evaluate.py b/digit_detector/evaluate.py index 7306ba7..587f699 100644 --- a/digit_detector/evaluate.py +++ b/digit_detector/evaluate.py @@ -1,10 +1,8 @@ #-*- coding: utf-8 -*- - import progressbar import cv2 import numpy as np -import region_proposal as rp - +#import region_proposal as rp class Evaluator(object): @@ -76,4 +74,4 @@ def _calc_true_positive(self, overlaps_per_truth): def _print_msg(self, recall, precision, f1_score): - print "recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score) + print("recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score)) diff --git a/digit_detector/file_io.py b/digit_detector/file_io.py index 4006bb9..d78696e 100644 --- a/digit_detector/file_io.py +++ b/digit_detector/file_io.py @@ -178,6 +178,7 @@ def list_files(directory, pattern="*.*", n_files_to_sample=None, recursive_optio files = random.sample(files, n_files_to_sample) else: files = files[:n_files_to_sample] + return files diff 
--git a/digit_detector/region_proposal.py b/digit_detector/region_proposal.py index cc7eda3..fe2342e 100644 --- a/digit_detector/region_proposal.py +++ b/digit_detector/region_proposal.py @@ -2,10 +2,7 @@ import numpy as np import cv2 from matplotlib import pyplot as plt - -import crop -import show - +from digit_detector import crop, show class Regions: @@ -60,8 +57,8 @@ class MserRegionProposer(_RegionProposer): def detect(self, img): gray = self._to_gray(img) - mser = cv2.MSER(_delta = 1) - regions = mser.detect(gray, None) + mser = cv2.MSER_create(delta=1) + regions, _ = mser.detectRegions(gray) bounding_boxes = self._get_boxes(regions) regions = Regions(img, bounding_boxes) return regions diff --git a/digit_detector/train.py b/digit_detector/train.py index 97f8611..f8c1e32 100644 --- a/digit_detector/train.py +++ b/digit_detector/train.py @@ -1,13 +1,11 @@ - -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Convolution2D, MaxPooling2D -from keras import backend as K -from keras.preprocessing.image import ImageDataGenerator +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D +from tensorflow.keras import layers, activations +from tensorflow.keras import backend as K +from tensorflow.keras.preprocessing.image import ImageDataGenerator import numpy as np - def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size=128, nb_epoch=5, nb_classes=2, do_augment=False, save_file='models/detector_model.hdf5'): """ vgg-like deep convolutional network """ @@ -18,37 +16,30 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size # size of pooling area for max pooling pool_size = (2, 2) - # convolution kernel size - kernel_size = (3, 3) input_shape = (img_rows, img_cols, 1) - model = Sequential() - model.add(Convolution2D(nb_filters, kernel_size[0], 
kernel_size[1], - border_mode='valid', - input_shape=input_shape)) - model.add(Activation('relu')) - model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1])) - model.add(Activation('relu')) + model.add(Convolution2D(filters=nb_filters, kernel_size=3, padding='valid', + input_shape=input_shape, activation='relu')) + model.add(Convolution2D(filters=nb_filters, kernel_size=3, + input_shape=input_shape[1:], activation='relu')) model.add(MaxPooling2D(pool_size=pool_size)) - # (16, 8, 32) - model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1])) - model.add(Activation('relu')) - model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1])) - model.add(Activation('relu')) + model.add(Convolution2D(filters=nb_filters*2, kernel_size=3, + activation='relu')) + model.add(Convolution2D(filters=nb_filters*2, kernel_size=3, + activation='relu')) model.add(MaxPooling2D(pool_size=pool_size)) - # (8, 4, 64) = (2048) - + model.add(Flatten()) model.add(Dense(1024)) - model.add(Activation('relu')) + model.add(Activation(activations.relu)) model.add(Dropout(0.5)) model.add(Dense(nb_classes)) - model.add(Activation('softmax')) + model.add(Activation(activations.softmax)) model.compile(loss='categorical_crossentropy', - optimizer='adadelta', + optimizer='adam', metrics=['accuracy']) if do_augment: @@ -60,14 +51,12 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size zoom_range=0.2) datagen.fit(X_train) model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), - samples_per_epoch=len(X_train), nb_epoch=nb_epoch, + samples_per_epoch=len(X_train), epochs=nb_epoch, validation_data=(X_test, Y_test)) else: - model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, + model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, verbose=1, validation_data=(X_test, Y_test)) score = model.evaluate(X_test, Y_test, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) - 
model.save(save_file) - - + model.save(save_file) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a051131 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +funcsigs==1.0.2 +h5py>=2.9.0 +keras==2.10.0 +mock==2.0.0 +pbr==3.0.0 +protobuf>=3.9.2 +pyyaml==3.12 +tensorflow-gpu==2.10.1 +theano==0.9.0 +tensorflow==2.10.1 +tensorflow-intel==2.10.0 \ No newline at end of file diff --git a/results.md b/results.md new file mode 100644 index 0000000..2c358d8 --- /dev/null +++ b/results.md @@ -0,0 +1,13 @@ +# Detector +| Batch Size | Number of Epochs | Training Accuracy | Validation Accuracy | +| ------------- | ------------- | ------------- | ------------- | +| 128 | 5 | 97.44% | 97.24% | +| 64 | 5 | 96.49% | 96.77% | +| 256 | 5 | 98.19% | 97.45% | + +# Recognizer +| Batch Size | Number of Epochs | Training Accuracy | Validation Accuracy | +| ------------- | ------------- | ------------- | ------------- | +| 128 | 5 | 96.85% | 95.32% | +| 64 | 5 | 96.34% | 95.06% | +| 256 | 5 | 96.85% | 95.67% |