
Revision #48

Open · wants to merge 16 commits into base: master
11 changes: 5 additions & 6 deletions 1_sample_loader.py
@@ -1,5 +1,4 @@
#-*- coding: utf-8 -*-

import os
import cv2
import numpy as np
@@ -12,8 +11,8 @@
import digit_detector.region_proposal as rp

N_IMAGES = None
DIR = '../datasets/svhn/train'
ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json"
DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train'
ANNOTATION_FILE = "C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/train/digitStruct.json"
NEG_OVERLAP_THD = 0.05
POS_OVERLAP_THD = 0.6
PATCH_SIZE = (32,32)
@@ -24,16 +23,16 @@
files = file_io.list_files(directory=DIR, pattern="*.png", recursive_option=False, n_files_to_sample=N_IMAGES, random_order=False)
n_files = len(files)
n_train_files = int(n_files * 0.8)
print n_train_files
print(n_train_files)

extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator())
train_samples, train_labels = extractor.extract_patch(files[:n_train_files], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD)

extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator())
validation_samples, validation_labels = extractor.extract_patch(files[n_train_files:], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD)

print train_samples.shape, train_labels.shape
print validation_samples.shape, validation_labels.shape
print(train_samples.shape, train_labels.shape)
print(validation_samples.shape, validation_labels.shape)

# show.plot_images(samples, labels.reshape(-1,).tolist())

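2_train.py below reads train.hdf5 and val.hdf5 by key, so the collapsed tail of this script presumably persists the extracted patches. A minimal sketch of that step with plain h5py, assuming the dataset keys "images"/"labels" that the read calls in 2_train.py expect:

```python
import h5py

# Sketch only: dataset keys "images" and "labels" are assumed from what 2_train.py reads.
def save_samples(filename, samples, labels):
    with h5py.File(filename, "w") as f:
        f.create_dataset("images", data=samples, compression="gzip")
        f.create_dataset("labels", data=labels, compression="gzip")

save_samples("train.hdf5", train_samples, train_labels)
save_samples("val.hdf5", validation_samples, validation_labels)
```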
34 changes: 19 additions & 15 deletions 2_train.py
@@ -6,32 +6,36 @@
import digit_detector.preprocess as preproc
import digit_detector.train as train_

DIR = '../datasets/svhn'
# DIR = '/datasets/svhn'
NB_FILTERS = 32
NB_EPOCH = 5
BATCHSIZE = 128

DETECTOR_FILE = 'detector_model.hdf5'
RECOGNIZER_FILE = 'recognize_model.hdf5'

if __name__ == "__main__":

images_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "images")
labels_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "labels")
images_train = file_io.FileHDF5().read("train.hdf5", "images")
labels_train = file_io.FileHDF5().read("train.hdf5", "labels")

images_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "images")
labels_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "labels")
images_val = file_io.FileHDF5().read("val.hdf5", "images")
labels_val = file_io.FileHDF5().read("val.hdf5", "labels")

# Train detector
X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 2)
print "mean value of the train images : {}".format(mean_value) # 107.524
print "Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (457723, 32, 32, 1), (113430, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=2, save_file=DETECTOR_FILE)

print("mean value of the train images : {}".format(mean_value)) # 108.784
print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (1279733, 32, 32, 1), (317081, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=2, save_file=DETECTOR_FILE)
# loss: 0.0784 - accuracy: 0.9744 - val_loss: 0.0997 - val_accuracy: 0.9724
# Test score: 0.09970200061798096
# Test accuracy: 0.9724171161651611

# Train recognizer
X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 10)
print "mean value of the train images : {}".format(mean_value) # 112.833
print "Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (116913, 32, 32, 1), (29456, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=10, save_file=RECOGNIZER_FILE)
# acc: 0.9541 - val_loss: 0.2125 - val_acc: 0.9452


print("mean value of the train images : {}".format(mean_value)) # 115.503
print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (267234, 32, 32, 1), (67359, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=10, save_file=RECOGNIZER_FILE)
# loss: 0.1070 - accuracy: 0.9685 - val_loss: 0.2196 - val_accuracy: 0.9532
# Test score: 0.21958307921886444
# Test accuracy: 0.9531614184379578
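As a quick sanity check before training, the HDF5 files can be inspected directly with h5py; the keys below are taken from the FileHDF5().read calls above:

```python
import h5py

# Verify that the patch arrays and labels written by 1_sample_loader.py are present.
for name in ("train.hdf5", "val.hdf5"):
    with h5py.File(name, "r") as f:
        print(name, f["images"].shape, f["labels"].shape)
```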
4 changes: 2 additions & 2 deletions 3_detect.py
@@ -13,8 +13,8 @@
detect_model = "detector_model.hdf5"
recognize_model = "recognize_model.hdf5"

mean_value_for_detector = 107.524
mean_value_for_recognizer = 112.833
mean_value_for_detector = 108.784
mean_value_for_recognizer = 115.503

model_input_shape = (32,32,1)
DIR = '../datasets/svhn/train'
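The updated mean values match the figures printed by 2_train.py (108.784 for the detector set, 115.503 for the recognizer set). They are presumably subtracted from each grayscale patch before prediction; a sketch of that zero-centering step, with purely illustrative names rather than the repo's preprocessor API:

```python
import numpy as np

# Illustrative only: zero-center grayscale patches with the training-set mean.
def normalize_patches(patches, mean_value):
    return patches.astype(np.float32) - mean_value

dummy_patches = np.random.randint(0, 256, size=(4, 32, 32, 1), dtype=np.uint8)
detector_input = normalize_patches(dummy_patches, 108.784)
recognizer_input = normalize_patches(dummy_patches, 115.503)
```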
25 changes: 9 additions & 16 deletions 4_evaluate.py
@@ -11,19 +11,18 @@
import digit_detector.file_io as file_io
import digit_detector.preprocess as preproc
import digit_detector.annotation as ann
import digit_detector.evaluate as eval
import digit_detector.evaluate as eva
import digit_detector.classify as cls


model_filename = "detector_model.hdf5"
model_input_shape = (32,32,1)
DIR = '../datasets/svhn/train'
ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json"
DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train'
ANNOTATION_FILE = "C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/test/digitStruct.json"

detect_model = "detector_model.hdf5"
recognize_model = "recognize_model.hdf5"
mean_value_for_detector = 107.524
mean_value_for_recognizer = 112.833
mean_value_for_detector = 108.784
mean_value_for_recognizer = 115.503


if __name__ == "__main__":
@@ -43,22 +42,16 @@
det = detect.DigitSpotter(detector, recognizer, proposer)

# 3. Evaluate average precision
evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator())
evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator())
recall, precision, f1_score = evaluator.run(img_files)
# recall value : 0.513115508514, precision value : 0.714285714286, f1_score : 0.597214783074
# recall value : 0.025614754098360656, precision value : 0.0328515111695138, f1_score : 0.02878526194588371

# 4. Evaluate MSER
detector = cls.TrueBinaryClassifier(input_shape=model_input_shape)
preprocessor = preproc.NonePreprocessor()

# TODO: replace detector and recognizer with none-type implementations
det = detect.DigitSpotter(detector, recognizer, proposer)
evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator())
evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator())
recall, precision, f1_score = evaluator.run(img_files, do_nms=False)
#recall value : 0.630004601933, precision value : 0.0452547023239, f1_score : 0.0844436220084






# recall value : 0.08504098360655737, precision value : 0.001661811374398094, f1_score : 0.0032599196802922145
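For reference, the f1_score figures in these comments are the harmonic mean of recall and precision, which can be checked in a couple of lines:

```python
def f1(recall, precision):
    # Harmonic mean of recall and precision.
    return 2 * recall * precision / (recall + precision)

print(f1(0.025614754098360656, 0.0328515111695138))  # ~0.02879, matching the comment above
```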
24 changes: 12 additions & 12 deletions conf/svhn.json
@@ -1,22 +1,22 @@
{
"dataset":
{
"pos_data_dir": "../datasets/svhn/train",
"pos_data_dir": "/datasets/svhn/train",
"pos_format": "*.png",
"test_data_dir": "../datasets/svhn/test",
"extra_data_dir": "../datasets/svhn/extra",
"easy_data_dir": "../datasets/svhn/easy_test",
"annotation_file": "../datasets/svhn/train/digitStruct.json",
"annotation_file_test": "../datasets/svhn/test/digitStruct.json",
"neg_data_dir": "../datasets/houses",
"test_data_dir": "/datasets/svhn/test",
"extra_data_dir": "/datasets/svhn/extra",
"easy_data_dir": "/datasets/svhn/easy_test",
"annotation_file": "/datasets/svhn/train/digitStruct.json",
"annotation_file_test": "/datasets/svhn/test/digitStruct.json",
"neg_data_dir": "/datasets/houses",
"neg_format": "*.jpg",
"neg_data_from_train": "../datasets/svhn/train/negative_images",
"neg_data_from_train": "/datasets/svhn/train/negative_images",
"neg_data_from_train_format": "*.png"
},

"extractor":
{
"output_file": "../datasets/output/svhn_features.hdf5",
"output_file": "/datasets/output/svhn_features.hdf5",
"padding": 0,
"sampling_ratio_for_positive_images": 1.0,
"sampling_ratio_for_negative_images": 1.0,
@@ -37,10 +37,10 @@
"algorithm": "ConvNet",
"parameters":
{
"model_file": "models/detector_model.hdf5",
"model_file": "/models/detector_model.hdf5",
"mean_value": 84.9649
},
"output_file": "models/detector_model.hdf5"
"output_file": "/models/detector_model.hdf5"
},

"detector":
@@ -50,7 +50,7 @@
"pyramid_scale": 0.9,
"window_dim": [32, 16],
"min_probability": 0.7,
"output_file": "../datasets/output/svhn_detector.pkl"
"output_file": "/datasets/output/svhn_detector.pkl"
},

"hard_negative_mine":
Binary file modified detector_model.hdf5
Binary file added digit_detector/__pycache__/detect.cpython-39.pyc
4 changes: 1 addition & 3 deletions digit_detector/annotation.py
@@ -1,10 +1,8 @@
#-*- coding: utf-8 -*-

import file_io
from digit_detector import file_io
import os
import numpy as np


class Annotation:

def __init__(self, annotation_file):
6 changes: 3 additions & 3 deletions digit_detector/classify.py
@@ -1,5 +1,4 @@
#-*- coding: utf-8 -*-

from abc import ABCMeta, abstractmethod
import keras
import numpy as np
@@ -29,8 +28,9 @@ def predict_proba(self, patches):
probs (N, n_classes)
"""
patches_preprocessed = self._preprocessor.run(patches)
probs = self._model.predict_proba(patches_preprocessed, verbose=0)
return probs
predict_x = self._model.predict(patches_preprocessed)

return predict_x

class TrueBinaryClassifier(Classifier):
"""Classifier always predict true """
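Sequential.predict_proba was dropped from recent Keras releases; because the model ends in a softmax layer, model.predict already returns per-class probabilities, so the swap keeps the behaviour. A self-contained sketch of that equivalence (the toy model here is illustrative, not the repo's network):

```python
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense

# Toy softmax classifier: predict() yields an (N, n_classes) probability matrix,
# which is what the removed predict_proba() used to return.
model = Sequential([Flatten(input_shape=(32, 32, 1)), Dense(2, activation="softmax")])
model.compile(loss="categorical_crossentropy", optimizer="adam")

patches = np.random.rand(4, 32, 32, 1).astype("float32")
probs = model.predict(patches, verbose=0)
print(probs.shape, probs.sum(axis=1))  # (4, 2), each row sums to ~1.0
```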
6 changes: 2 additions & 4 deletions digit_detector/evaluate.py
@@ -1,10 +1,8 @@
#-*- coding: utf-8 -*-

import progressbar
import cv2
import numpy as np
import region_proposal as rp

#import region_proposal as rp

class Evaluator(object):

@@ -76,4 +74,4 @@ def _calc_true_positive(self, overlaps_per_truth):


def _print_msg(self, recall, precision, f1_score):
print "recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score)
print("recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score))
1 change: 1 addition & 0 deletions digit_detector/file_io.py
@@ -178,6 +178,7 @@ def list_files(directory, pattern="*.*", n_files_to_sample=None, recursive_optio
files = random.sample(files, n_files_to_sample)
else:
files = files[:n_files_to_sample]

return files


9 changes: 3 additions & 6 deletions digit_detector/region_proposal.py
@@ -2,10 +2,7 @@
import numpy as np
import cv2
from matplotlib import pyplot as plt

import crop
import show

from digit_detector import crop, show

class Regions:

@@ -60,8 +57,8 @@ class MserRegionProposer(_RegionProposer):

def detect(self, img):
gray = self._to_gray(img)
mser = cv2.MSER(_delta = 1)
regions = mser.detect(gray, None)
mser = cv2.MSER_create(delta=1)
regions, _ = mser.detectRegions(gray)
bounding_boxes = self._get_boxes(regions)
regions = Regions(img, bounding_boxes)
return regions
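In OpenCV 3+ the MSER constructor is gone: instances come from the cv2.MSER_create() factory, and detectRegions() returns the point lists together with bounding boxes. A minimal sketch of the new API (the input image path is illustrative):

```python
import cv2

gray = cv2.imread("1.png", cv2.IMREAD_GRAYSCALE)   # illustrative input image
mser = cv2.MSER_create(delta=1)
regions, boxes = mser.detectRegions(gray)           # point lists and (x, y, w, h) boxes

# The same boxes can be recomputed from the point lists, as _get_boxes presumably does:
boxes_from_points = [cv2.boundingRect(points) for points in regions]
```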
51 changes: 20 additions & 31 deletions digit_detector/train.py
@@ -1,13 +1,11 @@

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D
from tensorflow.keras import layers, activations
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np


def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size=128, nb_epoch=5, nb_classes=2, do_augment=False, save_file='models/detector_model.hdf5'):
""" vgg-like deep convolutional network """

@@ -18,37 +16,30 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size

# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)
input_shape = (img_rows, img_cols, 1)


model = Sequential()
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
border_mode='valid',
input_shape=input_shape))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(filters=nb_filters, kernel_size=3, padding='valid',
input_shape=input_shape, activation='relu'))
model.add(Convolution2D(filters=nb_filters, kernel_size=3,
input_shape=input_shape[1:], activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
# (16, 8, 32)

model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(filters=nb_filters*2, kernel_size=3,
activation='relu'))
model.add(Convolution2D(filters=nb_filters*2, kernel_size=3,
activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
# (8, 4, 64) = (2048)


model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Activation(activations.relu))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.add(Activation(activations.softmax))

model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
optimizer='adam',
metrics=['accuracy'])

if do_augment:
@@ -60,14 +51,12 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size
zoom_range=0.2)
datagen.fit(X_train)
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
samples_per_epoch=len(X_train), nb_epoch=nb_epoch,
samples_per_epoch=len(X_train), epochs=nb_epoch,
validation_data=(X_test, Y_test))
else:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
model.save(save_file)


model.save(save_file)
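One Keras-1 remnant worth flagging: the do_augment branch above still uses fit_generator with the samples_per_epoch argument; in current tf.keras the generator goes straight to model.fit and the corresponding argument is steps_per_epoch. A sketch of the equivalent call under that assumption:

```python
# Sketch only: in TF 2.x the generator is passed to model.fit, and
# samples_per_epoch becomes steps_per_epoch (number of batches, not samples).
model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size),
          steps_per_epoch=len(X_train) // batch_size,
          epochs=nb_epoch,
          validation_data=(X_test, Y_test))
```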
11 changes: 11 additions & 0 deletions requirements.txt
@@ -0,0 +1,11 @@
funcsigs==1.0.2
h5py>=2.9.0
keras==2.10.0
mock==2.0.0
pbr==3.0.0
protobuf>=3.9.2
pyyaml==3.12
tensorflow-gpu==2.10.1
theano==0.9.0
tensorflow==2.10.1
tensorflow-intel==2.10.0