
Revision #48

Open · wants to merge 16 commits into base: master
11 changes: 5 additions & 6 deletions 1_sample_loader.py
@@ -1,5 +1,4 @@
#-*- coding: utf-8 -*-

import os
import cv2
import numpy as np
@@ -12,8 +11,8 @@
import digit_detector.region_proposal as rp

N_IMAGES = None
DIR = '../datasets/svhn/train'
ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json"
DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train'
ANNOTATION_FILE = "C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/train/digitStruct.json"
NEG_OVERLAP_THD = 0.05
POS_OVERLAP_THD = 0.6
PATCH_SIZE = (32,32)
@@ -24,16 +23,16 @@
files = file_io.list_files(directory=DIR, pattern="*.png", recursive_option=False, n_files_to_sample=N_IMAGES, random_order=False)
n_files = len(files)
n_train_files = int(n_files * 0.8)
print n_train_files
print(n_train_files)

extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator())
train_samples, train_labels = extractor.extract_patch(files[:n_train_files], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD)

extractor = extractor_.Extractor(rp.MserRegionProposer(), ann.SvhnAnnotation(ANNOTATION_FILE), rp.OverlapCalculator())
validation_samples, validation_labels = extractor.extract_patch(files[n_train_files:], PATCH_SIZE, POS_OVERLAP_THD, NEG_OVERLAP_THD)

print train_samples.shape, train_labels.shape
print validation_samples.shape, validation_labels.shape
print(train_samples.shape, train_labels.shape)
print(validation_samples.shape, validation_labels.shape)

# show.plot_images(samples, labels.reshape(-1,).tolist())

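2_train.py below reads train.hdf5 and val.hdf5 by key, so the collapsed tail of this script presumably persists the extracted patches. A minimal sketch of that step with plain h5py, assuming the dataset keys "images"/"labels" that the read calls in 2_train.py expect:

```python
import h5py

# Sketch only: dataset keys "images" and "labels" are assumed from what 2_train.py reads.
def save_samples(filename, samples, labels):
    with h5py.File(filename, "w") as f:
        f.create_dataset("images", data=samples, compression="gzip")
        f.create_dataset("labels", data=labels, compression="gzip")

save_samples("train.hdf5", train_samples, train_labels)
save_samples("val.hdf5", validation_samples, validation_labels)
```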
34 changes: 19 additions & 15 deletions 2_train.py
@@ -6,32 +6,36 @@
import digit_detector.preprocess as preproc
import digit_detector.train as train_

DIR = '../datasets/svhn'
# DIR = '/datasets/svhn'
NB_FILTERS = 32
NB_EPOCH = 5
BATCHSIZE = 128

DETECTOR_FILE = 'detector_model.hdf5'
RECOGNIZER_FILE = 'recognize_model.hdf5'

if __name__ == "__main__":

images_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "images")
labels_train = file_io.FileHDF5().read(os.path.join(DIR, "train.hdf5"), "labels")
images_train = file_io.FileHDF5().read("train.hdf5", "images")
labels_train = file_io.FileHDF5().read("train.hdf5", "labels")

images_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "images")
labels_val = file_io.FileHDF5().read(os.path.join(DIR, "val.hdf5"), "labels")
images_val = file_io.FileHDF5().read("val.hdf5", "images")
labels_val = file_io.FileHDF5().read("val.hdf5", "labels")

# Train detector
X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 2)
print "mean value of the train images : {}".format(mean_value) # 107.524
print "Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (457723, 32, 32, 1), (113430, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=2, save_file=DETECTOR_FILE)

print("mean value of the train images : {}".format(mean_value)) # 108.784
print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (1279733, 32, 32, 1), (317081, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=2, save_file=DETECTOR_FILE)
# loss: 0.0784 - accuracy: 0.9744 - val_loss: 0.0997 - val_accuracy: 0.9724
# Test score: 0.09970200061798096
# Test accuracy: 0.9724171161651611

# Train recognizer
X_train, X_val, Y_train, Y_val, mean_value = preproc.GrayImgTrainPreprocessor().run(images_train, labels_train, images_val, labels_val, 10)
print "mean value of the train images : {}".format(mean_value) # 112.833
print "Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape) # (116913, 32, 32, 1), (29456, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, nb_classes=10, save_file=RECOGNIZER_FILE)
# acc: 0.9541 - val_loss: 0.2125 - val_acc: 0.9452


print("mean value of the train images : {}".format(mean_value)) # 115.503
print("Train image shape is {}, and Validation image shape is {}".format(X_train.shape, X_val.shape)) # (267234, 32, 32, 1), (67359, 32, 32, 1)
train_.train_detector(X_train, X_val, Y_train, Y_val, nb_filters = NB_FILTERS, nb_epoch=NB_EPOCH, batch_size=BATCHSIZE, nb_classes=10, save_file=RECOGNIZER_FILE)
# loss: 0.1070 - accuracy: 0.9685 - val_loss: 0.2196 - val_accuracy: 0.9532
# Test score: 0.21958307921886444
# Test accuracy: 0.9531614184379578
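As a quick sanity check before training, the HDF5 files can be inspected directly with h5py; the keys below are taken from the FileHDF5().read calls above:

```python
import h5py

# Verify that the patch arrays and labels written by 1_sample_loader.py are present.
for name in ("train.hdf5", "val.hdf5"):
    with h5py.File(name, "r") as f:
        print(name, f["images"].shape, f["labels"].shape)
```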
4 changes: 2 additions & 2 deletions 3_detect.py
@@ -13,8 +13,8 @@
detect_model = "detector_model.hdf5"
recognize_model = "recognize_model.hdf5"

mean_value_for_detector = 107.524
mean_value_for_recognizer = 112.833
mean_value_for_detector = 108.784
mean_value_for_recognizer = 115.503

model_input_shape = (32,32,1)
DIR = '../datasets/svhn/train'
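The updated mean values match the figures printed by 2_train.py (108.784 for the detector set, 115.503 for the recognizer set). They are presumably subtracted from each grayscale patch before prediction; a sketch of that zero-centering step, with purely illustrative names rather than the repo's preprocessor API:

```python
import numpy as np

# Illustrative only: zero-center grayscale patches with the training-set mean.
def normalize_patches(patches, mean_value):
    return patches.astype(np.float32) - mean_value

dummy_patches = np.random.randint(0, 256, size=(4, 32, 32, 1), dtype=np.uint8)
detector_input = normalize_patches(dummy_patches, 108.784)
recognizer_input = normalize_patches(dummy_patches, 115.503)
```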
25 changes: 9 additions & 16 deletions 4_evaluate.py
@@ -11,19 +11,18 @@
import digit_detector.file_io as file_io
import digit_detector.preprocess as preproc
import digit_detector.annotation as ann
import digit_detector.evaluate as eval
import digit_detector.evaluate as eva
import digit_detector.classify as cls


model_filename = "detector_model.hdf5"
model_input_shape = (32,32,1)
DIR = '../datasets/svhn/train'
ANNOTATION_FILE = "../datasets/svhn/train/digitStruct.json"
DIR = 'C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/datasets/svhn/train'
ANNOTATION_FILE = "C:/Users/zhan14417/OneDrive/GitHub/SVHN-deep-digit-detector/annotation/test/digitStruct.json"

detect_model = "detector_model.hdf5"
recognize_model = "recognize_model.hdf5"
mean_value_for_detector = 107.524
mean_value_for_recognizer = 112.833
mean_value_for_detector = 108.784
mean_value_for_recognizer = 115.503


if __name__ == "__main__":
@@ -43,22 +42,16 @@
det = detect.DigitSpotter(detector, recognizer, proposer)

# 3. Evaluate average precision
evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator())
evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator())
recall, precision, f1_score = evaluator.run(img_files)
# recall value : 0.513115508514, precision value : 0.714285714286, f1_score : 0.597214783074
# recall value : 0.025614754098360656, precision value : 0.0328515111695138, f1_score : 0.02878526194588371

# 4. Evaluate MSER
detector = cls.TrueBinaryClassifier(input_shape=model_input_shape)
preprocessor = preproc.NonePreprocessor()

# TODO: replace detector and recognizer with none-type implementations
det = detect.DigitSpotter(detector, recognizer, proposer)
evaluator = eval.Evaluator(det, annotator, rp.OverlapCalculator())
evaluator = eva.Evaluator(det, annotator, rp.OverlapCalculator())
recall, precision, f1_score = evaluator.run(img_files, do_nms=False)
#recall value : 0.630004601933, precision value : 0.0452547023239, f1_score : 0.0844436220084






# recall value : 0.08504098360655737, precision value : 0.001661811374398094, f1_score : 0.0032599196802922145
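For reference, the f1_score figures in these comments are the harmonic mean of recall and precision, which can be checked in a couple of lines:

```python
def f1(recall, precision):
    # Harmonic mean of recall and precision.
    return 2 * recall * precision / (recall + precision)

print(f1(0.025614754098360656, 0.0328515111695138))  # ~0.02879, matching the comment above
```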
24 changes: 12 additions & 12 deletions conf/svhn.json
@@ -1,22 +1,22 @@
{
"dataset":
{
"pos_data_dir": "../datasets/svhn/train",
"pos_data_dir": "/datasets/svhn/train",
"pos_format": "*.png",
"test_data_dir": "../datasets/svhn/test",
"extra_data_dir": "../datasets/svhn/extra",
"easy_data_dir": "../datasets/svhn/easy_test",
"annotation_file": "../datasets/svhn/train/digitStruct.json",
"annotation_file_test": "../datasets/svhn/test/digitStruct.json",
"neg_data_dir": "../datasets/houses",
"test_data_dir": "/datasets/svhn/test",
"extra_data_dir": "/datasets/svhn/extra",
"easy_data_dir": "/datasets/svhn/easy_test",
"annotation_file": "/datasets/svhn/train/digitStruct.json",
"annotation_file_test": "/datasets/svhn/test/digitStruct.json",
"neg_data_dir": "/datasets/houses",
"neg_format": "*.jpg",
"neg_data_from_train": "../datasets/svhn/train/negative_images",
"neg_data_from_train": "/datasets/svhn/train/negative_images",
"neg_data_from_train_format": "*.png"
},

"extractor":
{
"output_file": "../datasets/output/svhn_features.hdf5",
"output_file": "/datasets/output/svhn_features.hdf5",
"padding": 0,
"sampling_ratio_for_positive_images": 1.0,
"sampling_ratio_for_negative_images": 1.0,
@@ -37,10 +37,10 @@
"algorithm": "ConvNet",
"parameters":
{
"model_file": "models/detector_model.hdf5",
"model_file": "/models/detector_model.hdf5",
"mean_value": 84.9649
},
"output_file": "models/detector_model.hdf5"
"output_file": "/models/detector_model.hdf5"
},

"detector":
@@ -50,7 +50,7 @@
"pyramid_scale": 0.9,
"window_dim": [32, 16],
"min_probability": 0.7,
"output_file": "../datasets/output/svhn_detector.pkl"
"output_file": "/datasets/output/svhn_detector.pkl"
},

"hard_negative_mine":
Binary file modified detector_model.hdf5
Binary file added digit_detector/__pycache__/detect.cpython-39.pyc
4 changes: 1 addition & 3 deletions digit_detector/annotation.py
@@ -1,10 +1,8 @@
#-*- coding: utf-8 -*-

import file_io
from digit_detector import file_io
import os
import numpy as np


class Annotation:

def __init__(self, annotation_file):
6 changes: 3 additions & 3 deletions digit_detector/classify.py
@@ -1,5 +1,4 @@
#-*- coding: utf-8 -*-

from abc import ABCMeta, abstractmethod
import keras
import numpy as np
@@ -29,8 +28,9 @@ def predict_proba(self, patches):
probs (N, n_classes)
"""
patches_preprocessed = self._preprocessor.run(patches)
probs = self._model.predict_proba(patches_preprocessed, verbose=0)
return probs
predict_x = self._model.predict(patches_preprocessed)

return predict_x

class TrueBinaryClassifier(Classifier):
"""Classifier always predict true """
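Sequential.predict_proba was dropped from recent Keras releases; because the model ends in a softmax layer, model.predict already returns per-class probabilities, so the swap keeps the behaviour. A self-contained sketch of that equivalence (the toy model here is illustrative, not the repo's network):

```python
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense

# Toy softmax classifier: predict() yields an (N, n_classes) probability matrix,
# which is what the removed predict_proba() used to return.
model = Sequential([Flatten(input_shape=(32, 32, 1)), Dense(2, activation="softmax")])
model.compile(loss="categorical_crossentropy", optimizer="adam")

patches = np.random.rand(4, 32, 32, 1).astype("float32")
probs = model.predict(patches, verbose=0)
print(probs.shape, probs.sum(axis=1))  # (4, 2), each row sums to ~1.0
```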
6 changes: 2 additions & 4 deletions digit_detector/evaluate.py
@@ -1,10 +1,8 @@
#-*- coding: utf-8 -*-

import progressbar
import cv2
import numpy as np
import region_proposal as rp

#import region_proposal as rp

class Evaluator(object):

@@ -76,4 +74,4 @@ def _calc_true_positive(self, overlaps_per_truth):


def _print_msg(self, recall, precision, f1_score):
print "recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score)
print("recall value : {}, precision value : {}, f1_score : {}".format(recall, precision, f1_score))
1 change: 1 addition & 0 deletions digit_detector/file_io.py
@@ -178,6 +178,7 @@ def list_files(directory, pattern="*.*", n_files_to_sample=None, recursive_optio
files = random.sample(files, n_files_to_sample)
else:
files = files[:n_files_to_sample]

return files


9 changes: 3 additions & 6 deletions digit_detector/region_proposal.py
@@ -2,10 +2,7 @@
import numpy as np
import cv2
from matplotlib import pyplot as plt

import crop
import show

from digit_detector import crop, show

class Regions:

@@ -60,8 +57,8 @@ class MserRegionProposer(_RegionProposer):

def detect(self, img):
gray = self._to_gray(img)
mser = cv2.MSER(_delta = 1)
regions = mser.detect(gray, None)
mser = cv2.MSER_create(delta=1)
regions, _ = mser.detectRegions(gray)
bounding_boxes = self._get_boxes(regions)
regions = Regions(img, bounding_boxes)
return regions
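In OpenCV 3+ the MSER constructor is gone: instances come from the cv2.MSER_create() factory, and detectRegions() returns the point lists together with bounding boxes. A minimal sketch of the new API (the input image path is illustrative):

```python
import cv2

gray = cv2.imread("1.png", cv2.IMREAD_GRAYSCALE)   # illustrative input image
mser = cv2.MSER_create(delta=1)
regions, boxes = mser.detectRegions(gray)           # point lists and (x, y, w, h) boxes

# The same boxes can be recomputed from the point lists, as _get_boxes presumably does:
boxes_from_points = [cv2.boundingRect(points) for points in regions]
```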
51 changes: 20 additions & 31 deletions digit_detector/train.py
@@ -1,13 +1,11 @@

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D
from tensorflow.keras import layers, activations
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np


def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size=128, nb_epoch=5, nb_classes=2, do_augment=False, save_file='models/detector_model.hdf5'):
""" vgg-like deep convolutional network """

@@ -18,37 +16,30 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size

# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)
input_shape = (img_rows, img_cols, 1)


model = Sequential()
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
border_mode='valid',
input_shape=input_shape))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(filters=nb_filters, kernel_size=3, padding='valid',
input_shape=input_shape, activation='relu'))
model.add(Convolution2D(filters=nb_filters, kernel_size=3,
input_shape=input_shape[1:], activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
# (16, 8, 32)

model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters*2, kernel_size[0], kernel_size[1]))
model.add(Activation('relu'))
model.add(Convolution2D(filters=nb_filters*2, kernel_size=3,
activation='relu'))
model.add(Convolution2D(filters=nb_filters*2, kernel_size=3,
activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
# (8, 4, 64) = (2048)


model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Activation(activations.relu))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.add(Activation(activations.softmax))

model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
optimizer='adam',
metrics=['accuracy'])

if do_augment:
@@ -60,14 +51,12 @@ def train_detector(X_train, X_test, Y_train, Y_test, nb_filters = 32, batch_size
zoom_range=0.2)
datagen.fit(X_train)
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
samples_per_epoch=len(X_train), nb_epoch=nb_epoch,
samples_per_epoch=len(X_train), epochs=nb_epoch,
validation_data=(X_test, Y_test))
else:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch,
verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
model.save(save_file)


model.save(save_file)
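One Keras-1 remnant worth flagging: the do_augment branch above still uses fit_generator with the samples_per_epoch argument; in current tf.keras the generator goes straight to model.fit and the corresponding argument is steps_per_epoch. A sketch of the equivalent call under that assumption:

```python
# Sketch only: in TF 2.x the generator is passed to model.fit, and
# samples_per_epoch becomes steps_per_epoch (number of batches, not samples).
model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size),
          steps_per_epoch=len(X_train) // batch_size,
          epochs=nb_epoch,
          validation_data=(X_test, Y_test))
```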
11 changes: 11 additions & 0 deletions requirements.txt
@@ -0,0 +1,11 @@
funcsigs==1.0.2
h5py>=2.9.0
keras==2.10.0
mock==2.0.0
pbr==3.0.0
protobuf>=3.9.2
pyyaml==3.12
tensorflow-gpu==2.10.1
theano==0.9.0
tensorflow==2.10.1
tensorflow-intel==2.10.0