Commit: Merge pull request #27 from 1eedaegon/master ("update")
Showing 18 changed files with 273 additions and 53 deletions.
@@ -0,0 +1,150 @@
# -*- coding: utf-8 -*-
"""
Created on Wed May 2 19:21:50 2018
@author: stu
"""

# Preprocessing code: extract_features.py

import numpy as np
import os
import glob
import tensorflow as tf
import pandas as pd

tf.set_random_seed(777)

train_info = pd.read_csv("C:/data/sound/train.csv", delimiter=',')
train_data = np.genfromtxt("C:/data/sound/feature_train.csv", delimiter=',')

# Label set: map each of the 41 label strings to an integer index (0-40)
def labels2Num(labels):
    df_label = pd.DataFrame(labels)
    l = train_info['label'].unique()
    for i in range(len(l)):
        df_label[df_label == l[i]] = i
    return df_label

labels = train_info['label']
df_label = labels2Num(labels)
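As an aside, the loop in labels2Num can be replaced by pandas' factorize, which assigns integer codes in order of first appearance, the same order unique() yields; a minimal equivalent sketch:

    # Equivalent mapping with pandas.factorize: codes are 0..40,
    # assigned in order of first appearance, matching unique()
    codes, uniques = pd.factorize(train_info['label'])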
# Train data set
train_data = pd.DataFrame(train_data)
train_data['label'] = df_label
train_data = train_data.astype(np.float32)

# Split into training and validation sets
from sklearn.model_selection import train_test_split
train_set, validate_set = train_test_split(train_data, test_size=0.3)
trainData = train_set.values[:, 0:193]
trainLabel = train_set.values[:, -1]
validateData = validate_set.values[:, 0:193]
validateLabel = validate_set.values[:, -1]
# Build the TensorFlow model
tf.reset_default_graph()

n_dim = 193
n_classes = 41
training_epochs = 700
learning_rate = 0.001
batch_size = 100
steps_for_validate = 5

X = tf.placeholder(tf.float32, [None, n_dim])
X_1d = tf.reshape(X, [-1, 193, 1])
Y = tf.placeholder(tf.int32, [None, 1])
Y_onehot = tf.reshape(tf.one_hot(Y, 41), [-1, 41])
p_keep_conv = tf.placeholder(tf.float32, name='p_keep_conv')
p_keep_hidden = tf.placeholder(tf.float32, name='p_keep_hidden')
# Convolution layer 1
c1 = tf.layers.conv1d(X_1d, 386, kernel_size=2, strides=1, padding='same',
                      activation=tf.nn.relu, name='c1')
n1 = tf.layers.batch_normalization(c1)
p1 = tf.layers.max_pooling1d(n1, pool_size=2, strides=2, padding='same')
p1 = tf.nn.dropout(p1, p_keep_conv)

# Convolution layer 2; input shape=(?, 97, 386), 579 filters = 386 * 1.5
c2 = tf.layers.conv1d(p1, 579, kernel_size=2, strides=1, padding='same',
                      activation=tf.nn.relu, name='c2')
n2 = tf.layers.batch_normalization(c2)
p2 = tf.layers.max_pooling1d(n2, pool_size=2, strides=2, padding='same')
p2 = tf.nn.dropout(p2, p_keep_conv)

# Convolution layer 3; input shape=(?, 49, 579)
c3 = tf.layers.conv1d(p2, 579, kernel_size=1, strides=1, padding='same',
                      activation=tf.nn.relu, name='c3')
n3 = tf.layers.batch_normalization(c3)
p3 = tf.layers.max_pooling1d(n3, pool_size=2, strides=2, padding='same')
p3 = tf.nn.dropout(p3, p_keep_conv)

# Flatten; input shape=(?, 25, 579)
L4_flat = tf.reshape(p3, [-1, 25*579])
# Fully connected layer
W4 = tf.get_variable("W4", shape=[25*579, 624],
                     initializer=tf.contrib.layers.xavier_initializer())
L5 = tf.nn.relu(tf.matmul(L4_flat, W4))
n5 = tf.layers.batch_normalization(L5)
L5 = tf.nn.dropout(n5, p_keep_hidden)

# Output layer
W5 = tf.get_variable("W5", shape=[624, 41],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([41]))
logits = tf.matmul(L5, W5) + b

# Define cost/loss & optimizer (Adam)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_onehot))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
predict_op = tf.argmax(logits, 1, name="pred")
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Train the model
print('Learning started. It takes some time.')
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(len(trainData) / batch_size)
    for i in range(total_batch):
        batch_xs = trainData[i*batch_size:(i+1)*batch_size]
        batch_ys = trainLabel[i*batch_size:(i+1)*batch_size].reshape(-1, 1)
        feed_dict = {X: batch_xs, Y: batch_ys, p_keep_conv: .7, p_keep_hidden: .5}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    if epoch % steps_for_validate == steps_for_validate - 1:
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y_onehot, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print('Accuracy:', sess.run(accuracy, feed_dict={
            X: validateData, Y: validateLabel.reshape(-1, 1), p_keep_conv: 1, p_keep_hidden: 1}))
print('Finished!')
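The Saver created above is never called in this script, yet the checkpoint file included in this commit points at /home/paperspace/Downloads/optx/optx, so the weights were presumably saved in a separate step. A minimal sketch of that save step, assuming the committed checkpoint path:

    # Hypothetical save step: the path is taken from the committed
    # checkpoint file; adjust it for your own environment
    save_path = saver.save(sess, "/home/paperspace/Downloads/optx/optx")
    print("Model saved to:", save_path)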
@@ -0,0 +1,41 @@
1.'Hi-hat' : foot cymbals (pedal-operated cymbals mounted on a drum kit)
2.'Saxophone' : saxophone
3.'Trumpet' : trumpet, horn
4.'Glockenspiel' : an Orff percussion instrument, also known as bell chimes
5.'Cello' : cello
6.'Knock' : to knock, rap (e.g., on a door)
7.'Gunshot_or_gunfire' : gunfire, shooting; the sound of a gunshot
8.'Clarinet' : clarinet
9.'Computer_keyboard' : computer keyboard
10.'Keys_jangling' : keys jangling
11.'Snare_drum' : snare drum (a small drum with metal snares stretched across the bottom head)
12.'Writing' : writing (letters or numbers)
13.'Laughter' : laughing (out loud)
14.'Tearing' : to tear, rip; to be torn
15.'Fart' : to fart (especially audibly)
16.'Oboe' : oboe
17.'Flute' : flute
18.'Cough' : to cough
19.'Telephone' : telephone
20.'Bark' : 1. the bark of a dog 2. a short, sharp sound (a gunshot, a raised voice)
21.'Chime' : the sound of a chime (bell)
22.'Bass_drum' : bass drum
23.'Bus' : bus
24.'Squeak' : to make a (not very loud) squeaking sound
25.'Scissors' : scissors
26.'Harmonica' : harmonica
27.'Gong' : gong (as an instrument or a signal, e.g., marking the start of a boxing round)
28.'Microwave_oven' : microwave oven
29.'Burping_or_eructation' : burping
30.'Double_bass' : double bass
31.'Shatter' : to shatter, break into pieces
32.'Fireworks' : fireworks
33.'Tambourine' : tambourine
34.'Cowbell' : cowbell (a bell hung on a cow's neck; also a German percussion instrument)
35.'Electric_piano' : electric piano
36.'Meow' : meow (the cry of a cat)
37.'Drawer_open_or_close' : the sound of a drawer opening or closing
38.'Applause' : applause, clapping
39.'Acoustic_guitar' : acoustic guitar, classical guitar
40.'Violin_or_fiddle' : violin or fiddle
41.'Finger_snapping' : finger snapping
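The training script encodes these 41 labels as integers 0-40, in the order train_info['label'].unique() returns them, so predicted indices can be mapped back to names. A minimal sketch, assuming the same train.csv and that preds holds integer outputs of predict_op:

    import numpy as np
    import pandas as pd

    train_info = pd.read_csv("C:/data/sound/train.csv", delimiter=',')
    class_names = train_info['label'].unique()  # index i corresponds to class i

    preds = np.array([0, 40, 5])                # placeholder predictions, for illustration
    print([class_names[p] for p in preds])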
@@ -0,0 +1,2 @@
model_checkpoint_path: "/home/paperspace/Downloads/optx/optx"
all_model_checkpoint_paths: "/home/paperspace/Downloads/optx/optx"
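These paths let the trained graph be reloaded elsewhere; a minimal restore sketch, assuming a .meta graph file was written next to the checkpoint and using the "pred" op name from the training script above:

    import tensorflow as tf

    sess = tf.Session()
    # Hypothetical restore: assumes optx.meta exists alongside the checkpoint
    saver = tf.train.import_meta_graph("/home/paperspace/Downloads/optx/optx.meta")
    saver.restore(sess, "/home/paperspace/Downloads/optx/optx")
    pred = tf.get_default_graph().get_tensor_by_name("pred:0")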
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,21 +1,23 @@
 def five_sec_extract(file):
     import librosa
     import numpy as np
-    array = np.repeat(0., 20 * 430).reshape(20, 430)
+    #zero padding to file.shape[0] X 20 X 430
+    array = np.zeros((20, 430))
     y, sr = librosa.core.load(file,
                               mono=True, res_type="kaiser_fast")
     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
     length=mfcc.shape[1]
+    abs_mfcc=np.abs(mfcc)
     if length == 430:
         array=mfcc
     elif length < 430:
-        array[:, :length]=mfcc
+        tile_num = (430//length)+1
+        array=np.tile(mfcc,tile_num)[:,0:430]
     elif length > 430:
-        sample = np.repeat(0., (length - 430)*20).reshape(20,length - 430)
-        for j in range(length - 430):
-            for i in range(20):
-                sample[i,j]=np.var(mfcc[i,j:j+430])
-        A=np.argmax(sample, axis=1)
-        start=np.argmax(np.bincount(A))
+        argmax=np.argmax(abs_mfcc, axis=1)
+        sample=[]
+        for i in range(np.max(argmax)):
+            sample.append(np.sum((argmax>=i) & (argmax <i+430)))
+        start=sample.index(max(sample))
         array=mfcc[:, start:start+430]
-    return(array.reshape(20*430))
+    return(array.reshape(20*430))
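A minimal sketch of applying the updated five_sec_extract over a directory of clips to build a feature matrix; the audio directory, file pattern, and output filename are assumptions for illustration, not part of the commit:

    import glob
    import numpy as np

    # Hypothetical batch extraction over wav files
    files = sorted(glob.glob("C:/data/sound/audio_train/*.wav"))
    features = np.stack([five_sec_extract(f) for f in files])  # (n_files, 20*430) = (n_files, 8600)
    np.savetxt("mfcc_features.csv", features, delimiter=',')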
Binary file not shown.
Binary file not shown.