Skip to content

Commit

Permalink
Merge pull request #27 from 1eedaegon/master
Browse files Browse the repository at this point in the history
update
  • Loading branch information
Trigger21 authored May 3, 2018
2 parents 1868c64 + cab46d9 commit 71466b7
Show file tree
Hide file tree
Showing 18 changed files with 273 additions and 53 deletions.
150 changes: 150 additions & 0 deletions PROJECT2/audio_conv1d_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# -*- coding: utf-8 -*-
"""
Created on Wed May 2 19:21:50 2018
@author: stu
"""

# Preprocessing code: extract_features.py

import numpy as np
import os
import glob
import tensorflow as tf
import pandas as pd

# Set the graph-level random seed on the current default graph.
# NOTE(review): tf.reset_default_graph() is called further down, before the
# model is built; confirm this seed still applies to the graph that is trained.
tf.set_random_seed(777)

# train.csv supplies the 'label' column; feature_train.csv is the numeric
# feature matrix. Presumably both hold one row per clip in the same order
# (the labels are attached to the features by index below) - confirm against
# extract_features.py.
train_info = pd.read_csv("C:/data/sound/train.csv",delimiter=',')
train_data = np.genfromtxt("C:/data/sound/feature_train.csv", delimiter=',')

# Label set: turn class names into integer codes 0 .. (number of classes - 1).

def labels2Num(labels):
    """Replace class-name labels with integer class codes.

    The code of a class is its position in ``train_info['label'].unique()``,
    i.e. classes are numbered in order of first appearance in the training
    metadata. Values that are not a known class name are left unchanged.

    Args:
        labels: Series (or anything ``pd.DataFrame`` accepts) of class names.

    Returns:
        A new ``pd.DataFrame`` shaped like ``pd.DataFrame(labels)`` in which
        every known class name is replaced by its integer code.
    """
    classes = train_info['label'].unique()
    # Missing labels (NaN) get no code and therefore pass through unchanged.
    code_of = {name: code for code, name in enumerate(classes)
               if not pd.isna(name)}
    df_label = pd.DataFrame(labels)
    # One dictionary lookup per cell instead of one full-frame boolean mask
    # (plus masked assignment) per class.
    return df_label.apply(lambda col: col.map(lambda v: code_of.get(v, v)))

labels = train_info['label']
df_label = labels2Num(labels)

# Training table: the feature columns followed by the class code in 'label'.
train_data = pd.DataFrame(train_data)
train_data['label'] = df_label
train_data = train_data.astype(np.float32)

# Split into a training set (70%) and a validation set (30%).
from sklearn.model_selection import train_test_split
# Seed the shuffle with the same value as the TensorFlow seed so the split,
# and therefore the reported validation accuracy, is the same on every run.
train_set, validate_set = train_test_split(
    train_data, test_size=0.3, random_state=777)
# Columns 0..192 are the features; the last column is the class code.
trainData = train_set.values[:, 0:193]
trainLabel = train_set.values[:, -1]
validateData = validate_set.values[:, 0:193]
# 'validataLabel' (sic) is the name the training loop reads, so it is kept.
validataLabel = validate_set.values[:, -1]


# Build the TensorFlow model.
tf.reset_default_graph()
# The graph-level seed is stored on the default graph, so it has to be set
# again on the fresh graph created by the reset above.
tf.set_random_seed(777)

n_dim = 193
n_classes = 41
training_epochs = 700
learning_rate = 0.001
batch_size = 100
steps_for_validate = 5
# Not referenced by the graph below; dropout is driven by p_keep_conv and
# p_keep_hidden.
keep_prob = tf.placeholder(tf.float32)
# Switches batch normalization between batch statistics (True, fed while
# training) and moving averages (False, the default used for evaluation).
is_training = tf.placeholder_with_default(False, shape=[], name='is_training')

X = tf.placeholder(tf.float32, [None, n_dim])
X_1d = tf.reshape(X, [-1, n_dim, 1])
Y = tf.placeholder(tf.int32, [None, 1])
Y_onehot = tf.reshape(tf.one_hot(Y, n_classes), [-1, n_classes])
p_keep_conv = tf.placeholder(tf.float32, name='p_keep_conv')
p_keep_hidden = tf.placeholder(tf.float32, name='p_keep_hidden')

# convolution layer 1: 386 filters (2 * n_dim)
c1 = tf.layers.conv1d(X_1d, 386, kernel_size=2, strides=1, padding='Same',
                      activation=tf.nn.relu, name='c1')
n1 = tf.layers.batch_normalization(c1, training=is_training)
p1 = tf.layers.max_pooling1d(n1, pool_size=2, strides=2, padding='Same')
p1 = tf.nn.dropout(p1, p_keep_conv)

# convolution layer 2: 579 filters (386 * 1.5)
# shape=(?, 97, 386)
c2 = tf.layers.conv1d(p1, 579, kernel_size=2, strides=1, padding='Same',
                      activation=tf.nn.relu, name='c2')
n2 = tf.layers.batch_normalization(c2, training=is_training)
p2 = tf.layers.max_pooling1d(n2, pool_size=2, strides=2, padding='Same')
p2 = tf.nn.dropout(p2, p_keep_conv)

# convolution layer 3
# shape=(?, 49, 579)
c3 = tf.layers.conv1d(p2, 579, kernel_size=1, strides=1, padding='Same',
                      activation=tf.nn.relu, name='c3')
n3 = tf.layers.batch_normalization(c3, training=is_training)
p3 = tf.layers.max_pooling1d(n3, pool_size=2, strides=2, padding='Same')
p3 = tf.nn.dropout(p3, p_keep_conv)

# fully connected layers
# shape=(?, 25, 579)
L4_flat = tf.reshape(p3, [-1, 25*579])

W4 = tf.get_variable("W4", shape=[25*579, 624],
                     initializer=tf.contrib.layers.xavier_initializer())
L5 = tf.nn.relu(tf.matmul(L4_flat, W4))
n5 = tf.layers.batch_normalization(L5, training=is_training)
L5 = tf.nn.dropout(n5, p_keep_hidden)

W5 = tf.get_variable("W5", shape=[624, n_classes],
                     initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.random_normal([n_classes]))
logits = tf.matmul(L5, W5) + b

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y_onehot))
# Batch normalization registers its moving-average updates in UPDATE_OPS;
# they only run if the train op depends on them.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)  # Adam version
predict_op = tf.argmax(logits, 1, name="pred")

# Accuracy ops are built once here; creating them inside the training loop
# would add new nodes to the graph at every validation step.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()


# train my model

print('Learning started. It takes sometime.')
# Only full batches are used; a trailing partial batch is dropped.
total_batch = int(len(trainData) / batch_size)
for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(total_batch):
        batch_xs = trainData[i*batch_size:(i+1)*batch_size]
        batch_ys = trainLabel[i*batch_size:(i+1)*batch_size].reshape(-1, 1)
        feed_dict = {X: batch_xs, Y: batch_ys,
                     p_keep_conv: .7, p_keep_hidden: .5, is_training: True}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    # Validate on every steps_for_validate-th epoch, with dropout disabled and
    # batch normalization in inference mode (is_training defaults to False).
    if epoch % steps_for_validate == steps_for_validate-1:
        print('Accuracy:', sess.run(accuracy, feed_dict={
            X: validateData, Y: validataLabel.reshape(-1, 1),
            p_keep_conv: 1, p_keep_hidden: 1}))
print('Finished!')






















41 changes: 41 additions & 0 deletions PROJECT2/label_ksw.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
1.'Hi-hat' : 풋 심벌즈(드럼에 달린, 발로 치는 심벌즈)
2.'Saxophone' : 색소폰
3.'Trumpet' : 트럼펫, 나팔
4.'Glockenspiel' : 종금이라고도 불리는 오르프악기
5.'Cello' : 첼로
6.'Knock' : (문 등을 똑똑 하고) 두드리다, 노크하다
7.'Gunshot_or_gunfire' : 발포, 총격; 총소리
8.'Clarinet' : 클라리넷
9.'Computer_keyboard' : 컴퓨터 키보드
10.'Keys_jangling' : 열쇠 쨍그랑 소리
11.'Snare_drum' : 스네어드럼(뒷면에 쇠 울림줄을 댄 작은 북)
12.'Writing' : (글자・숫자를) 쓰다
13.'Laughter' : (소리내어) 웃다
14.'Tearing' : 찢다, 뜯다; 찢어지다
15.'Fart' : (특히 소리가 크게 나게) 방귀를 뀌다
16.'Oboe' : 오보에
17.'Flute' : 플루트
18.'Cough' : 기침하다
19.'Telephone' : 전화; 전화기
20.'Bark' : 1. (개 등이) 짖는 소리 2. (총・사람 목소리 등이 짧게 내는) 큰 소리
21.'Chime' : (차임벨) 소리
22.'Bass_drum' : 베이스 드럼
23.'Bus' : 버스
24.'Squeak' : (그렇게 크지 않게) 끽 하는 소리를 내다
25.'Scissors' : 가위
26.'Harmonica' : 하모니카
27.'Gong' : (악기・신호용) 공 , 권투등에 시작을 알리는 것
28.'Microwave_oven' : 전자 레인지
29.'Burping_or_eructation' : 트림
30.'Double_bass' : 더블 베이스
31.'Shatter' : 산산이 부서지다, 산산조각 나다
32.'Fireworks' : 불꽃놀이
33.'Tambourine' : 트랜버린 , 탬버린
34.'Cowbell' : (소를 쉽게 찾기 위해 목에 다는) 소 방울, 카우 벨(독일 타악기)
35.'Electric_piano' : 전자피아노
36.'Meow' : 야옹(고양이 울음소리)
37.'Drawer_open_or_close' : 서랍 여닫는 소리
38.'Applause' : 박수 (갈채)
39.'Acoustic_guitar' : 어쿠스틱 기타, 클래식 기타
40.'Violin_or_fiddle' : 바이올린 과 피들
41.'Finger_snapping' : 손가락 튕기는 소리
8 changes: 4 additions & 4 deletions PROJECT2/mfcc_cnn_ksh.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
tf.set_random_seed(777)

#트레이닝/테스트 셋 각각 데이터/라벨 임포트
trainData = np.genfromtxt('/home/paperspace/Downloads/trainData6.csv', delimiter=',')
trainData = np.genfromtxt('/home/paperspace/Downloads/trainData8.csv', delimiter=',')
trainData = trainData.reshape(-1, 20, 430)
testData = np.genfromtxt('/home/paperspace/Downloads/testData6.csv', delimiter=',')
testData = np.genfromtxt('/home/paperspace/Downloads/testData8.csv', delimiter=',')
testData = testData.reshape(-1, 20, 430)
trainLabel = np.genfromtxt('/home/paperspace/Downloads/trainLabel6.csv', delimiter=',')
testLabel = np.genfromtxt('/home/paperspace/Downloads/testLabel6.csv', delimiter=',')
trainLabel = np.genfromtxt('/home/paperspace/Downloads/trainLabel8.csv', delimiter=',')
testLabel = np.genfromtxt('/home/paperspace/Downloads/testLabel8.csv', delimiter=',')

#임포트한 데이터가 원하는 데이터가 맞는지 shape을 통해 확인
print(trainData.shape, testData.shape, trainLabel.shape, testLabel.shape)
Expand Down
44 changes: 23 additions & 21 deletions PROJECT2/mfcc_processing_ksh.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#train/test, Data/Label split
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(train, test_size = 0.3)
train_set, test_set = train_test_split(train, test_size = 0.05)
trainfile = train_set.values[:,0]
testfile = test_set.values[:,0]
trainLabel = train_set.values[:,1]
Expand All @@ -40,7 +40,7 @@ def see_how_long(file):
#print(np.max(n2), np.min(n2)) #1292, 13

#show me approximate wave shape
filename= trainfile[11]
filename= trainfile[0]
y, sr = librosa.core.load(path+'audio_train/'+filename,
mono=True, res_type="kaiser_fast")
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
Expand All @@ -54,30 +54,26 @@ def see_how_long(file):
def five_sec_extract(file):
#zero padding to file.shape[0] X 20 X 430
n=file.shape[0]
array = np.zeros(n, 20, 430)
#array = np.repeat(0., n * 20 * 430).reshape(n, 20, 430)
array = np.zeros((n, 20, 430))
k=0
see = []
for filename in file:
y, sr = librosa.core.load(path+'audio_train/'+filename,
mono=True, res_type="kaiser_fast")
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
length=mfcc.shape[1]
abs_mfcc=np.abs(mfcc)
if length == 430:
array[k, :, :]=mfcc
elif length < 430:
tile_num = (430//length)+1
tile_array=np.tile(mfcc,tile_num)
mfcc=tile_array[:,0:430]
array[k, :, :]=mfcc
array[k, :, :]=np.tile(mfcc,tile_num)[:,0:430]
elif length > 430:
sample = np.zeros((20,length-430))
#sample = np.repeat(0., (length - 430)*20).reshape(20,length - 430)
for j in range(length - 430):
for i in range(20):
sample[i,j]=np.var(mfcc[i,j:j+430])
A=np.argmax(sample, axis=1)
start=np.argmax(np.bincount(A))
argmax=np.argmax(abs_mfcc, axis=1)
sample=[]
for i in range(np.max(argmax)):
sample.append(np.sum((argmax>=i) & (argmax <i+430)))
start=sample.index(max(sample))
array[k, :, :]=mfcc[:, start:start+430]
see.append(start)
k+=1
Expand All @@ -89,7 +85,7 @@ def five_sec_extract(file):
print(see2)

print(trainData.shape, testData.shape, trainLabel.shape, testLabel.shape)
# (6631, 20, 200) (2842, 20, 200) (6631,) (2842,)
# 트레이닝 셋 5%만 뽑음 (8999, 20, 430) (474, 20, 430) (8999,) (474,)

#라벨이 총 몇개가 되어야 하는지 확인
print(len(np.unique(trainLabel))) #41
Expand All @@ -111,14 +107,20 @@ def Labeling(label):

#트레이닝 및 테스트에 적절히 사용하기 위해 csv파일로 다운로드한다.
#(3D array는 csv파일로 저장이 안되므로 2D로 변환하여 저장)
trainData2D=trainData.reshape(-1, 20*200)
testData2D=testData.reshape(-1, 20*200)
np.savetxt(path+'trainData6.csv',
trainData2D=trainData.reshape(-1, 20*430)
testData2D=testData.reshape(-1, 20*430)
np.savetxt(path+'trainData8.csv',
trainData2D, delimiter=",")
np.savetxt(path+'testData6.csv',
np.savetxt(path+'testData8.csv',
testData2D, delimiter=",")
np.savetxt(path+'trainLabel6.csv',
np.savetxt(path+'trainLabel8.csv',
trainLabel, delimiter=",")
np.savetxt(path+'testLabel6.csv',
np.savetxt(path+'testLabel8.csv',
testLabel, delimiter=",")
np.savetxt(path+'testfile8.csv',
testfile, header = " ", fmt='%s')
np.array(testfile)
testfile.shape

#trainData8 <- mfcc, 20*430, train/test: 95%/5%

2 changes: 2 additions & 0 deletions PROJECT2/optx/checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
model_checkpoint_path: "/home/paperspace/Downloads/optx/optx"
all_model_checkpoint_paths: "/home/paperspace/Downloads/optx/optx"
Binary file added PROJECT2/optx/optx.data-00000-of-00001
Binary file not shown.
Binary file added PROJECT2/optx/optx.index
Binary file not shown.
Binary file added PROJECT2/optx/optx.meta
Binary file not shown.
File renamed without changes.
Binary file added PROJECT2/optx2/optx2.data-00000-of-00001
Binary file not shown.
Binary file added PROJECT2/optx2/optx2.index
Binary file not shown.
Binary file not shown.
Binary file modified x_ksh/__pycache__/mfcc.cpython-36.pyc
Binary file not shown.
20 changes: 11 additions & 9 deletions x_ksh/mfcc.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
def five_sec_extract(file):
    """Return a fixed-size MFCC feature vector for one audio file.

    The clip is loaded with librosa, converted to 20 MFCC coefficients per
    frame and brought to exactly 430 frames:

    * exactly 430 frames: used as is;
    * fewer than 430 frames: the MFCC matrix is repeated along the time axis
      and cut to 430 frames;
    * more than 430 frames: for each coefficient the frame with the largest
      absolute value is located, and the 430-frame window covering the most
      of these peak frames is kept (earliest window on ties).

    Args:
        file: Path of the audio file, passed to ``librosa.core.load``.

    Returns:
        1-D numpy array of length ``20 * 430`` (row-major flattening of the
        20 x 430 MFCC window).
    """
    import librosa
    import numpy as np

    n_frames = 430
    y, sr = librosa.core.load(file,
                              mono=True, res_type="kaiser_fast")
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    length = mfcc.shape[1]
    if length == n_frames:
        array = mfcc
    elif length < n_frames:
        tile_num = (n_frames // length) + 1
        array = np.tile(mfcc, tile_num)[:, 0:n_frames]
    else:
        peaks = np.argmax(np.abs(mfcc), axis=1)
        # Only starts that leave a full window are candidates. This keeps the
        # slice at exactly n_frames columns (so the reshape below cannot fail)
        # and guarantees at least one candidate even when every peak is at
        # frame 0.
        last_start = length - n_frames
        counts = [np.sum((peaks >= i) & (peaks < i + n_frames))
                  for i in range(last_start + 1)]
        start = int(np.argmax(counts))
        array = mfcc[:, start:start + n_frames]
    return array.reshape(20 * n_frames)
Binary file removed x_ksh/optx2/optx2.data-00000-of-00001
Binary file not shown.
Binary file removed x_ksh/optx2/optx2.index
Binary file not shown.
3 changes: 3 additions & 0 deletions x_ksh/sound_pred.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np

opt_addr="/Users/kimseunghyuck/desktop/git/daegon/KYLius-method/x_ksh/optx2/optx2"

class sound_pred:
def __init__(self, opt_addr):
# initialize/ load
Expand All @@ -21,6 +23,7 @@ def __init__(self, opt_addr):
print("Variables Saved")

def tryit(self, soundaddr):

import librosa
import numpy as np
from mfcc_processing import five_sec_extract
Expand Down
Loading

0 comments on commit 71466b7

Please sign in to comment.