Merge pull request #27 from 1eedaegon/master

update
1eedaegon · May 3, 2018 · 71466b7 · 71466b7
2 parents 1868c64 + cab46d9
commit 71466b7
Show file tree

Hide file tree

Showing 18 changed files with 273 additions and 53 deletions.
diff --git a/PROJECT2/audio_conv1d_features.py b/PROJECT2/audio_conv1d_features.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed May  2 19:21:50 2018
+
+@author: stu
+"""
+
+#전처리 code : extract_features.py
+
+import numpy as np
+import os
+import glob
+import tensorflow as tf
+import pandas as pd
+
+# Fix TensorFlow's graph-level random seed before any ops are created.
+tf.set_random_seed(777) 
+
+# train.csv supplies the 'label' column that is encoded below;
+# feature_train.csv is parsed into a plain numeric array of features.
+# NOTE(review): rows of the two files are presumably in the same order, since
+# labels are later attached to features by position — confirm.
+train_info = pd.read_csv("C:/data/sound/train.csv",delimiter=',')
+train_data = np.genfromtxt("C:/data/sound/feature_train.csv", delimiter=',')
+
+#label set : map each label string to an integer id in 0~40 (41 classes)
+
+def labels2Num(labels):
+    """Replace every label string in ``labels`` with an integer class id.
+
+    The id of a label is its position in ``train_info['label'].unique()``
+    (read from the module-level ``train_info`` frame). Values that do not
+    occur in that column are left as they are.
+
+    Returns the DataFrame built from ``labels`` with the matching cells
+    overwritten by their ids.
+    """
+    encoded = pd.DataFrame(labels)
+    class_names = train_info['label'].unique()
+    for class_id, class_name in enumerate(class_names):
+        # Boolean-mask assignment: each cell equal to this name becomes its id.
+        encoded[encoded == class_name] = class_id
+    return encoded
+
+# Encode the string labels from train.csv as integer class ids.
+labels = train_info['label']
+df_label = labels2Num(labels)    
+
+# Training table: the feature array becomes a DataFrame and the encoded label
+# is appended as the last column, then everything is cast to float32.
+train_data = pd.DataFrame(train_data)
+train_data['label']=df_label
+train_data = train_data.astype(np.float32)
+
+# Split into a training set and a validation set (30% held out).
+# NOTE(review): no random_state is passed, so the split is presumably
+# different on every run — confirm whether that is intended.
+from sklearn.model_selection import train_test_split
+train_set, validate_set = train_test_split(train_data, test_size = 0.3)
+# Columns 0..192 are taken as the features, the last column as the label.
+trainData = train_set.values[:,0:193]  
+trainLabel = train_set.values[:,-1]
+validateData = validate_set.values[:,0:193]
+# The name is spelled 'validataLabel'; the validation step further down uses
+# the same spelling.
+validataLabel = validate_set.values[:,-1]
+
+
+# Build the TensorFlow model
+tf.reset_default_graph()
+
+# Hyper-parameters.
+# NOTE(review): n_dim and n_classes are defined here, but the graph below
+# mostly repeats the literals 193 and 41 instead of using them.
+n_dim = 193
+n_classes = 41
+training_epochs = 700
+learning_rate = 0.001
+batch_size = 100
+steps_for_validate = 5
+# NOTE(review): keep_prob is not referenced again in the code shown here;
+# dropout below is driven by p_keep_conv / p_keep_hidden.
+keep_prob = tf.placeholder(tf.float32)
+
+# Inputs: X holds one 193-value feature row per example and is reshaped to
+# (batch, 193, 1) for the 1-D convolutions. Y holds one integer class id per
+# example and is expanded to a (batch, 41) one-hot matrix.
+X = tf.placeholder(tf.float32, [None, n_dim])
+X_1d = tf.reshape(X, [-1,193,1])
+Y = tf.placeholder(tf.int32, [None, 1])
+Y_onehot=tf.reshape(tf.one_hot(Y, 41), [-1, 41])
+# Dropout keep probabilities, fed at run time.
+p_keep_conv = tf.placeholder(tf.float32, name='p_keep_conv')
+p_keep_hidden = tf.placeholder(tf.float32, name='p_keep_hidden')
+# NOTE(review): bare expression; its value is discarded and it has no effect.
+193*1.5
+#convolution layer 1
+# Pattern for each stage: conv1d + ReLU -> batch normalisation ->
+# max-pool (size 2, stride 2) -> dropout.
+# NOTE(review): tf.layers.batch_normalization is called without `training=`
+# here and below; its documented default is training=False — confirm that
+# inference-mode normalisation during training is intended.
+c1 = tf.layers.conv1d(X_1d, 386,kernel_size=2, strides=1, padding='Same',
+                     activation=tf.nn.relu, name='c1')
+n1 = tf.layers.batch_normalization(c1)
+p1 = tf.layers.max_pooling1d(n1, pool_size=2, strides=2, padding='Same')
+p1 = tf.nn.dropout(p1, p_keep_conv)
+# NOTE(review): bare expression, value discarded. 386*1.5 == 579, which is the
+# filter count used by c2 and c3.
+386*1.5
+#shape=(?, 97, 386)
+c2 = tf.layers.conv1d(p1, 579,kernel_size=2, strides=1, padding='Same',
+                     activation=tf.nn.relu, name='c2')
+n2 = tf.layers.batch_normalization(c2)
+p2 = tf.layers.max_pooling1d(n2, pool_size=2, strides=2, padding='Same')
+p2 = tf.nn.dropout(p2, p_keep_conv)
+
+#shape=(?, 49, 579)
+c3 = tf.layers.conv1d(p2, 579,kernel_size=1, strides=1, padding='Same',
+                     activation=tf.nn.relu, name='c3')
+n3 = tf.layers.batch_normalization(c3)
+p3 = tf.layers.max_pooling1d(n3, pool_size=2, strides=2, padding='Same')
+p3 = tf.nn.dropout(p3, p_keep_conv)
+
+#shape=(?, 25, 579)
+# Flatten the last pooled feature map to one row per example.
+L4_flat = tf.reshape(p3, [-1,25*579])
+
+# Hidden layer with 624 units: matmul (no bias term) -> ReLU -> batch
+# normalisation -> dropout.
+W4 = tf.get_variable("W4", shape=[25*579, 624], initializer=tf.contrib.layers.xavier_initializer())
+L5 = tf.nn.relu(tf.matmul(L4_flat, W4))
+n5 = tf.layers.batch_normalization(L5)
+L5 = tf.nn.dropout(n5, p_keep_hidden)
+
+# Output layer: one logit per class (41).
+W5 = tf.get_variable("W5", shape=[624,41], initializer=tf.contrib.layers.xavier_initializer())
+b = tf.Variable(tf.random_normal([41]))
+logits = tf.matmul(L5, W5) + b
+
+# define cost/loss & optimizer
+# Mean softmax cross-entropy between the logits and the one-hot labels.
+cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels= Y_onehot))
+optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam optimizer variant
+# Predicted class id = index of the largest logit.
+predict_op = tf.argmax(logits, 1, name="pred")
+
+
+# initialize
+sess = tf.Session()
+sess.run(tf.global_variables_initializer())
+# NOTE(review): the saver is created but never used in the code shown here.
+saver = tf.train.Saver()
+
+# Evaluation ops are built once, before the loop. Creating them inside the
+# epoch loop would add a fresh set of nodes to the default graph at every
+# validation step.
+correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y_onehot, 1))
+accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+
+
+# train my model
+
+print('Learning started. It takes sometime.')
+# Number of full mini-batches per epoch; a trailing partial batch is not used.
+total_batch = int(len(trainData) / batch_size)
+for epoch in range(training_epochs):
+    avg_cost = 0
+    for i in range(total_batch):
+        batch_xs = trainData[i*batch_size:(i+1)*batch_size]
+        # Y expects shape (batch, 1), so the 1-D label slice is reshaped.
+        batch_ys = trainLabel[i*batch_size:(i+1)*batch_size].reshape(-1, 1)
+        feed_dict = {X: batch_xs, Y: batch_ys, p_keep_conv: .7, p_keep_hidden: .5}
+        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
+        avg_cost += c / total_batch
+    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
+    # Validate every `steps_for_validate` epochs with dropout disabled
+    # (both keep probabilities set to 1).
+    if epoch % steps_for_validate == steps_for_validate-1:
+        print('Accuracy:', sess.run(accuracy, feed_dict={
+                X: validateData, Y: validataLabel.reshape(-1, 1), p_keep_conv: 1, p_keep_hidden: 1}))
+print('Finished!')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/PROJECT2/label_ksw.txt b/PROJECT2/label_ksw.txt
@@ -0,0 +1,41 @@
+1.'Hi-hat' : 풋 심벌즈(드럼에 달린, 발로 치는 심벌즈)
+2.'Saxophone' : 색소폰
+3.'Trumpet' : 트럼펫, 나팔
+4.'Glockenspiel' : 종금이라고도 불리는 오르프악기
+5.'Cello' : 첼로
+6.'Knock' : (문 등을 똑똑 하고) 두드리다, 노크하다 
+7.'Gunshot_or_gunfire' : 발포, 총격; 총소리
+8.'Clarinet' : 클라리넷
+9.'Computer_keyboard' : 컴퓨터 키보드
+10.'Keys_jangling' : 열쇠 쨍그랑 소리
+11.'Snare_drum' : 스네어드럼(뒷면에 쇠 울림줄을 댄 작은 북)
+12.'Writing' : (글자・숫자를) 쓰다 
+13.'Laughter' : (소리내어) 웃다
+14.'Tearing' : 찢다, 뜯다; 찢어지다 
+15.'Fart' : (특히 소리가 크게 나게) 방귀를 뀌다 
+16.'Oboe' : 오보에
+17.'Flute' : 플루트
+18.'Cough' : 기침하다
+19.'Telephone' : 전화; 전화기
+20.'Bark' : 1. (개 등이) 짖는 소리   2. (총・사람 목소리 등이 짧게 내는) 큰 소리
+21.'Chime' : (차임벨) 소리
+22.'Bass_drum' : 베이스 드럼
+23.'Bus' : 버스
+24.'Squeak' : (그렇게 크지 않게) 끽 하는 소리를 내다 
+25.'Scissors' : 가위
+26.'Harmonica' : 하모니카
+27.'Gong' : (악기・신호용) 공 , 권투등에 시작을 알리는 것
+28.'Microwave_oven' : 전자 레인지
+29.'Burping_or_eructation' : 트림
+30.'Double_bass' : 더블 베이스
+31.'Shatter' :  산산이 부서지다, 산산조각 나다
+32.'Fireworks' : 불꽃놀이
+33.'Tambourine' : 탬버린
+34.'Cowbell' : (소를 쉽게 찾기 위해 목에 다는) 소 방울, 카우 벨(독일 타악기)
+35.'Electric_piano' : 전자피아노
+36.'Meow' : 야옹(고양이 울음소리)
+37.'Drawer_open_or_close' : 서랍 여닫는 소리
+38.'Applause' : 박수 (갈채)
+39.'Acoustic_guitar' : 어쿠스틱 기타, 클래식 기타
+40.'Violin_or_fiddle' : 바이올린 과 피들
+41.'Finger_snapping' : 손가락 튕기는 소리
diff --git a/PROJECT2/mfcc_cnn_ksh.py b/PROJECT2/mfcc_cnn_ksh.py
@@ -9,12 +9,12 @@
 tf.set_random_seed(777) 
 
 #트레이닝/테스트 셋 각각 데이터/라벨 임포트
-trainData = np.genfromtxt('/home/paperspace/Downloads/trainData6.csv', delimiter=',')
+trainData = np.genfromtxt('/home/paperspace/Downloads/trainData8.csv', delimiter=',')
 trainData = trainData.reshape(-1, 20, 430)
-testData = np.genfromtxt('/home/paperspace/Downloads/testData6.csv', delimiter=',')
+testData = np.genfromtxt('/home/paperspace/Downloads/testData8.csv', delimiter=',')
 testData = testData.reshape(-1, 20, 430)
-trainLabel = np.genfromtxt('/home/paperspace/Downloads/trainLabel6.csv', delimiter=',')
-testLabel = np.genfromtxt('/home/paperspace/Downloads/testLabel6.csv', delimiter=',')
+trainLabel = np.genfromtxt('/home/paperspace/Downloads/trainLabel8.csv', delimiter=',')
+testLabel = np.genfromtxt('/home/paperspace/Downloads/testLabel8.csv', delimiter=',')
 
 #임포트한 데이터가 원하는 데이터가 맞는지 shape을 통해 확인
 print(trainData.shape, testData.shape, trainLabel.shape, testLabel.shape)

diff --git a/PROJECT2/mfcc_processing_ksh.py b/PROJECT2/mfcc_processing_ksh.py
@@ -14,7 +14,7 @@
 
 #train/test, Data/Label split
 from sklearn.model_selection import train_test_split
-train_set, test_set = train_test_split(train, test_size = 0.3)
+train_set, test_set = train_test_split(train, test_size = 0.05)
 trainfile = train_set.values[:,0]
 testfile = test_set.values[:,0]
 trainLabel = train_set.values[:,1]
@@ -40,7 +40,7 @@ def see_how_long(file):
 #print(np.max(n2), np.min(n2))    #1292, 13
 
 #show me approximate wave shape
-filename= trainfile[11]
+filename= trainfile[0]
 y, sr = librosa.core.load(path+'audio_train/'+filename, 
                           mono=True, res_type="kaiser_fast")
 mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
@@ -54,30 +54,26 @@ def see_how_long(file):
 def five_sec_extract(file):
     #zero padding to file.shape[0] X 20 X 430
     n=file.shape[0]
-    array = np.zeros(n, 20, 430)
-    #array = np.repeat(0., n * 20 * 430).reshape(n, 20, 430)
+    array = np.zeros((n, 20, 430))
     k=0
     see = []
     for filename in file:
         y, sr = librosa.core.load(path+'audio_train/'+filename, 
                                   mono=True, res_type="kaiser_fast")
         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
         length=mfcc.shape[1]
+        abs_mfcc=np.abs(mfcc)
         if length == 430:
             array[k, :, :]=mfcc
         elif length < 430:
             tile_num = (430//length)+1
-            tile_array=np.tile(mfcc,tile_num)
-            mfcc=tile_array[:,0:430]
-            array[k, :, :]=mfcc
+            array[k, :, :]=np.tile(mfcc,tile_num)[:,0:430]
         elif length > 430:
-            sample = np.zeros((20,length-430))
-            #sample = np.repeat(0., (length - 430)*20).reshape(20,length - 430)
-            for j in range(length - 430):
-                for i in range(20):
-                    sample[i,j]=np.var(mfcc[i,j:j+430])
-            A=np.argmax(sample, axis=1)
-            start=np.argmax(np.bincount(A))
+            argmax=np.argmax(abs_mfcc, axis=1)
+            sample=[]
+            for i in range(np.max(argmax)):
+                 sample.append(np.sum((argmax>=i) & (argmax <i+430)))
+            start=sample.index(max(sample))
             array[k, :, :]=mfcc[:, start:start+430]
             see.append(start)
         k+=1
@@ -89,7 +85,7 @@ def five_sec_extract(file):
 print(see2)
 
 print(trainData.shape, testData.shape, trainLabel.shape, testLabel.shape)
-# (6631, 20, 200) (2842, 20, 200) (6631,) (2842,)
+# 트레이닝 셋 5%만 뽑음 (8999, 20, 430) (474, 20, 430) (8999,) (474,)
 
 #라벨이 총 몇개가 되어야 하는지 확인
 print(len(np.unique(trainLabel)))   #41
@@ -111,14 +107,20 @@ def Labeling(label):
 
 #트레이닝 및 테스트에 적절히 사용하기 위해 csv파일로 다운로드한다. 
 #(3D array는 csv파일로 저장이 안되므로 2D로 변환하여 저장)
-trainData2D=trainData.reshape(-1, 20*200)
-testData2D=testData.reshape(-1, 20*200)
-np.savetxt(path+'trainData6.csv', 
+trainData2D=trainData.reshape(-1, 20*430)
+testData2D=testData.reshape(-1, 20*430)
+np.savetxt(path+'trainData8.csv', 
            trainData2D, delimiter=",")
-np.savetxt(path+'testData6.csv', 
+np.savetxt(path+'testData8.csv', 
            testData2D, delimiter=",")
-np.savetxt(path+'trainLabel6.csv', 
+np.savetxt(path+'trainLabel8.csv', 
            trainLabel, delimiter=",")
-np.savetxt(path+'testLabel6.csv', 
+np.savetxt(path+'testLabel8.csv', 
            testLabel, delimiter=",")
+np.savetxt(path+'testfile8.csv', 
+           testfile, header = " ", fmt='%s')
+np.array(testfile)
+testfile.shape
+
+#trainData8 <- mfcc, 20*430, train/test: 95%/5%
 
diff --git a/PROJECT2/optx/checkpoint b/PROJECT2/optx/checkpoint
@@ -0,0 +1,2 @@
+model_checkpoint_path: "/home/paperspace/Downloads/optx/optx"
+all_model_checkpoint_paths: "/home/paperspace/Downloads/optx/optx"
diff --git a/PROJECT2/optx/optx.data-00000-of-00001 b/PROJECT2/optx/optx.data-00000-of-00001
diff --git a/PROJECT2/optx/optx.index b/PROJECT2/optx/optx.index
diff --git a/PROJECT2/optx/optx.meta b/PROJECT2/optx/optx.meta
diff --git a/x_ksh/optx2/checkpoint → PROJECT2/optx2/checkpoint b/x_ksh/optx2/checkpoint → PROJECT2/optx2/checkpoint
diff --git a/PROJECT2/optx2/optx2.data-00000-of-00001 b/PROJECT2/optx2/optx2.data-00000-of-00001
diff --git a/PROJECT2/optx2/optx2.index b/PROJECT2/optx2/optx2.index
diff --git a/x_ksh/optx2/optx2.meta → PROJECT2/optx2/optx2.meta b/x_ksh/optx2/optx2.meta → PROJECT2/optx2/optx2.meta
diff --git a/x_ksh/__pycache__/mfcc.cpython-36.pyc b/x_ksh/__pycache__/mfcc.cpython-36.pyc
diff --git a/x_ksh/mfcc.py b/x_ksh/mfcc.py
@@ -1,21 +1,23 @@
 def five_sec_extract(file):
+    """Load one audio file and return its MFCCs as a fixed-length vector.
+
+    A 20-coefficient MFCC matrix is computed for ``file`` and brought to
+    exactly 430 frames: shorter clips are repeated (tiled) and truncated,
+    longer clips are cropped to the 430-frame window that covers the most
+    per-coefficient magnitude peaks.
+
+    Args:
+        file: audio path handed to ``librosa.core.load``.
+
+    Returns:
+        1-D array with 20*430 values (the (20, 430) matrix flattened).
+    """
     import librosa
     import numpy as np
-    array = np.repeat(0., 20 * 430).reshape(20, 430)
+    # Placeholder (20, 430) buffer; every branch below rebinds `array`.
+    array = np.zeros((20, 430))
     y, sr = librosa.core.load(file, 
                               mono=True, res_type="kaiser_fast")
     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
     length=mfcc.shape[1]
+    abs_mfcc=np.abs(mfcc)
     if length == 430:
         array=mfcc
     elif length < 430:
-        array[:, :length]=mfcc
+        # Repeat the clip along the frame axis until it covers 430 frames,
+        # then cut off the excess.
+        tile_num = (430//length)+1
+        array=np.tile(mfcc,tile_num)[:,0:430]
     elif length > 430:
-        sample = np.repeat(0., (length - 430)*20).reshape(20,length - 430)
-        for j in range(length - 430):
-            for i in range(20):
-                sample[i,j]=np.var(mfcc[i,j:j+430])
-        A=np.argmax(sample, axis=1)
-        start=np.argmax(np.bincount(A))
+        # Frame index at which each coefficient reaches its largest magnitude.
+        argmax=np.argmax(abs_mfcc, axis=1)
+        # Score every window start that keeps the slice inside the clip
+        # (0 .. length-430) by the number of peaks it covers. Looping over
+        # the valid starts rather than range(np.max(argmax)) keeps `sample`
+        # non-empty when every peak is at frame 0 (max() of an empty list
+        # raises ValueError) and picks the same start in all other cases.
+        sample=[]
+        for i in range(length - 430 + 1):
+            sample.append(np.sum((argmax>=i) & (argmax <i+430)))
+        # Earliest start among the best-scoring windows.
+        start=sample.index(max(sample))
         array=mfcc[:, start:start+430]
-    return(array.reshape(20*430))
+    return(array.reshape(20*430))
diff --git a/x_ksh/optx2/optx2.data-00000-of-00001 b/x_ksh/optx2/optx2.data-00000-of-00001
diff --git a/x_ksh/optx2/optx2.index b/x_ksh/optx2/optx2.index
diff --git a/x_ksh/sound_pred.py b/x_ksh/sound_pred.py
@@ -1,6 +1,8 @@
 import tensorflow as tf
 import numpy as np
 
+opt_addr="/Users/kimseunghyuck/desktop/git/daegon/KYLius-method/x_ksh/optx2/optx2"
+
 class sound_pred:
     def __init__(self, opt_addr):
         # initialize/ load
@@ -21,6 +23,7 @@ def __init__(self, opt_addr):
         print("Variables Saved")
 
     def tryit(self, soundaddr):
+
         import librosa
         import numpy as np
         from mfcc_processing import five_sec_extract
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		model_checkpoint_path: "/home/paperspace/Downloads/optx/optx"
		all_model_checkpoint_paths: "/home/paperspace/Downloads/optx/optx"