TTS data pipeline
import numpy as np
import tensorflow as tf
# pa, pt, hp: project-specific audio, text, and hyper-parameter helper modules

class _dataset:

    def __init__(self, name, sess, ids):  # ids: ['LJ001-0001', 'LJ001-0002', ...]
        self.name = name
        self.sess = sess
        # [['LJ001-0001', 'Printing, ...'], [...], ...]
        self.transcript = np.loadtxt('./data/LJSpeech-1.1/transcript.csv',
                                     dtype=str, delimiter='|', encoding='utf-8')
        # {'LJ001-0001': 'Printing,...', 'LJ001-0002': 'Writing,..', ...}
        self.id2text = {id: text for id, text in self.transcript[:, :2]}
        self.texts = [self.id2text[id] for id in ids]  # ['Printing,...', 'Writing,..', ...]
        self.ids = ids                                 # ['LJ001-0001', 'LJ001-0002', ...]
        self.ch2id = {v: id for id, v in enumerate(pa.vocab)}
        file_paths = ['./data/LJSpeech-1.1/wavs/{}.wav'.format(id) for id in self.ids]
        # builds self.encoder_code, self.decoder_input, self.mel_spectro, self.linear_spectro
        self.create_tf_dataset(file_paths, self.ids, hp.BATCH_SIZE)

    def text_to_code(self, text):
        text = pt.text_normalize(text)
        return pt.transform_text_to_code(text, self.ch2id, pt.NB_CHARS_MAX)

    def codes(self, offset):
        code = np.asarray(self.text_to_code(self.texts[offset]))
        return code

    def create_tf_dataset(self, file_paths, ids, batch_size):

        def parse_function(data_path, id):

            def mel_linear_spectro_decod_input(data_path, id):  # './data/.../LJ001-0001.wav', b'LJ001-0001'
                mel_spectro, linear_spectro, decoder_input = pa.padded_mel_linear_spectro_decod_input(data_path)
                idstr = ''.join([chr(b) for b in id])  # b'LJ001-0001' -> 'LJ001-0001'
                encoder_input = self.text_to_code(self.id2text[idstr])
                return [encoder_input, decoder_input, mel_spectro, linear_spectro]

            y = tf.py_func(mel_linear_spectro_decod_input,                  # python function to wrap
                           [data_path, id],                                 # arguments
                           [tf.int32, tf.float32, tf.float32, tf.float32])  # return types
            return y

        dataset = tf.data.Dataset.from_tensor_slices((file_paths, ids))
        dataset = dataset.map(parse_function, num_parallel_calls=4)  # run the map function with 4 parallel calls
        dataset = dataset.repeat().batch(batch_size)                 # repeat so every batch is refilled to full size
        dataset = dataset.prefetch(1)                                # prepare one batch ahead
        iter = tf.data.Iterator.from_structure(
            (tf.int32, tf.float32, tf.float32, tf.float32),          # output types
            ((batch_size, 200), (batch_size, 200, 80),
             (batch_size, 200, 400), (batch_size, 850, 513)))        # output shapes
        # [encoder_input, decoder_input, mel_spectro, linear_spectro]
        self.encoder_code, self.decoder_input, self.mel_spectro, self.linear_spectro = iter.get_next()
        self.init_operation = iter.make_initializer(dataset)

    def init_op(self):
        self.sess.run(self.init_operation)
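The inner mel_linear_spectro_decod_input function runs plain Python/NumPy code, which is why parse_function wraps it with tf.py_func before handing it to Dataset.map. A minimal, self-contained sketch of that pattern; load_example and parse_fn are made-up stand-ins for the real audio loader, not part of the project code:

import numpy as np
import tensorflow as tf

def load_example(path):                       # hypothetical stand-in for pa.padded_mel_linear_spectro_decod_input
    path = path.decode('utf-8')               # tf.py_func hands string tensors in as bytes
    feat = np.ones((5, 3), dtype=np.float32)  # pretend feature matrix "read" from path
    label = np.int32(len(path))               # pretend label
    return feat, label

def parse_fn(path):
    feat, label = tf.py_func(load_example, [path], [tf.float32, tf.int32])
    feat.set_shape((5, 3))                    # py_func drops static shape info; restore it for batching
    label.set_shape(())
    return feat, label

paths = ['./a.wav', './b.wav']
ds = tf.data.Dataset.from_tensor_slices(paths).map(parse_fn).batch(2)
feat_batch, label_batch = ds.make_one_shot_iterator().get_next()

with tf.Session() as sess:
    print(sess.run(feat_batch).shape)         # (2, 5, 3)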
	
Usage example:

ds = _dataset('train', sess, id_list)
ds.init_op()
# ds.encoder_code, ds.decoder_input, ds.mel_spectro, ds.linear_spectro are now ready-to-run tensors
model.train_on_batch([ds.encoder_code, ds.decoder_input], [ds.mel_spectro, ds.linear_spectro])
	
A minimal tf.data example with dummy data and an initializable iterator:

def parse_function(filename, label):
    return [label, label], label

def get_dataset(filenames, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.shuffle(len(filenames))
    dataset = dataset.map(parse_function, num_parallel_calls=4)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(1)
    dataset = dataset.repeat()
    return dataset

filenames = ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a']
labels    = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
batch_size = 4

dataset = get_dataset(filenames, labels, batch_size)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()
init_op = iterator.initializer
with tf.Session() as sess:
	
    sess.run(init_op)
    print(sess.run(next_element))
    print(sess.run(next_element))
    print(sess.run(next_element))
    print(sess.run(next_element))
"""(array([[2, 2],       [9, 9],       [1, 1],       [8, 8]]), array([2, 9, 1, 8]))
 (array([[ 6,  6],       [ 5,  5],       [10, 10],       [ 3,  3]]), array([ 6,  5, 10,  3]))
 (array([[4, 4],       [7, 7]]), array([4, 7]))
 (array([[4, 4],       [9, 9],       [3, 3],       [6, 6]]), array([4, 9, 3, 6]))
 """
The built-in input pipeline: never use feed_dict for training data again (updated for TensorFlow 1.8). A simple model that uses batching and switches between the train and test datasets with an initializable iterator:
# Wrapping it all together -> switch between train and test set using an initializable iterator
EPOCHS = 10

# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()
iter = dataset.make_initializable_iterator()   # create the iterator before calling get_next()
features, labels = iter.get_next()

# using two numpy arrays
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data  = (np.random.sample((20, 2)),  np.random.sample((20, 1)))

# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh)   # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)
loss = tf.losses.mean_squared_error(prediction, labels)  # pass the second value from iter.get_next() as label
train_op = tf.train.AdamOptimizer().minimize(loss)
with tf.Session() as sess:
	
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
    print('Training...')
    for i in range(EPOCHS):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss])  # no feed_dict
            tot_loss += loss_value
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(iter.initializer, feed_dict={x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})
    print('Test Loss: {:4f}'.format(sess.run(loss)))

Training...
 Iter: 0, Loss: 0.2977
 Iter: 1, Loss: 0.2152
 Iter: 2, Loss: 0.1787
 Iter: 3, Loss: 0.1597
 Iter: 4, Loss: 0.1277
 Iter: 5, Loss: 0.1334
 Iter: 6, Loss: 0.1000
 Iter: 7, Loss: 0.1154
 Iter: 8, Loss: 0.0989
 Iter: 9, Loss: 0.0948
 Test Loss: 0.082150
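The snippet above uses BATCH_SIZE and n_batches without defining them. A minimal completion, assuming a batch size of 16 (the value the reinitializable example below feeds in) and one pass over the 100 training examples per epoch; these two lines would go before the session block, and the reinitializable example below needs the same definitions:

BATCH_SIZE = 16                                    # assumed value
n_batches  = train_data[0].shape[0] // BATCH_SIZE  # assumed: number of batches per epoch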
A simple model that uses batching and switches between the train and test datasets with a reinitializable iterator:
# Wrapping it all together -> switch between train and test set using a reinitializable iterator
EPOCHS = 10

# create a placeholder to dynamically switch between batch sizes
batch_size = tf.placeholder(tf.int64)
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
train_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()
test_dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)  # always batch, even if you want to one-shot it

# using two numpy arrays
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.random.sample((20, 2)), np.random.sample((20, 1)))

# create an iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
features, labels = iter.get_next()
# create the initialisation operations
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)

# make a simple model
net = tf.layers.dense(features, 8, activation=tf.tanh)   # pass the first value from iter.get_next() as input
net = tf.layers.dense(net, 8, activation=tf.tanh)
prediction = tf.layers.dense(net, 1, activation=tf.tanh)
loss = tf.losses.mean_squared_error(prediction, labels)  # pass the second value from iter.get_next() as label
train_op = tf.train.AdamOptimizer().minimize(loss)
with tf.Session() as sess:
	
    sess.run(tf.global_variables_initializer())
    # initialise iterator with train data
    sess.run(train_init_op, feed_dict={x: train_data[0], y: train_data[1], batch_size: 16})
    print('Training...')
    for i in range(EPOCHS):
        tot_loss = 0
        for _ in range(n_batches):
            _, loss_value = sess.run([train_op, loss])
            tot_loss += loss_value
        print("Iter: {}, Loss: {:.4f}".format(i, tot_loss / n_batches))
    # initialise iterator with test data
    sess.run(test_init_op, feed_dict={x: test_data[0], y: test_data[1], batch_size: len(test_data[0])})
    print('Test Loss: {:4f}'.format(sess.run(loss)))
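Because test_dataset is not repeated, evaluation can also loop over smaller test batches until the iterator is exhausted instead of running loss once on the whole test set. A minimal sketch of that pattern, not in the original snippet, assuming it runs inside the same session as above:

    sess.run(test_init_op, feed_dict={x: test_data[0], y: test_data[1], batch_size: 4})
    test_losses = []
    while True:
        try:
            test_losses.append(sess.run(loss))   # consumes one test batch per call
        except tf.errors.OutOfRangeError:        # raised once the non-repeating test_dataset is exhausted
            break
    print('Test Loss: {:4f}'.format(np.mean(test_losses)))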