www.wins.or.kr(IISPL)

커뮤니티

	강좌게시판


		WEB-PROG


			WP-알림판


			WP-자료실


			WP-보고서


		DP 알림판


		C.P.


			자료실 (C. Photo.)


		WebSC


			WSC News


			WSC Info.


			RPT & QnA


			WebSC Q/A


		SR Board(V)


			SR자료게시판


			SR레포트


			SR공지사항


		OS전체


			OS보고서


			OS Board


			O/S exclude


			OS Q/A


		COMPILER


			CMPL Boards


			컴파일러 레포트(A)


			CMPL RPRT(B)


			CMPLg RPT


		SSE


			SSE Board


			SSE RPT


		DSP


			DSP/SPEECH


			DSP레포트1


			DSP news


			DSP레포트2


		Multimedia


			Report


		C#


			C# RPT


		Notice


		데이터로표현하는세상


			Report

DSP/SPEECH

제목: python코드 - wave 로드,저장,play, 스펙트럼, griffinlim 신호 재생

3212 김윤중

dynamic play / audio stream play
- import numpy as np
  import sounddevice as sd
  
  # synthesize a pure tone at 440 Hz:
  sr = 22050 # sample rate
  T = 2.0 # duration in seconds
  t = np.linspace(0, T, int(T*sr), endpoint=False) # time variable
  x = 0.5*np.sin(2*np.pi*440*t)
  
  sd.play(x,sr)#play audio data held in NumPy array x with sampling rate sr
  sd.wait() #wait until playback is finished
오디오 load/write/STFT/ISTFT/griffinlim
- import librosa
  import librosa.display
  #load audio signal from 30 sec to 35 sec from 'example_audio.wav'
  y, sr = librosa.load('example_audio.wav', offset=30, duration=5)
  # Get the magnitude spectrogram
  S = np.abs(librosa.stft(y))
  # Invert using Griffin-Lim
  y_inv = librosa.griffinlim(S)
  # Invert without estimating phase
  y_istft = librosa.istft(S)
  
  librosa.output.write_wav('origin.wav',y,sr)
  librosa.output.write_wav('reconstructed_griffinlim.wav',y_inv,sr)
plot wave form and spectrum
- import librosa
  import librosa.display
- #plot wave form
  plt.subplot(121)
  y, sr = librosa.load('example_audio.wav', offset=30, duration=5)
  librosa.display.waveplot(y, sr=sr, color='b')
- #linear spectrogram
  plt.subplot(122)
  S = librosa.stft(y) #create linear spectrum in complex numbers
  Sdb = librosa.amplitude_to_db(abs(S),ref=np.max) #convert abs value to decibels
  librosa.display.specshow(Sdb,sr=sr,x_axis='time',y_axis='log') #plot linear spectrum on log scale
  plt.colorbar(format='%+2.0f dB')
  plt.show()
melspectrogram
- filename = 'audio/Haunting_song_of_humpback_whales-youtube-W5Trznre92c.wav'
  y, sr = librosa.load(filename) #y(946688,) sr=22050
  
  #Compute a mel-scaled spectrogram
  S_mel=librosa.feature.melspectrogram(y=y, sr=sr,n_fft=2048, hop_length=sr//4, n_mels=128)
  #S_mel:(n_mels, 1 + y.size//hop_length)=(128,172) since hop_length=sr//4=5512
  
  #convert power to decibels
  S_mel_dB = librosa.power_to_db(S_mel, ref=np.max) #(128,172)
  #plot S_mel_dB (mel spectrogram in decibels) on mel frequency scale
  librosa.display.specshow(S_mel_dB, x_axis='time', y_axis='mel', sr=sr, fmax=sr //2)
  plt.colorbar(format='%+2.0f dB')
  plt.show()
- 음성파일 wav_fn의 신호를 초당 100개의 mel_spectrogram vector 로 변환
         # (100n,80) /n sec wav
         #입력 : sample 길이 48000(16000x3초) sound signal
         #출력 : feature timestep=300, dimension:80
         # fft: win_length=n_fft=2048(0.128초)
  y, sr = librosa.load(wav_fn,sr=16000)                       #48000=16000x3초
  C = librosa.feature.melspectrogram(y, sr, None,
                 n_fft=2048, hop_length=160, power=2.0, n_mels = 80)   #(80,48000/160=300+1)
  C = librosa.core.amplitude_to_db(C)                           #(80,301)
  C = pre.minmax_scale(C,feature_range=(-1,1), axis=0).T[:-1] #(80,301)=>(300,80)
- 참고
  - S=librosa.feature.melspectrogram(y=y, sr=sr,S=None, n_fft=2048, hop_length=512, n_mels=128,win_length=None, window='hann', center=True, pad_mode='reflect', power=2.0)
    #S:(n_mels, 1 + y.size//hop_length)=(128,1850)
    - if unspecified, default to win_length=n_fft
  - D = np.abs(librosa.stft(y))**2 #(1025,1850) compute the linear STFT and square its magnitude (power spectrogram)
    S = librosa.feature.melspectrogram(S=D, sr=sr) #(128,1850) convert to mel scale
spectrum으로부터 신호의 재구성
- S = librosa.stft(y) #create linear spectrum in complex numbers from audio signal
  y_inv = librosa.griffinlim(np.abs(S)) #reconstruct audio signal from spectrum
  librosa.output.write_wav('origin.wav',y,sr)
  librosa.output.write_wav('reconstructed_griffinlim.wav',y_inv,sr)
waves
- beat notes
  - x1(t)=cos(2 pi 200 t) beat_x1.wav
  - x2(t)=cos(2 pi 180 t) beat_x2.wav
  - x3(t)=x1(t)+x2(t) beat_x3.wav
  - xc(t)=cos(2 pi 190 t), xv(t)=2cos(2 pi 10 t)
- 4 octave - generated sample
  - piano.wav