I want to find the MFCCs of each frame of a song. Can you tell me how to compute them? I found this code but am unable to understand what it is doing. The code prints an array, a duration and a period, but I have heard that MFCC gives you a set of coefficients for each frame. Can you help me with how to use this code?
Here is the code
This is the main program (timeseries.py):
import numpy as N
# Public names exported by `from timeseries import *`.
__all__ = [
'timeseries'
]
class InfoArray(N.ndarray):
    """ndarray subclass that carries a fixed set of named metadata fields.

    A subclass lists its field names in ``_fields_``.  Each field is accepted
    as a keyword argument by the constructor and is exposed as an attribute
    of the resulting array (``None`` when it was not supplied).  The values
    live on the individual array, so creating a second array never changes
    the metadata of the first, and they follow the array through views,
    slices and other operations that trigger ``__array_finalize__``.
    """

    # Names of the metadata fields; subclasses override this.
    _fields_ = ()

    def __new__(subtype, data, dtype=None, copy=True, **kwargs):
        """Build a new array from `data`.

        Parameters:
        - `data`: array-like, or an existing InfoArray.
        - `dtype`: element type of the result; None keeps the type of `data`.
        - `copy`: only consulted when `data` is an InfoArray; if false and no
          type conversion is needed, the result is a view sharing its memory.
        - `kwargs`: one value per entry of `_fields_`.  When `data` is an
          InfoArray its field values are inherited and only the fields named
          here are overridden.

        Raises TypeError for a keyword that is not listed in `_fields_`.
        """
        # Validate against the declared fields before building anything, so
        # a keyword that merely collides with an ndarray attribute (e.g.
        # 'shape') is rejected instead of being silently dropped.
        for kw in kwargs:
            if kw not in subtype._fields_:
                raise TypeError('invalid keyword argument \'%s\'' % (kw,))

        if isinstance(data, InfoArray):
            # astype(None) would coerce to float64, so resolve None to the
            # array's own dtype first.
            target = data.dtype if dtype is None else N.dtype(dtype)
            if not copy and target == data.dtype:
                result = data.view(subtype)
            else:
                result = data.astype(target).view(subtype)
            # Inherited values were copied by __array_finalize__.
            assigned = kwargs
        else:
            result = N.array(data, dtype=dtype).view(subtype)
            assigned = dict((f, kwargs.get(f)) for f in subtype._fields_)

        for name, value in assigned.items():
            setattr(result, name, value)
        return result

    def __array_finalize__(self, obj):
        # Called by NumPy for every new instance (construction, view,
        # slice, ...).  `obj` is the array this one derives from, or None.
        for f in self._fields_:
            setattr(self, f, getattr(obj, f, None))
class timeseries(N.ndarray):
    """ndarray subclass holding sampled data plus its sampling frequency.

    Every instance has two extra attributes, `sampling_frequency` and
    `info`.  They are stored on the individual array, so building another
    timeseries never changes the metadata of existing ones, and they are
    propagated to views and slices through ``__array_finalize__``.
    """

    # Defaults used when an array is created from something that carries no
    # metadata (e.g. view-casting a plain ndarray).
    sampling_frequency = 0.0
    info = None

    def __new__(subtype, data, sampling_frequency=0.0, info=None, dtype=None, copy=True):
        """Build a timeseries from `data`.

        Parameters:
        - `data`: array-like, or an existing timeseries.
        - `sampling_frequency`: stored on the result as `sampling_frequency`.
        - `info`: arbitrary user data stored on the result as `info`.
        - `dtype`: element type of the result; None keeps the type of `data`.
        - `copy`: only consulted when `data` is a timeseries; if false and no
          type conversion is needed, the result is a view sharing its memory.

        When `data` already is a timeseries, its `sampling_frequency` and
        `info` are inherited and the corresponding arguments are ignored.
        """
        if isinstance(data, timeseries):
            # astype(None) would coerce to float64, so resolve None to the
            # array's own dtype first.
            target = data.dtype if dtype is None else N.dtype(dtype)
            if not copy and target == data.dtype:
                return data.view(subtype)
            return data.astype(target).view(subtype)

        result = N.array(data, dtype=dtype).view(subtype)
        result.sampling_frequency = sampling_frequency
        result.info = info
        return result

    def __array_finalize__(self, obj):
        # Called by NumPy for every new instance (construction, view,
        # slice, ...).  `obj` is the array this one derives from, or None;
        # fall back to the class defaults when it carries no metadata.
        for attr in ('sampling_frequency', 'info'):
            default = getattr(type(self), attr)
            setattr(self, attr, getattr(obj, attr, default))

    def __repr__(self):
        desc = """array(data=\n %(data)s,\nsampling_frequency=%(fs)f)"""
        return desc % {'data': str(self),
                       'fs': self.sampling_frequency}
if __name__ == '__main__':
    # Small demonstration of InfoArray: a subclass with two metadata fields
    # is built, then the array and each field value are printed.
    class MFCC(InfoArray):
        _fields_ = ['duration', 'period']

        def bar(self):
            print(self)

    demo = MFCC([1., 2., 3.], duration=20e-3, period=10e-3)
    for shown in (demo, demo.duration, demo.period):
        print(shown)
    demo.bar()
And this is the code of the MFCC class (mfcc.py):
import numpy as N
from timeseries import timeseries
from util import exactly_2d
# Public names exported by `from mfcc import *`.
__all__ = [
'triang',
'linear2mel',
'mel2linear',
'MelFilterbank',
'MFCC'
]
# triangular window from SciPy
def triang(M, sym=1):
    """Return the M-point triangular window.

    Parameters:
    - `M`: number of points in the window; values below 1 give an empty
      array and 1 gives a single one.
    - `sym`: when true the window is symmetric.  When false and `M` is even,
      the window is computed for M + 1 points and its last point is dropped.
    """
    if M < 1:
        return N.array([])
    if M == 1:
        return N.ones(1, 'd')

    is_odd = M % 2
    # Only a non-symmetric window of even length needs the extra point.
    extend = not sym and not is_odd
    if extend:
        M = M + 1

    half = N.arange(1, int((M + 1) / 2) + 1)
    if M % 2 == 0:
        rising = (2 * half - 1.0) / M
        window = N.r_[rising, rising[::-1]]
    else:
        rising = 2 * half / (M + 1.0)
        # The peak is shared, so mirror everything except the last point.
        window = N.r_[rising, rising[-2::-1]]

    if extend:
        window = window[:-1]
    return window
def linear2mel(linfreq):
    """Convert linear frequency to Mel frequency.

    Computes ``1127 * ln(1 + linfreq / 700)`` element-wise.  `linfreq` may
    be a scalar or any array-like, with integer or floating point values;
    it is never modified.  The result is a new array with length-one axes
    squeezed out, so a scalar input gives a 0-d array.
    """
    # Build the result with ordinary (not in-place) arithmetic: dividing an
    # integer array in place cannot store the fractional result.
    freq = N.atleast_1d(linfreq)
    mel = 1127.0 * N.log(freq / 700.0 + 1.0)
    return mel.squeeze()
def mel2linear(melfreq):
    """Convert Mel frequency to linear frequency.

    Computes ``700 * (exp(melfreq / 1127) - 1)`` element-wise.  `melfreq`
    may be a scalar or any array-like, with integer or floating point
    values; it is never modified.  The result is a new array with
    length-one axes squeezed out, so a scalar input gives a 0-d array.
    """
    # Build the result with ordinary (not in-place) arithmetic: dividing an
    # integer array in place cannot store the fractional result.
    mel = N.atleast_1d(melfreq)
    freq = 700.0 * (N.exp(mel / 1127.0) - 1.0)
    return freq.squeeze()
class MelFilterbank:
    """Bank of overlapping band filters spaced evenly on the Mel scale.

    Each filter spans two Mel steps and starts one step after the previous
    one, so neighbouring filters overlap by half their width.
    """

    def __init__(self, nfilters, startfreq, stopfreq, filter_window):
        """
        Parameters:
        - `nfilters`: number of filters.
        - `startfreq`: linear frequency where first filter begins.
        - `stopfreq`: linear frequency where last filter ends.
        - `filter_window`: window to use for each filter; called with the
          number of spectrum samples in the band, returns the weights.
        """
        self.nfilters = nfilters
        self.filter_window = filter_window
        # make sure we're working with floating point values
        startfreq, stopfreq = float(startfreq), float(stopfreq)
        # convert start and stop frequencies
        melstartfreq = linear2mel(startfreq)
        melstopfreq = linear2mel(stopfreq)
        # step between start of filters
        melstepfreq = (melstopfreq - melstartfreq) / (nfilters + 1.0)
        # start Mel frequencies of filters
        starts = N.arange(0., nfilters) * melstepfreq + melstartfreq
        # stop Mel frequencies of filters (two steps after the start)
        stops = (N.arange(0., nfilters) + 2.0) * melstepfreq + melstartfreq
        # filter bands in Mel frequency; a real list so that apply() can
        # walk it on every call
        self.filter_bands = list(zip(starts, stops))

    def apply(self, frames, sampling_frequency):
        """Return the windowed mean spectral magnitude of every band.

        Parameters:
        - `frames`: time-domain frames, one frame per row (passed through
          `exactly_2d` first).
        - `sampling_frequency`: sampling frequency of the frames, in the
          same unit as the `startfreq`/`stopfreq` given to the constructor.

        Returns an array of shape (number of frames, `nfilters`).  Columns
        of filters that start above the highest frequency of the spectrum
        are left at zero.
        """
        # XXX figure out if we want power spectrum or energy spectrum here
        frames = exactly_2d(frames)
        nframes = frames.shape[0]
        frame_length = frames.shape[-1]
        # frames are zero-padded to the next power of two for the FFT
        nfft = int(2 ** N.ceil(N.log2(frame_length)))
        # magnitude spectrum, shape (nframes, nfft // 2 + 1)
        spectrum = N.abs(N.fft.rfft(frames, nfft))
        nbins = spectrum.shape[-1]
        # spectrum samples per unit of linear frequency; this depends on
        # the padded FFT length, not on the original frame length
        bins_per_hz = nfft / float(sampling_frequency)

        energies = N.zeros((nframes, self.nfilters))
        for i, (melstart, melstop) in enumerate(self.filter_bands):
            # band edges are stored in Mel; spectrum samples are linear
            first_bin = int(N.ceil(mel2linear(melstart) * bins_per_hz))
            last_bin = int(N.ceil(mel2linear(melstop) * bins_per_hz))
            band_nsamples = last_bin - first_bin + 1
            # the band covers first_bin..last_bin inclusive, clipped to
            # the samples the spectrum actually has
            stop_bin = min(last_bin + 1, nbins)
            if first_bin >= stop_bin:
                # break if signal's highest frequency is less than the
                # start frequency of this filter
                break
            # apply filter window function to spectrum samples; the window
            # is truncated when the band was clipped
            window = self.filter_window(band_nsamples)
            band_spectrum = (spectrum[:, first_bin:stop_bin]
                             * window[:stop_bin - first_bin])
            energies[:, i] = band_spectrum.sum(axis=-1) / band_nsamples
        return energies
class MFCC:
    """Mel-frequency cepstral coefficient extractor.

    The signal is cut into overlapping windowed frames, every frame is
    passed through the filterbank, and the scaled log band energies are
    decorrelated with a discrete cosine transform.
    """

    def __init__(self, period, duration, frame_window, filterbank):
        """
        Parameters:
        - `period`: frame period in seconds.
        - `duration`: frame duration in seconds.
        - `frame_window`: window to apply to each frame; called with the
          frame length in samples, returns the weights.
        - `filterbank`: object whose `apply(frames, sampling_frequency)`
          returns one row of band energies per frame.
        """
        self.period = period
        self.duration = duration
        self.frame_window = frame_window
        self.filterbank = filterbank

    def apply(self, signal, dtype=None, ncoeffs=None):
        """Return the cepstral coefficients of every frame of `signal`.

        Parameters:
        - `signal`: 1-d sample array with a `sampling_frequency` attribute.
        - `dtype`: currently unused; kept for interface compatibility.
        - `ncoeffs`: number of leading coefficients to keep per frame;
          None keeps one coefficient per filterbank band.

        Returns an array with one row per frame and one column per
        coefficient.
        """
        # Normalization (_normalize) and preemphasis (_preemphasis) are
        # available as helpers but are deliberately not applied here.
        frames = self._split_into_frames(signal)
        energies = self.filterbank.apply(frames,
                                         signal.sampling_frequency)
        # Bands without any energy would give log(0) = -inf and poison
        # every coefficient of the frame, so floor them at the smallest
        # positive float.
        energies = N.maximum(energies, N.finfo(float).tiny)
        spectra = N.log(energies)
        spectra *= 20
        coefficients = self._dct(spectra)
        if ncoeffs is not None:
            coefficients = coefficients[:, :ncoeffs]
        # XXX lifter
        return coefficients

    def _dct(self, spectra):
        """Orthonormal type-II discrete cosine transform of each row."""
        nbands = spectra.shape[-1]
        if nbands == 0:
            return spectra
        k = N.arange(nbands)[:, None]
        n = N.arange(nbands)[None, :]
        # basis[k, n] = scale_k * cos(pi * k * (2n + 1) / (2 * nbands))
        basis = N.cos(N.pi * k * (2 * n + 1) / (2.0 * nbands))
        basis *= N.sqrt(2.0 / nbands)
        basis[0] *= N.sqrt(0.5)
        return N.dot(spectra, basis.T)

    def _normalize(self, signal, dtype):
        """Return a zero-mean copy of `signal` scaled to a peak of one."""
        signal = timeseries(signal, dtype=dtype)
        if not N.issubdtype(signal.dtype, N.floating):
            # the in-place arithmetic below needs a floating point array
            signal = signal.astype(float)
        if signal.size == 0:
            return signal
        signal -= signal.mean()
        peak = N.max(N.abs(signal))
        if peak > 0:
            # a constant signal is all zeros here; leave it that way
            signal /= peak
        return signal

    def _preemphasis(self, signal, alpha):
        """Apply preemphasis to signal inplace."""
        # The right-hand side is evaluated into a temporary first, so each
        # sample is combined with the original value of its predecessor.
        signal[1:] -= alpha * signal[:-1]

    def _split_into_frames(self, signal):
        """Cut `signal` into windowed frames, one frame per row.

        Frames are `duration` seconds long and start every `period`
        seconds; trailing samples that do not fill a whole frame are
        dropped.  A signal shorter than one frame gives an array with zero
        rows.  Raises ValueError when the frame period is shorter than one
        sample.
        """
        fs = signal.sampling_frequency
        period_samples = int(fs * self.period)
        duration_samples = int(fs * self.duration)
        if period_samples < 1:
            raise ValueError('frame period must span at least one sample')
        starts = N.arange(0, len(signal) - duration_samples + 1,
                          period_samples)
        # Work on floating point samples so the window can be applied in
        # place even when the signal holds integers.
        samples = N.asarray(signal, dtype=float)
        # Row i holds samples starts[i] .. starts[i] + duration_samples - 1.
        # Index arrays always copy, so `signal` itself is left untouched.
        frames = samples[starts[:, None] + N.arange(duration_samples)]
        # apply frame window to each frame
        frames *= self.frame_window(duration_samples)
        return frames
You have to save both files in the same folder and run the timeseries code.