I think some int
type is missing...
error message
Traceback (most recent call last):
File"/Users/ymtk/Desktop/pyaudio/mfcc.py", line 109, in<module>
mfcc = MFCC (stf.SPEC.shape[1]*2, stf.frequency)
File "/Users/ymtk/Desktop/pyaudio/mfcc.py", line 29, in __init__
self.filterbank, self.fcenters=self.melFilterBank()
File"/Users/ymtk/Desktop/pyaudio/mfcc.py", line62, inmelFilterBank
filterbank [c, i] = (i-indexstart [c]) * increment
IndexError: only integers, slices(`:`), ellipsis(`...`), numpy.newaxis(`None`) and integer or boolean arrays are valid indications
source code
#!/usr/bin/env python
# coding=utf-8
import numpy
import scipy.ftpack
import scope.interpolate
import scipy.linalg
import sys
from stf import STF
class MFCC:
'''
MFCC computing from spectrum information
reference
---------
- - http://aidiary.hatenablog.com/entry/20120225/1330179868
'''
def__init__(self, nfft, frequency, dimension=16, channels=20):
self.nfft = nfft
self.frequency=frequency
self.dimension=dimension
self.channels = channels
self.fscale=\
numpy.ft.ftfreq (self.nfft, d=1.0/self.frequency) [:self.nfft/2]
self.filterbank, self.fcenters=self.melFilterBank()
defz2mel(self,f):
return1127.01048 * numpy.log(f/700.0+1.0)
defmel2hz(self,m):
return700.0* (number.exp(m/1127.01048) - 1.0)
defmelFilterBank (self):
# cover up to half the sampling frequency (Nyquist frequency)
fmax = self.frequency/2
melmax = self.hz2mel(fmax)
# calculate at half the number of samples according to the frequency
nmax = self.nfft/2
df = self.frequency/self.nfft
# calculate the central e-mail scale for each filter
dmel=melmax/(self.channels+1)
melcenters = numpy.range(1,self.channels+1) * dmel
fcenters=self.mel2hz(melcenters)
# calculate the range of frequencies for each sample
indexcenter= numpy.round (fcenters/df)
indexstart = numpy.hstack([0], indexcenter[0:self.channels-1]))
indexstop = numpy.hstack ((indexcenter[1:self.channels], [nmax]))
# Start indexstart for each filter, vertex indexcenter,
# calculate to draw a triangle graph ending at indexstop
filterbank = numpy.zeros(self.channels,nmax))
for cin numpy.range(0,self.channels):
increment=1.0/(indexcenter[c]-indexstart[c])
for i in numpy.range (indexstart[c], indexcenter[c]):
filterbank [c, i] = (i-indexstart [c]) * increment
decrement=1.0/(indexstop[c]-indexcenter[c])
for i in numpy.range (indexcenter[c], indexstop[c]):
filterbank [c, i] = 1.0 - ((i-indexcenter [c])*decrement)
filterbank[c]/=(indexstop[c]-indexstart[c])/2
return filterbank, fcenters
def mfcc(self, spectrum):
# treat as 0 if negative values are given as spectral envelope
spectrum=numpy.maximum(numpy.zeros(spectrum.shape), spectrum)
# take the logarithm of the product of spectral envelope and melfilter bank
mspectrum=numpy.log10(numpy.dot(spectrum,self.filterbank.transpose()))
# perform discrete cosine transformation using scipy
return scope.ftpack.dct(mspectrum, norm='ortho') [:self.dimension]
def delta (self, mfcc):
# The beginning and the end of the data shall be the same data.
mfcc = numpy.concentrate([[mfcc[0]], mfcc, [mfcc[-1]]]])
delta = None
for i in xrange (1, mfcc.shape[0]-1):
# The difference between the front and rear frames divided by 2 is defined as the dynamic variation.
slope=(mfcc[i+1]-mfcc[i-1])/2
if delta is None:
delta=slope
else:
delta = numpy.vstack ([ delta, slope ] )
return delta
def imfcc(self,mfcc):
# Perform inverse discrete cosine transformation after adding 0 to the cut part of the MFCC
mfcc = numpy.hstack ([mfcc, [0]*(self.channels-self.dimension)])
mspectrum=scipy.ftpack.idct(mfcc, norm='ortho')
# make the resulting discrete values continuous by spline interpolation
tck = scipy.interpolate.splrep(self.fcenters, numpy.power(10, mspectrum))
return scope.interpolate.splev(self.fscale, tck)
if__name__=='__main__':
iflen(sys.argv)<2:
print 'Usage: %s<ymtkyo.stf>'%sys.argv[0]
# sys.exit()
stf = STF()
stf.loadfile("/Users/ymtk/Desktop/pyaudio/ymtkyo.stf")
mfcc = MFCC (stf.SPEC.shape[1]*2, stf.frequency)
res=mfcc.mfcc(stf.SPEC[stf.SPEC.shape[0]/5])
spec=mfcc.imfcc(res)
print res
import pilab
pylab.subplot(211)
pylab.plot (stf.SPEC[stf.SPEC.shape[0]/5])
pylab.ylim(0,1.2)
pylab.subplot(212)
pylab.plot(spec)
pylab.ylim(0,1.2)
pilab.show()
indexcenter
is of type float64
.
I think it would be better to convert it to type int64
just like indexstart
, indexstop
.
indexcenter= numpy.round(fcenters/df).astype(numpy.int64)
This post was posted as a community wiki based on @metropolis' comments.
© 2024 OneMinuteCode. All rights reserved.