I think some int
type is missing...
error message
Traceback (most recent call last):
File"/Users/ymtk/Desktop/pyaudio/mfcc.py", line 109, in<module>
mfcc = MFCC (stf.SPEC.shape[1]*2, stf.frequency)
File "/Users/ymtk/Desktop/pyaudio/mfcc.py", line 29, in __init__
self.filterbank, self.fcenters=self.melFilterBank()
File"/Users/ymtk/Desktop/pyaudio/mfcc.py", line62, inmelFilterBank
filterbank [c, i] = (i-indexstart [c]) * increment
IndexError: only integers, slices(`:`), ellipsis(`...`), numpy.newaxis(`None`) and integer or boolean arrays are valid indications
source code
#!/usr/bin/env python
# coding=utf-8
import numpy
import scipy.ftpack
import scope.interpolate
import scipy.linalg
import sys
from stf import STF
class MFCC:
'''
MFCC computing from spectrum information
reference
---------
- - http://aidiary.hatenablog.com/entry/20120225/1330179868
'''
def__init__(self, nfft, frequency, dimension=16, channels=20):
self.nfft = nfft
self.frequency=frequency
self.dimension=dimension
self.channels = channels
self.fscale=\
numpy.ft.ftfreq (self.nfft, d=1.0/self.frequency) [:self.nfft/2]
self.filterbank, self.fcenters=self.melFilterBank()
defz2mel(self,f):
return1127.01048 * numpy.log(f/700.0+1.0)
defmel2hz(self,m):
return700.0* (number.exp(m/1127.01048) - 1.0)
defmelFilterBank (self):
# cover up to half the sampling frequency (Nyquist frequency)
fmax = self.frequency/2
melmax = self.hz2mel(fmax)
# calculate at half the number of samples according to the frequency
nmax = self.nfft/2
df = self.frequency/self.nfft
# calculate the central e-mail scale for each filter
dmel=melmax/(self.channels+1)
melcenters = numpy.range(1,self.channels+1) * dmel
fcenters=self.mel2hz(melcenters)
# calculate the range of frequencies for each sample
indexcenter= numpy.round (fcenters/df)
indexstart = numpy.hstack([0], indexcenter[0:self.channels-1]))
indexstop = numpy.hstack ((indexcenter[1:self.channels], [nmax]))
# Start indexstart for each filter, vertex indexcenter,
# calculate to draw a triangle graph ending at indexstop
filterbank = numpy.zeros(self.channels,nmax))
for cin numpy.range(0,self.channels):
increment=1.0/(indexcenter[c]-indexstart[c])
for i in numpy.range (indexstart[c], indexcenter[c]):
filterbank [c, i] = (i-indexstart [c]) * increment
decrement=1.0/(indexstop[c]-indexcenter[c])
for i in numpy.range (indexcenter[c], indexstop[c]):
filterbank [c, i] = 1.0 - ((i-indexcenter [c])*decrement)
filterbank[c]/=(indexstop[c]-indexstart[c])/2
return filterbank, fcenters
def mfcc(self, spectrum):
# treat as 0 if negative values are given as spectral envelope
spectrum=numpy.maximum(numpy.zeros(spectrum.shape), spectrum)
# take the logarithm of the product of spectral envelope and melfilter bank
mspectrum=numpy.log10(numpy.dot(spectrum,self.filterbank.transpose()))
# perform discrete cosine transformation using scipy
return scope.ftpack.dct(mspectrum, norm='ortho') [:self.dimension]
def delta (self, mfcc):
# The beginning and the end of the data shall be the same data.
mfcc = numpy.concentrate([[mfcc[0]], mfcc, [mfcc[-1]]]])
delta = None
for i in xrange (1, mfcc.shape[0]-1):
# The difference between the front and rear frames divided by 2 is defined as the dynamic variation.
slope=(mfcc[i+1]-mfcc[i-1])/2
if delta is None:
delta=slope
else:
delta = numpy.vstack ([ delta, slope ] )
return delta
def imfcc(self,mfcc):
# Perform inverse discrete cosine transformation after adding 0 to the cut part of the MFCC
mfcc = numpy.hstack ([mfcc, [0]*(self.channels-self.dimension)])
mspectrum=scipy.ftpack.idct(mfcc, norm='ortho')
# make the resulting discrete values continuous by spline interpolation
tck = scipy.interpolate.splrep(self.fcenters, numpy.power(10, mspectrum))
return scope.interpolate.splev(self.fscale, tck)
if__name__=='__main__':
iflen(sys.argv)<2:
print 'Usage: %s<ymtkyo.stf>'%sys.argv[0]
# sys.exit()
stf = STF()
stf.loadfile("/Users/ymtk/Desktop/pyaudio/ymtkyo.stf")
mfcc = MFCC (stf.SPEC.shape[1]*2, stf.frequency)
res=mfcc.mfcc(stf.SPEC[stf.SPEC.shape[0]/5])
spec=mfcc.imfcc(res)
print res
import pilab
pylab.subplot(211)
pylab.plot (stf.SPEC[stf.SPEC.shape[0]/5])
pylab.ylim(0,1.2)
pylab.subplot(212)
pylab.plot(spec)
pylab.ylim(0,1.2)
pilab.show()
indexcenter
is of type float64
.
I think it would be better to convert it to type int64
just like indexstart
, indexstop
.
indexcenter= numpy.round(fcenters/df).astype(numpy.int64)
This post was posted as a community wiki based on @metropolis' comments.
1031 Error in x, y, and format string must not be None
875 GDB gets version error when attempting to debug with the Presense SDK (IDE)
1070 Uncaught (inpromise) Error on Electron: An object could not be cloned
1588 When building Fast API+Uvicorn environment with PyInstaller, console=False results in an error
© 2025 OneMinuteCode. All rights reserved.