ライブマイクからのpyaudioでタップを検出

Question

Pyaudioを使用して、ライブマイクからの突然のタップノイズを検出するにはどうすればよいですか？

Russell Borogove · Accepted Answer

私がそれをした一つの方法：

一度にサンプルのブロックを読み取る、たとえば0.05秒の価値がある
RMSブロックの振幅（個々のサンプルの二乗の平均の平方根）を計算します）
ブロックのRMS振幅がしきい値より大きい場合、それは「ノイズの多いブロック」です。それ以外の場合、それは「静かなブロック」です。
突然タップすると、静かなブロックに続いて少数のノイズの多いブロックが続き、その後静かなブロックになります
静かなブロックが得られない場合は、しきい値が低すぎます
ノイズの多いブロックが表示されない場合は、しきい値が高すぎます

私のアプリケーションは「興味深い」ノイズを無人で記録していたので、ノイズの多いブロックが存在する限り記録していました。 15秒間のノイズのある期間（「耳を覆う」）がある場合は、しきい値に1.1を掛け、15の場合はしきい値を0.9に掛けます分静かな期間（「聞くのが難しい」））。アプリケーションにはさまざまなニーズがあります。

また、観察されたRMS=値に関する私のコードのコメントに気づきました。MacbookProの内蔵マイクでは、+ /-1.0の正規化されたオーディオデータ範囲で、入力ボリュームが最大に設定されています。いくつかのデータポイント：

私の家にある不愉快に大きな中央暖房ファン0.003-0.006（-50dBから-44dB）
同じラップトップで0.010-0.40（-40dB〜-8dB）タイピング
0.10（-20dB）1 'の距離で指を柔らかくスナップ
0.60（-4.4dB）1 'で指を大音量でスナップ

更新：開始するためのサンプルを以下に示します。

#!/usr/bin/python
"""Open a microphone with PyAudio and listen for taps.

Audio is read in short blocks; each block is classified as "noisy" or
"quiet" by comparing its RMS amplitude with an adaptive threshold. A short
run of noisy blocks followed by a quiet block is reported as a tap.
"""

import math
import struct

import pyaudio

INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = 1.0 / 32768.0
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)
# If we get this many noisy blocks in a row, increase the threshold.
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# If we get this many quiet blocks in a row, decrease the threshold.
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# If the noise was longer than this many blocks, it's not a 'tap'.
# NOTE: 0.15 / 0.05 evaluates to slightly less than 3.0 in binary floating
# point, so in practice at most two consecutive noisy blocks count as a tap.
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME


def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit samples.

    RMS amplitude is the square root of the mean of the squared samples.
    Each pair of bytes in ``block`` is unpacked as one signed short (native
    byte order) and scaled by ``SHORT_NORMALIZE`` before squaring.

    Args:
        block: Bytes object holding the raw samples.

    Returns:
        The RMS amplitude as a float; 0.0 for an empty block.

    Raises:
        struct.error: If ``len(block)`` is not a multiple of two.
    """
    # One short comes out for every two bytes. Floor division keeps the
    # count an integer on Python 3 as well as Python 2.
    count = len(block) // 2
    if count == 0:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short; scale it before squaring.
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)


class TapTester(object):
    """Reads blocks from a microphone stream and reports taps."""

    def __init__(self):
        """Create the PyAudio instance, open the input stream, reset counters."""
        self.pa = pyaudio.PyAudio()
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so the first quiet block is not
        # reported as the end of a tap.
        self.noisycount = MAX_TAP_BLOCKS + 1
        self.quietcount = 0
        self.errorcount = 0

    def stop(self):
        """Close the input stream and release the PyAudio instance."""
        self.stream.close()
        self.pa.terminate()

    def find_input_device(self):
        """Return the index of the first device whose name looks like an input.

        Every device is printed as it is examined. A device matches when its
        lower-cased name contains "mic" or "input".

        Returns:
            The matching device index, or None when no device matches.
        """
        for i in range(self.pa.get_device_count()):
            devinfo = self.pa.get_device_info_by_index(i)
            print("Device %d: %s" % (i, devinfo["name"]))

            for keyword in ["mic", "input"]:
                if keyword in devinfo["name"].lower():
                    print("Found an input: device %d - %s" % (i, devinfo["name"]))
                    return i

        print("No preferred input found; using default input device.")
        return None

    def open_mic_stream(self):
        """Open and return an input stream on the device chosen above."""
        device_index = self.find_input_device()

        stream = self.pa.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
        )
        return stream

    def tapDetected(self):
        """Report a detected tap."""
        print("Tap!")

    def listen(self):
        """Read one block, classify it, and update the detector state.

        A noisy block increments ``noisycount``; once more than
        ``OVERSENSITIVE`` noisy blocks arrive in a row the threshold is
        raised by 10% on every further noisy block. A quiet block ends the
        current noisy run: if the run was between 1 and ``MAX_TAP_BLOCKS``
        blocks long, ``tapDetected`` is called. Once more than
        ``UNDERSENSITIVE`` quiet blocks arrive in a row the threshold is
        lowered by 10% on every further quiet block.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            self.errorcount += 1
            print("(%d) Error recording: %s" % (self.errorcount, e))
            # NOTE(review): with noisycount set to 1, a quiet block read
            # right after an error is reported as a tap -- confirm intent.
            self.noisycount = 1
            return

        amplitude = get_rms(block)
        if amplitude > self.tap_threshold:
            # Noisy block.
            self.quietcount = 0
            self.noisycount += 1
            if self.noisycount > OVERSENSITIVE:
                # Turn down the sensitivity.
                self.tap_threshold *= 1.1
        else:
            # Quiet block.
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1
            if self.quietcount > UNDERSENSITIVE:
                # Turn up the sensitivity.
                self.tap_threshold *= 0.9


if __name__ == "__main__":
    tt = TapTester()
    try:
        for i in range(1000):
            tt.listen()
    finally:
        # Release the audio device even if listening is interrupted.
        tt.stop()

user1405612 · Answer

上記のコードの簡略版...

"""Simplified tap detector: a flat script without the TapTester class."""

import math
import struct

import pyaudio

INITIAL_TAP_THRESHOLD = 0.010
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = 1.0 / 32768.0
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)
# If we get this many noisy blocks in a row, increase the threshold.
OVERSENSITIVE = 15.0 / INPUT_BLOCK_TIME
# If we get this many quiet blocks in a row, decrease the threshold.
UNDERSENSITIVE = 120.0 / INPUT_BLOCK_TIME
# If the noise was longer than this many blocks, it's not a 'tap'.
MAX_TAP_BLOCKS = 0.15 / INPUT_BLOCK_TIME


def get_rms(block):
    """Return the RMS amplitude of a block of raw 16-bit samples.

    RMS amplitude is the square root of the mean of the squared samples.
    Each pair of bytes in ``block`` is unpacked as one signed short (native
    byte order) and scaled by ``SHORT_NORMALIZE`` before squaring.

    Args:
        block: Bytes object holding the raw samples.

    Returns:
        The RMS amplitude as a float; 0.0 for an empty block.

    Raises:
        struct.error: If ``len(block)`` is not a multiple of two.
    """
    # One short comes out for every two bytes. Floor division keeps the
    # count an integer on Python 3 as well as Python 2.
    count = len(block) // 2
    if count == 0:
        # Nothing to average; avoid dividing by zero below.
        return 0.0

    shorts = struct.unpack("%dh" % count, block)

    sum_squares = 0.0
    for sample in shorts:
        # sample is a signed short; scale it before squaring.
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n

    return math.sqrt(sum_squares / count)


# Standard PyAudio setup: create the instance and open an input stream.
pa = pyaudio.PyAudio()
stream = pa.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
)

# State for the noise detector.
tap_threshold = INITIAL_TAP_THRESHOLD
# Start above MAX_TAP_BLOCKS so the first quiet block is not reported as
# the end of a tap.
noisycount = MAX_TAP_BLOCKS + 1
quietcount = 0
errorcount = 0

try:
    for i in range(1000):
        try:
            block = stream.read(INPUT_FRAMES_PER_BLOCK)
        except IOError as e:
            # Just in case there is an error reading from the device.
            errorcount += 1
            print("(%d) Error recording: %s" % (errorcount, e))
            noisycount = 1
            # No fresh block was read; skip classification so a stale (or
            # undefined) block is not analysed.
            continue

        amplitude = get_rms(block)
        if amplitude > tap_threshold:
            # Too loud: noisy block.
            quietcount = 0
            noisycount += 1
            if noisycount > OVERSENSITIVE:
                # Turn down the sensitivity.
                tap_threshold *= 1.1
        else:
            # Quiet block: a short noisy run that just ended is a tap.
            if 1 <= noisycount <= MAX_TAP_BLOCKS:
                print('tap!')
            noisycount = 0
            quietcount += 1
            if quietcount > UNDERSENSITIVE:
                # Turn up the sensitivity.
                tap_threshold *= 0.9
finally:
    # Release the audio device even if the loop is interrupted.
    stream.close()
    pa.terminate()