Hello,
I have created a pretty simple voice chat with PyAudio, but the voice quality is poor — you usually hear noise like in old movies. It is probably caused by missing CHUNKs of voice which I send over UDP. Is it possible to somehow reduce the noise?
Furthermore, I want to play a sound when the user moves over a button, but for some reason it seems impossible to merge those two tracks!
This is the most important class, "Sound". It runs in a thread so it can loop forever.
import numpy as np
import pyaudio
import wave # I play the buttons effects from wav file
class Sound:
    """Mixes incoming UDP voice chunks and WAV button effects into one PyAudio stream.

    Intended to run on its own thread: run() loops forever, each iteration
    playing the previously mixed chunk, reading the microphone and forwarding
    it over UDP (via self.voip), then mixing the next output chunk.
    """

    WIDTH = 2       # sample width in bytes (16-bit signed samples)
    CHANNELS = 2
    RATE = 44100

    def __init__(self, parent=None):
        # BUG FIX: the original called super(Sound, self).__init__(parent),
        # which raises TypeError because object.__init__ takes no arguments.
        # NOTE(review): if this class is meant to subclass QThread/Thread,
        # restore that base class and forward `parent` to it — confirm.
        super().__init__()
        self.voiceStreams = []   # queued numpy int16 chunks of received voice
        self.effectStreams = []  # open wave.Wave_read objects (button effects)
        self.vVolume = 1         # voice volume multiplier
        self.eVolume = 0.5       # effect volume multiplier
        self.voip = None         # UDP client; must expose sendDatagram(bytes)
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=self.p.get_format_from_width(Sound.WIDTH),
            channels=Sound.CHANNELS,
            rate=Sound.RATE,
            input=True,
            output=True,
        )
        # Raw byte chunks: the next mixed chunk to play, and the last chunk
        # actually played (replayed once to mask a missing UDP packet).
        # BUG FIX: use b"" — stream data is bytes in Python 3, not str.
        self.nextSample = b""
        self.lastSample = b""
        self.stream.start_stream()

    def addVoice(self, b):
        """Queue a raw little-endian int16 chunk (bytes) received over UDP."""
        # BUG FIX: np.fromstring is removed in modern NumPy; frombuffer is
        # the zero-copy replacement (a read-only view is fine, we only read).
        self.voiceStreams.append(np.frombuffer(b, np.int16))

    def addEffect(self, name):
        """Start playing the WAV file at path `name` as a button effect."""
        self.effectStreams.append(wave.open(name, "rb"))

    def newVVolume(self, v):
        """Set the voice volume multiplier."""
        self.vVolume = v

    def newEVolume(self, v):
        """Set the effect volume multiplier."""
        self.eVolume = v

    def run(self):
        """Thread entry point: process audio forever."""
        while True:
            self.myCallback()

    def myCallback(self):
        """One audio cycle: play the pending chunk, capture mic, mix the next chunk."""
        # NOTE: the original timed each section with time.clock(), which was
        # never imported and was removed in Python 3.8; the dead timing code
        # (and the per-cycle debug print) has been dropped.

        # --- playback ---
        if self.nextSample:
            self.stream.write(self.nextSample)
            self.lastSample = self.nextSample
        elif self.lastSample:
            # No fresh data (UDP packet late/lost): replay the previous chunk
            # once instead of emitting an audible gap of silence.
            self.stream.write(self.lastSample)
            self.lastSample = b""

        # --- microphone capture / send ---
        if self.stream.get_read_available() > 1023:
            mic = self.stream.read(1024)
        else:
            mic = b""
        if mic and self.voip:
            self.voip.sendDatagram(mic)  # forward the chunk to the UDP client

        # --- mixing ---
        # BUG FIX: mix in float64. The original int64 buffer with `+=` of a
        # float expression raises a NumPy casting error; float also avoids
        # accumulating rounding until the final int16 conversion.
        data = np.zeros(2048, np.float64)

        nVoices = len(self.voiceStreams)
        for _ in range(nVoices):
            s = self.voiceStreams.pop(0)
            # Scale each voice down by the number of simultaneous voices.
            data += s * (self.vVolume * 0.4 / nVoices)

        finished = []  # indexes of effects that reached end-of-file
        nEffects = len(self.effectStreams)
        for i in range(nEffects):
            frames = self.effectStreams[i].readframes(1024)
            # BUG FIX: readframes returns bytes in Python 3, so the original
            # `s == ""` was never true and finished effects were never freed.
            if not frames:
                finished.append(i)
            else:
                d = np.frombuffer(frames, np.int16)
                # A short tail chunk cannot be added to the fixed 2048-sample
                # buffer; mix only full-sized chunks (original behaviour).
                if len(d) > 2047:
                    # BUG FIX: the original computed (d / length * length),
                    # which cancels out; divide by the effect count instead.
                    data += d * (self.eVolume * 0.3 / nEffects)
        # Delete finished effects from the highest index down so the
        # remaining indexes stay valid.
        for i in reversed(finished):
            del self.effectStreams[i]

        if np.any(data):
            # BUG FIX: ndarray.tostring() is removed in modern NumPy;
            # tobytes() produces the identical byte sequence.
            self.nextSample = data.astype(np.int16).tobytes()
        else:
            self.nextSample = b""

    def close(self):
        """Stop playback and release all PyAudio resources."""
        # BUG FIX: self.timer is never created in this class, so the
        # unconditional self.timer.stop() raised AttributeError; guard it.
        timer = getattr(self, "timer", None)
        if timer is not None:
            timer.stop()
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()
The UDP server and client are pretty simple (and they work well, so I don't post them here). The client just sends all data to the server, and the server sends all data to all clients whenever it gets any. I don't tell anybody who sent the data. That means if the data are delivered too late, I will play both CHUNKs from one client at the same time (because I treat them as if they were from multiple clients)!
Here are the wav files: Click Here
I did not create them; I downloaded them from http://www.freesound.org/people/ERH/sounds/31135/ and they are licensed under Attribution!
!! I have also added "OUTPUT.txt" to the Dropbox folder, which shows what Python prints out when running this example between two people (I get voice data from only one user).
Thank you for any advice.