Flip back to ffmpeg decoding (see, didn't even need to change the name of the file!)
author jweigele <jweigele@local>
Sun, 25 Sep 2022 05:12:13 +0000 (22:12 -0700)
committer jweigele <jweigele@local>
Sun, 25 Sep 2022 05:12:13 +0000 (22:12 -0700)
* AudioFile processing was too slow through pedalboard
* Allows some of the async to get work done while spinning off the subprocess, maybe?
* And there was a bug with certain files just not being read properly (okay??)

Seems to work better now!

Dockerfile
ffmpegfile.py

index c901502b60c2665a5df85b4fbd964eeeb1e38b63..3d283395177b516297e6e0175861cf0ad569230f 100644 (file)
@@ -10,7 +10,7 @@ RUN apt-get update
 #RUN apk musl-dev
 RUN apt-get install -y python3-dev
 RUN apt-get install -y python3-numpy
-#RUN apt-get install -y ffmpeg
+RUN apt-get install -y ffmpeg
 RUN apt-get install -y python3-wheel
 RUN apt-get install -y python3-pip
 RUN apt-get install -y python3-cffi
index 47cbd43bd114f38bb3165018eb01921a7bc939b8..e66db9f20dd7461135120ff276adadcd2413ac9c 100644 (file)
@@ -156,10 +156,20 @@ class AudioBuffer(discord.AudioSource):
 
             #next_bytes = byte_reader.read(MAX_LENGTH)
             buffer_samples = None
-            with AudioFile(filename).resampled_to(target_sample_rate=SAMPLE_RATE) as f:
-                # we're reading in the whole file at once, maybe don't want to later for better responsiveness?
-                buffer_samples = f.read(f.frames)
-                #log.debug('samples length: {}'.format(len(buffer_samples)))
+            args = ['ffmpeg', '-i', '{}'.format(filename)]
+            args += ['-f', 's16le', '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '48000', '-']
+            p = await asyncio.create_subprocess_exec(*args, stdin=None, stderr=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE)
+            stdout, stderr = await p.communicate()
+            log.debug(stderr)
+            return_bytes = stdout
+            await p.wait()
+            byte_length = len(return_bytes)
+            buffer_samples = numpy.array([x[0] for x in struct.iter_unpack('<h', return_bytes)])
+            buffer_samples = buffer_samples.astype('float32')
+            buffer_samples /= 32768.0
+            log.debug(buffer_samples)
+            # we're reading in the whole file at once, maybe don't want to later for better responsiveness?
+            #log.debug('samples length: {}'.format(len(buffer_samples)))
             # process here with pedalboard
             shift_tones = random.randint(-10, 10)
             board = pedalboard.Pedalboard([
@@ -169,18 +179,18 @@ class AudioBuffer(discord.AudioSource):
             buffer_samples = board(buffer_samples, SAMPLE_RATE)
             index = 0
             
-            next_samples = buffer_samples[0][index:index+MAX_SAMPLES]
+            next_samples = buffer_samples[index:index+MAX_SAMPLES]
             while len(next_samples) > 0:
                 channels = len(next_samples.shape)
                 while len(next_samples) > 0:
                     # this is to format the samples in the style opus wants
                     #log.debug('channels are {}'.format(channels))
-                    if channels == 2:
-                        # interleave the samples L/R to form a bytestream eventually
-                        next_samples = numpy.squeeze(numpy.dstack((next_samples[0], next_samples[1])).reshape(1, -1))
+                    #if channels == 2:
+                    #    # interleave the samples L/R to form a bytestream eventually
+                    #    next_samples = numpy.squeeze(numpy.dstack((next_samples[0], next_samples[1])).reshape(1, -1))
                     # right now, this is the only path taken (because we discard the other channel)
-                    else:
-                        next_samples = numpy.repeat(next_samples, 2)
+                    #else:
+                    next_samples = numpy.repeat(next_samples, 2)
                     # for the downcast to int16
                     next_samples *= 32768.0
                     next_bytes = next_samples.astype('int16')
@@ -189,7 +199,7 @@ class AudioBuffer(discord.AudioSource):
                     chunks_to_append.append(chunk)
                     # get next chunk
                     index += MAX_SAMPLES
-                    next_samples = buffer_samples[0][index:index+MAX_SAMPLES]
+                    next_samples = buffer_samples[index:index+MAX_SAMPLES]
 
 
             # chunk append/insertion (along with mixing)