Multi-channel drifting!
author: jweigele <jweigele@local>
Thu, 29 Sep 2022 23:25:49 +0000 (16:25 -0700)
committer: jweigele <jweigele@local>
Thu, 29 Sep 2022 23:25:49 +0000 (16:25 -0700)
ffmpegfile.py

index 4bfe8d73ba50c6bac31ff1d712f6f5fd83cb342e..cbe27243e7ca7be432d75fa30ca4792d179b3fa0 100644 (file)
@@ -32,15 +32,16 @@ class AudioChunk(object):
         if init_bytes is not None:
             init_bytes = self.pad_out(init_bytes)
             self.samples = numpy.array([x[0] for x in struct.iter_unpack('<h', init_bytes)])
+            self.samples = self.samples.reshape(-1, 2).transpose()
             self.samples = self.samples.astype('float32')
             self.samples /= 32768.0
         elif init_samples is not None:
             self.samples = init_samples
             #log.debug('init by samples')
-            if len(self.samples) < MAX_SAMPLES*2:
-                samples_to_add = int(MAX_SAMPLES*2 - len(self.samples))
-                #log.debug('samples to add: {}'.format(samples_to_add))
-                self.samples = numpy.concatenate((self.samples, numpy.array([0.0]*samples_to_add)))
+            if self.samples.size < MAX_SAMPLES*2:
+                samples_to_add = int(MAX_SAMPLES*2 - self.samples.size)
+                log.debug('samples to add: {}'.format(samples_to_add))
+                self.samples = numpy.concatenate((self.samples, numpy.repeat(numpy.zeros(shape=(2,1)), samples_to_add//2, axis=1)), axis=1)
 
     @property
     def sample_buffer(self):
@@ -48,8 +49,9 @@ class AudioChunk(object):
         #log.debug(len(self.samples))
         #log.debug(list(self.samples))
         next_samples = 32768.0 * self.samples
+        next_samples = numpy.squeeze(numpy.dstack((next_samples[0], next_samples[1])).reshape(1, -1))
         next_bytes = next_samples.astype('int16')
-        retval = struct.pack('<'+'h'*len(next_bytes), *list(next_bytes))
+        retval = struct.pack('<'+'h'*next_bytes.size, *list(next_bytes))
         return retval
 
     @property
@@ -154,7 +156,6 @@ class AudioBuffer(discord.AudioSource):
         locked = False
         try:
             min_deck_size = self.DECK_SIZE
-            #return_byte_length = len(return_bytes)
             chunks_to_append = []
             #byte_reader = io.BytesIO(return_bytes)
             # byte processing and chunk generation
@@ -162,17 +163,21 @@ class AudioBuffer(discord.AudioSource):
             #next_bytes = byte_reader.read(MAX_LENGTH)
             buffer_samples = None
             args = ['ffmpeg', '-i', '{}'.format(filename)]
-            args += ['-f', 's16le', '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '48000', '-']
+            args += ['-f', 's16le', '-acodec', 'pcm_s16le', '-ac', '2', '-ar', '48000', '-']
             p = await asyncio.create_subprocess_exec(*args, stdin=None, stderr=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE)
             stdout, stderr = await p.communicate()
             log.debug(stderr)
             return_bytes = stdout
             await p.wait()
-            byte_length = len(return_bytes)
+            # this just boils everything down to a 1d array of int16
             buffer_samples = numpy.array([x[0] for x in struct.iter_unpack('<h', return_bytes)])
+            # this splits out L/R channels to their own array, assuming interleaved 2 channels from ^
+            buffer_samples = buffer_samples.reshape(-1, 2).transpose()
+            # make all samples floats, for future processing
             buffer_samples = buffer_samples.astype('float32')
             buffer_samples /= 32768.0
-            log.debug(buffer_samples)
+            #log.debug(buffer_samples)
+            #log.debug(len(buffer_samples))
             # we're reading in the whole file at once, maybe don't want to later for better responsiveness?
             #log.debug('samples length: {}'.format(len(buffer_samples)))
             # process here with pedalboard
@@ -184,27 +189,14 @@ class AudioBuffer(discord.AudioSource):
             buffer_samples = board(buffer_samples, SAMPLE_RATE)
             index = 0
             
-            next_samples = buffer_samples[index:index+MAX_SAMPLES]
-            while len(next_samples) > 0:
-                channels = len(next_samples.shape)
-                while len(next_samples) > 0:
-                    # this is to format the samples in the style opus wants
-                    #log.debug('channels are {}'.format(channels))
-                    #if channels == 2:
-                    #    # interleave the samples L/R to form a bytestream eventually
-                    #    next_samples = numpy.squeeze(numpy.dstack((next_samples[0], next_samples[1])).reshape(1, -1))
-                    # right now, this is the only path taken (because we discard the other channel)
-                    #else:
-                    next_samples = numpy.repeat(next_samples, 2)
-                    # for the downcast to int16
-                    #next_samples *= 32768.0
-                    #next_bytes = next_samples.astype('int16')
-                    chunk = AudioChunk(init_samples=next_samples)
-                    #log.debug(chunk, chunk.samples)
-                    chunks_to_append.append(chunk)
-                    # get next chunk
-                    index += MAX_SAMPLES
-                    next_samples = buffer_samples[index:index+MAX_SAMPLES]
+            next_samples = buffer_samples[:,index:index+MAX_SAMPLES]
+            while next_samples.size > 0:
+                chunk = AudioChunk(init_samples=next_samples)
+                #log.debug(chunk, chunk.samples)
+                chunks_to_append.append(chunk)
+                # get next chunk
+                index += MAX_SAMPLES
+                next_samples = buffer_samples[:,index:index+MAX_SAMPLES]
 
 
             # chunk append/insertion (along with mixing)