### Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import simpleaudio as sa
from pydub import AudioSegment 
# import librosa
import os
from moviepy.editor import *



### Loading audio files using pydub

In [2]:
b = 'birds-chirping.wav'
i = 'bees-buzzing.wav'
r = 'calm-river.wav'
birds = AudioSegment.from_file(b)
insects = AudioSegment.from_file(i)
river = AudioSegment.from_file(r)

### Making all audio files the same length

In [3]:
length = np.min([len(birds),len(river),len(insects)])

In [4]:
def match_length(audioSample, targetLength):
    if len(audioSample) > targetLength:
        # Trim the sample if it's longer than the target length
        return audioSample[:targetLength]
    else:
        return audioSample

In [5]:
birds = match_length(birds,length)
insects = match_length(insects,length)
river = match_length(river,length)
river = river - 6
insects = insects - 6
birds = birds + 3

### Declaring variables to be used throughout

In [23]:
def find_global_min_max(audio_segments):
    global_min, global_max = float('inf'), float('-inf')
    for segment in audio_segments:
        samples = np.array(segment.get_array_of_samples())
        segment_min, segment_max = min(samples), max(samples)
        global_min = min(global_min, segment_min)
        global_max = max(global_max, segment_max)
    return global_min, global_max

In [24]:
num_steps = 100
total_min, total_max = find_global_min_max([birds, insects, river])

### Creating the original waveform animation

In [25]:
def generate_waveform_image(audio_segment, filename,min_y,max_y):
    # Convert to numpy array
    samples = np.array(audio_segment.get_array_of_samples())
    plt.figure(figsize=(10, 3))
    plt.plot(samples)
    plt.ylim(min_y,max_y)
    plt.savefig(filename)
    plt.close()

In [26]:
image_dir = "original_waveform_images"
os.makedirs(image_dir, exist_ok=True)
segment_length = length/num_steps + 1
for i, segment_start in enumerate(range(0, len(birds), int(segment_length))):
    segment = birds[segment_start:segment_start + segment_length]
    image_filename = os.path.join(image_dir, f"waveform_{i}.png")
    miny,maxy = find_global_min_max([birds])
    generate_waveform_image(segment, image_filename,miny,maxy)

In [27]:
image_clips = []
for i in range(len(os.listdir(image_dir))):
    image_path = os.path.join(image_dir, f"waveform_{i}.png")
    img_clip = ImageClip(image_path).set_duration(birds.duration_seconds / 100.0)
    image_clips.append(img_clip)

# Concatenate all image clips
video_clip = concatenate_videoclips(image_clips, method="compose")

In [28]:
video_clip = video_clip.set_audio(AudioFileClip("birds-chirping.wav"))

In [29]:
video_clip.write_videofile("original_waveform.mp4", codec="libx264", audio_codec="aac",fps = 24)

Moviepy - Building video original_waveform.mp4.
MoviePy - Writing audio in original_waveformTEMP_MPY_wvf_snd.mp4


                                                                                

MoviePy - Done.
Moviepy - Writing video original_waveform.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready original_waveform.mp4


### Random Walk Implementation

In [33]:
def random_walk(steps, start,center, mul,dimensions = 3):
    walk = np.zeros((steps,dimensions))
    walk[0] = [start,start,start]
    for i in range(1,steps):
        step = np.random.normal(loc = center, size=dimensions)*mul
        walk[i] = walk[i-1] + step
    return walk

In [41]:
volume_walk = random_walk(num_steps,10,0,2, 3)
pitch_walk = random_walk(num_steps,0, 0,0.2, 3)
panning_walk = random_walk(num_steps,0, 0, 0.05, 3)

### Applying the effects on each audio file and buffering to smoothen the audio

In [42]:
# def shift_pitch(audio, shift):
#     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
#     rate = audio.frame_rate
#     samples = samples / (2**15)
#     shifted_samples = librosa.effects.pitch_shift(samples, sr = rate, n_steps = shift)
#     shifted_samples = (shifted_samples * (2**15)).astype(np.int16)
#     shifted_audio = AudioSegment(shifted_samples.tobytes(), frame_rate=rate,sample_width=2,
#                                  channels=len(audio.split_to_mono()))
#     return shifted_audio

In [43]:
def apply_effects(audio,pitch_shift,volume_change, pan_pos):
    audio = audio + volume_change
    panned_audio = audio.pan(pan_pos)
#     pitched_audio = shift_pitch(panned_audio,pitch_shift)
    return panned_audio

In [44]:
image_dir_one = "final_waveform_images_bird"
image_dir_two = "final_waveform_images_all"
os.makedirs(image_dir_one, exist_ok=True)
os.makedirs(image_dir_two, exist_ok=True)

### Processing each audio segment and buffering them in groups

In [45]:
processed_samples_birds = []
processed_samples_combined = []

In [46]:
def process_and_buffer(audio1, audio2, audio3, num_buffers,buffer_size,effect_function):
    buffers = []
    previous_segment = None
    count = 0
    global full_audio_bird, full_audio_all
    full_audio_bird = AudioSegment.silent(duration=0)
    full_audio_all = AudioSegment.silent(duration=0)
    for i in range(0, len(audio1), buffer_size):
        segment1 = audio1[i:i+buffer_size]
        segment2 = audio2[i:i+buffer_size]
        segment3 = audio3[i:i+buffer_size]
        print(count)
        processed_segment1 = effect_function(segment1,pitch_walk[count,0],volume_walk[count,0],panning_walk[count,0])
        processed_segment2 = effect_function(segment2,pitch_walk[count,1],volume_walk[count,1],panning_walk[count,1])
        processed_segment3 = effect_function(segment3,pitch_walk[count,2],volume_walk[count,2],panning_walk[count,2])
        combined_segment = processed_segment1.overlay(processed_segment2).overlay(processed_segment3)
        count+=1
        full_audio_bird += processed_segment1
        full_audio_all += combined_segment
#         image_filename1 = os.path.join(image_dir_one, f"waveform_{count}.png")
#         image_filename2 = os.path.join(image_dir_two, f"waveform_{count}.png")
#         generate_waveform_image(processed_segment1, image_filename1)
#         generate_waveform_image(combined_segment, image_filename2)
        processed_samples_birds.append(processed_segment1)
        processed_samples_combined.append(combined_segment)
        if previous_segment is not None:
            # Apply crossfade with the previous segment
            crossfade = previous_segment.append(combined_segment, crossfade=60)
            buffers.append(crossfade.raw_data)
        previous_segment = combined_segment
#         buffers.append(processed_segment.raw_data)
        if len(buffers) == num_buffers:
            yield b''.join(buffers)
            buffers.clear()
    if buffers:
        yield b''.join(buffers)
#         return full_audio_bird,full_audio_all

In [47]:
num_buffers = 6
buffer_size = length/num_steps + 1
print(buffer_size)
play_objects = []
for buffered_audio in process_and_buffer(birds,river, insects, num_buffers, int(buffer_size), apply_effects):
    play_obj = sa.play_buffer(buffered_audio, num_channels=2, bytes_per_sample=2, sample_rate=44100)
    play_objects.append(play_obj)
        # If there's more than one play object, wait for the previous one to finish
    if len(play_objects) > 1:
        play_objects[-2].wait_done()
    #     raw_data = buffered_audio.raw_data
    #     playback_object = sa.play_buffer(buffered_audio, num_channels=2, bytes_per_sample=2, sample_rate=birds.frame_rate)
    #     playback_object.wait_done()
play_objects[-1].wait_done()

230.92
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [62]:
bird_min,bird_max = find_global_min_max(processed_samples_birds)
total_min, total_max = find_global_min_max(processed_samples_combined)
total_min -= 5000
total_max += 5000
print(total_min,total_max)

-37768 37767


In [57]:
count = 0
for i in processed_samples_birds:
    count+=1
    image_filename = os.path.join(image_dir_one, f"waveform_{count}.png")
    samples = np.array(i.get_array_of_samples())
    plt.figure(figsize=(10, 3))
    plt.plot(samples)
    plt.ylim(bird_min,bird_max)
    plt.savefig(image_filename)
    plt.close()

In [63]:
count = 0
for i in processed_samples_combined:
    count+=1
    image_filename = os.path.join(image_dir_two, f"waveform_{count}.png")
    samples = np.array(i.get_array_of_samples())
    plt.figure(figsize=(10, 3))
    plt.plot(samples)
    plt.ylim(total_min,total_max)
    plt.savefig(image_filename)
    plt.close()

### Extracting the full audios I concatenated above of the birds and of all the sounds

In [64]:
output_path_one = "full_audio_bird.wav"
output_path_two = "full_audio_all.wav"
full_audio_bird.export(output_path_one, format="wav")
full_audio_all.export(output_path_two, format="wav")

<_io.BufferedRandom name='full_audio_all.wav'>

### Putting all images of waveforms captured together and adding audio to video

In [65]:
image_clips_bird = []
image_clips_all = []
for i in range(len(os.listdir(image_dir_one))):
    image_path1 = os.path.join(image_dir_one, f"waveform_{i+1}.png")
    img_clip1 = ImageClip(image_path1).set_duration(birds.duration_seconds/100)
    image_path2 = os.path.join(image_dir_two, f"waveform_{i+1}.png")
    img_clip2 = ImageClip(image_path2).set_duration(birds.duration_seconds/100)
    image_clips_bird.append(img_clip1)
    image_clips_all.append(img_clip2)

# Concatenate all image clips
video_clip1 = concatenate_videoclips(image_clips_bird, method="compose")
video_clip1 = video_clip1.set_audio(AudioFileClip("full_audio_bird.wav"))
video_clip2 = concatenate_videoclips(image_clips_all, method="compose")
video_clip2 = video_clip2.set_audio(AudioFileClip("full_audio_all.wav"))

In [66]:
video_clip1.write_videofile("final_waveform_bird.mp4", codec="libx264", audio_codec="aac",fps = 30)
video_clip2.write_videofile("final_waveform_all.mp4", codec="libx264", audio_codec="aac",fps = 30)

Moviepy - Building video final_waveform_bird.mp4.
MoviePy - Writing audio in final_waveform_birdTEMP_MPY_wvf_snd.mp4


                                                                                

MoviePy - Done.
Moviepy - Writing video final_waveform_bird.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready final_waveform_bird.mp4
Moviepy - Building video final_waveform_all.mp4.
MoviePy - Writing audio in final_waveform_allTEMP_MPY_wvf_snd.mp4


                                                                                

MoviePy - Done.
Moviepy - Writing video final_waveform_all.mp4



                                                                                

Moviepy - Done !
Moviepy - video ready final_waveform_all.mp4
