animikhaich committed on
Commit
0c4c7bf
1 Parent(s): 8a2882e

UI - Half Done. Needs fixing

Browse files
Files changed (3) hide show
  1. engine/__init__.py +1 -0
  2. engine/audio_generator.py +8 -1
  3. main.py +81 -57
engine/__init__.py CHANGED
@@ -1 +1,2 @@
1
  from .video_descriptor import DescribeVideo
 
 
1
  from .video_descriptor import DescribeVideo
2
+ from .audio_generator import GenerateAudio
engine/audio_generator.py CHANGED
@@ -114,7 +114,14 @@ class GenerateAudio:
114
 
115
  if __name__ == "__main__":
116
  audio_gen = GenerateAudio()
117
- sample_rate, result = audio_gen.generate_audio(["A piano playing a jazz melody", "A guitar playing a rock riff", "A LoFi music for coding"], duration=10)
 
 
 
 
 
 
 
118
  paths = audio_gen.save_audio()
119
  print(f"Saved audio to: {paths}")
120
  buffers = audio_gen.get_audio_buffer()
 
114
 
115
  if __name__ == "__main__":
116
  audio_gen = GenerateAudio()
117
+ sample_rate, result = audio_gen.generate_audio(
118
+ [
119
+ "A piano playing a jazz melody",
120
+ "A guitar playing a rock riff",
121
+ "A LoFi music for coding"
122
+ ],
123
+ duration=10
124
+ )
125
  paths = audio_gen.save_audio()
126
  print(f"Saved audio to: {paths}")
127
  buffers = audio_gen.get_audio_buffer()
main.py CHANGED
@@ -1,67 +1,91 @@
1
  import streamlit as st
 
2
 
3
- def main():
4
- st.set_page_config(page_title="VidTune: Where Videos Find Their Melody", layout="centered")
5
 
6
- # Title and Description
7
- st.title("VidTune: Where Videos Find Their Melody")
8
- st.write("VidTune is a web application that allows users to upload videos and generate melodies matching the mood of the video.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # Main Page (Page 1)
11
- if 'page' not in st.session_state:
12
- st.session_state.page = 'main'
 
 
 
 
13
 
14
  if st.session_state.page == 'main':
15
- st.header("Video to Music")
16
- uploaded_video = st.file_uploader("Upload Video", type=["mp4"])
17
- if uploaded_video is not None:
18
- st.session_state.uploaded_video = uploaded_video
19
- st.session_state.page = 'video_to_music'
20
-
21
- if st.session_state.page == 'main':
22
- st.header("Prompt to Music")
23
- prompt = st.text_area("Prompt")
24
- if st.button("Generate"):
25
- st.session_state.prompt = prompt
26
- st.session_state.page = 'prompt_to_music'
27
 
28
- # Page 2a (If the user uploads a video)
29
- if st.session_state.page == 'video_to_music':
30
- st.sidebar.title("Settings")
31
- device = st.sidebar.selectbox("Select Device", ["GPU", "CPU"], index=0)
32
- num_samples = st.sidebar.slider("Number of samples", 1, 10, 3)
33
-
34
- st.video(st.session_state.uploaded_video)
35
-
36
- st.text_area("Video Description", "This is a fixed video description", disabled=True)
37
- st.text_area("Music Description")
38
-
39
- if st.button("Generate Music"):
40
- st.session_state.page = 'result'
41
- st.session_state.device = device
42
- st.session_state.num_samples = num_samples
43
 
44
- # Page 2b (If user selects "Prompt to Music" in Page 1)
45
- if st.session_state.page == 'prompt_to_music':
46
- st.sidebar.title("Settings")
47
- device = st.sidebar.selectbox("Select Device", ["GPU", "CPU"], index=0)
48
- num_samples = st.sidebar.slider("Number of samples", 1, 10, 3)
49
-
50
- if st.button("Generate Music"):
51
- st.session_state.page = 'result'
52
- st.session_state.device = device
53
- st.session_state.num_samples = num_samples
54
 
55
- # Page 3 (Results Page)
56
- if st.session_state.page == 'result':
57
- st.header("Generated Music")
58
- for i in range(st.session_state.num_samples):
59
- st.write(f"Music Sample {i+1}")
60
- st.audio(f"Generated Music {i+1}.mp3", format='audio/mp3')
61
- st.download_button(f"Download Music {i+1}", f"Generated Music {i+1}.mp3")
62
-
63
- if st.button("Start Over"):
64
- st.session_state.page = 'main'
65
 
66
- if __name__ == "__main__":
67
- main()
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from engine import DescribeVideo, GenerateAudio
3
 
 
 
4
 
5
+ video_model_map = {
6
+ "Fast": "flash",
7
+ "Quality": "pro",
8
+ }
9
+
10
+ music_model_map = {
11
+ "Fast": "musicgen-stereo-small",
12
+ "Balanced": "musicgen-stereo-medium",
13
+ "Quality": "musicgen-stereo-large",
14
+ }
15
+
16
+
17
+ st.set_page_config(page_title="VidTune: Where Videos Find Their Melody", layout="centered")
18
+
19
+ # Title and Description
20
+ st.title("VidTune: Where Videos Find Their Melody")
21
+ st.write("VidTune is a web application that allows users to upload videos and generate melodies matching the mood of the video.")
22
+
23
+
24
+ # Sidebar
25
+ st.sidebar.title("Settings")
26
+ video_model = st.sidebar.selectbox("Select Video Descriptor", ["Fast", "Balanced", "Quality"], index=0)
27
+ music_model = st.sidebar.selectbox("Select Music Generator", ["Fast", "Balanced", "Quality"], index=0)
28
+ num_samples = st.sidebar.slider("Number of samples", 1, 8, 3)
29
+ generate_button = st.sidebar.button("Generate Music")
30
+
31
+ video_descriptor = DescribeVideo(model=video_model_map[video_model])
32
+ audio_generator = GenerateAudio(model=music_model_map[music_model])
33
+
34
+ video_description = None
35
+
36
+ # Main Page (Page 1)
37
+ if 'page' not in st.session_state:
38
+ st.session_state.page = 'main'
39
+
40
+ if st.session_state.page == 'main':
41
+ st.header("Video to Music")
42
+ uploaded_video = st.file_uploader("Upload Video", type=["mp4"])
43
 
44
+ if uploaded_video is not None:
45
+ st.session_state.uploaded_video = uploaded_video
46
+ with open("temp.mp4", mode='wb') as w:
47
+ w.write(uploaded_video.getvalue())
48
+ video_description = video_descriptor.describe_video("temp.mp4")
49
+
50
+ st.session_state.page = 'video_to_music'
51
 
52
  if st.session_state.page == 'main':
53
+ st.header("Prompt to Music")
54
+ prompt = st.text_area("Prompt")
55
+ if generate_button:
56
+ st.session_state.prompt = prompt
57
+ st.session_state.page = 'prompt_to_music'
58
+
59
+ # Page 2a (If the user uploads a video)
60
+ if st.session_state.page == 'video_to_music':
61
+ st.video(st.session_state.uploaded_video)
 
 
 
62
 
63
+ st.text_area("Video Description", "This is a fixed video description", disabled=True)
64
+ st.text_area("Music Description")
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ if generate_button:
67
+ st.session_state.page = 'result'
68
+ st.session_state.device = device
69
+ st.session_state.num_samples = num_samples
70
+
71
+ # Page 2b (If user selects "Prompt to Music" in Page 1)
72
+ if st.session_state.page == 'prompt_to_music':
73
+ st.sidebar.title("Settings")
74
+ device = st.sidebar.selectbox("Select Device", ["GPU", "CPU"], index=0)
75
+ num_samples = st.sidebar.slider("Number of samples", 1, 10, 3)
76
 
77
+ if generate_button:
78
+ st.session_state.page = 'result'
79
+ st.session_state.device = device
80
+ st.session_state.num_samples = num_samples
 
 
 
 
 
 
81
 
82
+ # Page 3 (Results Page)
83
+ if st.session_state.page == 'result':
84
+ st.header("Generated Music")
85
+ for i in range(st.session_state.num_samples):
86
+ st.write(f"Music Sample {i+1}")
87
+ st.audio(f"Generated Music {i+1}.mp3", format='audio/mp3')
88
+ st.download_button(f"Download Music {i+1}", f"Generated Music {i+1}.mp3")
89
+
90
+ if st.button("Start Over"):
91
+ st.session_state.page = 'main'