"""Gradio page for browsing videos together with their JSON metadata.

Each ``<video_id>.mp4`` in ``video_folder`` is paired with
``<video_id>.json`` in ``metadata_folder``. Selecting a title renders a
video player, a filmstrip of scenes and a set of HTML tables built from the
``content_metadata`` object of that JSON file.
"""

import html
import json
import logging
import os
from itertools import groupby
from typing import Any, Dict, List, Tuple

import gradio as gr

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

video_folder = 'video/'
metadata_folder = 'metadata/'


def _esc(value: Any) -> str:
    """Return *value* as text that is safe to embed in HTML (None -> "")."""
    if value is None:
        return ""
    return html.escape(str(value))


def _as_text(value: Any) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ''


def load_video_list() -> List[Dict[str, str]]:
    """List every video that has a readable metadata file.

    Returns:
        A list of ``{"video_id": ..., "title": ...}`` dicts. Videos named in
        the pinned order come first (in that order); the rest follow sorted
        by title.

    Raises:
        OSError: If ``video_folder`` cannot be listed.
    """
    video_list = []
    for filename in os.listdir(video_folder):
        if not filename.endswith('.mp4'):
            continue
        video_id = os.path.splitext(filename)[0]
        metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
        if not os.path.exists(metadata_path):
            continue
        try:
            with open(metadata_path, 'r', encoding='utf-8') as f:
                metadata = json.load(f)['content_metadata']
            title = metadata.get('title', 'Untitled')
        except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as err:
            # One malformed file should not take the whole listing down.
            logger.error("Skipping unreadable metadata %s: %s", metadata_path, err)
            continue
        video_list.append({"video_id": video_id, "title": title})

    # Define the custom order for the first five videos
    custom_order = ['7BhJmDPB7RU', 'PrAwsi3Ldzo', '3rhsSPxQ39c', 'P7WnJZ55sgc', 'g9GtUQs7XUM']

    def custom_sort(item):
        try:
            return custom_order.index(item['video_id'])
        except ValueError:
            # Place non-specified videos after the custom ordered ones
            return len(custom_order) + 1

    video_list.sort(key=lambda x: (custom_sort(x), x['title']))
    return video_list


def score_to_emoji(score):
    """Map a score to one of five emoji, in steps of 0.2 from 😴 to 🤩."""
    if score < 0.2:
        return "😴"
    elif score < 0.4:
        return "🙂"
    elif score < 0.6:
        return "😊"
    elif score < 0.8:
        return "😃"
    else:
        return "🤩"


def load_metadata(video_id: str) -> Dict[str, Any]:
    """Load the ``content_metadata`` object for *video_id*.

    Raises:
        FileNotFoundError: If the metadata file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the file has no ``content_metadata`` key.
    """
    metadata_path = os.path.join(metadata_folder, f"{video_id}.json")
    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            payload = json.load(f)
        return payload['content_metadata']
    except FileNotFoundError:
        logger.error(f"Metadata file not found for video ID: {video_id}")
        raise
    except json.JSONDecodeError:
        logger.error(f"Invalid JSON in metadata file for video ID: {video_id}")
        raise


def timestamp_to_seconds(timestamp: str) -> float:
    """Convert an ``H:M:S`` timestamp to seconds.

    Returns ``0.0`` (and logs an error) when the value is not a string in
    that format.
    """
    try:
        h, m, s = timestamp.split(':')
        return int(h) * 3600 + int(m) * 60 + float(s)
    except (ValueError, AttributeError):
        logger.error(f"Invalid timestamp format: {timestamp}")
        return 0.0


def format_timestamp(timestamp: str) -> str:
    """Format an ``H:M:S`` timestamp for display.

    Returns ``MM:SS`` (seconds truncated), or ``H:MM:SS`` when the hour part
    is non-zero. An empty/missing timestamp yields ``""`` silently; a
    malformed one yields ``""`` and logs an error.
    """
    if not timestamp:
        # Many events legitimately carry no timestamp; that is not an error.
        return ""
    try:
        h, m, s = timestamp.split(':')
        hours, minutes, seconds = int(h), int(m), int(float(s))
    except (ValueError, AttributeError):
        logger.error(f"Invalid timestamp format: {timestamp}")
        return ""
    if hours:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes:02d}:{seconds:02d}"


def _timestamp_range(item: Dict[str, Any], key: str) -> Tuple[str, str]:
    """Read ``start_timestamp``/``end_timestamp`` from the dict at ``item[key]``."""
    span = item.get(key)
    if not isinstance(span, dict):
        return '', ''
    return _as_text(span.get('start_timestamp')), _as_text(span.get('end_timestamp'))


def _make_event(start: str, end: str, event_type: str, description: Any) -> Dict[str, Any]:
    """Build one row record for the scene table."""
    return {
        'timestamp_start': start,
        'timestamp_end': end,
        'type': event_type,
        'description': description,
    }


def _collect_scene_events(scene: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten the per-scene metadata lists into a list of row records."""
    data_types = [
        ('Activities', scene.get('activities', [])),
        ('Props', scene.get('props', [])),
        ('Mood', [scene.get('mood', {})]),
        ('Narrative Progression', scene.get('narrativeProgression', [])),
        ('Video Editing Details', scene.get('videoEditingDetails', [])),
        ('Thematic Elements', [{'description': scene.get('thematicElements', '')}]),
        ('Contextual Relevance', [{'description': scene.get('contextualRelevance', '')}]),
        ('Character Interaction', scene.get('characterInteraction', [])),
    ]

    events = []
    for data_type, data_list in data_types:
        for item in data_list or []:
            if isinstance(item, str):
                events.append(_make_event('', '', data_type, item))
                continue
            if not isinstance(item, dict):
                continue

            start_time = ''
            end_time = ''
            description = item.get('description', '')

            if data_type == 'Activities':
                start_time, end_time = _timestamp_range(item, 'timestamp')
            elif data_type == 'Props':
                start_time, end_time = _timestamp_range(item, 'timestamp')
                description = item.get('name', '')
            elif data_type == 'Video Editing Details':
                # Note the plural key used by this section.
                start_time, end_time = _timestamp_range(item, 'timestamps')
            elif data_type == 'Mood':
                # Each key moment becomes its own timestamped row.
                for mood_change in item.get('keyMoments', []) or []:
                    if isinstance(mood_change, dict):
                        events.append(_make_event(
                            _as_text(mood_change.get('timestamp')),
                            '',
                            'Mood Change',
                            mood_change.get('changeDescription', ''),
                        ))
            elif data_type == 'Character Interaction':
                characters = ', '.join(map(str, item.get('characters', []) or []))
                description = f"{characters}: {item.get('description', '')}"
            else:
                start_time = _as_text(item.get('timestamp'))

            events.append(_make_event(start_time, end_time, data_type, description))

    # Drop rows that would render completely blank.
    return [e for e in events if e['description'] or e['timestamp_start']]


def _event_sort_key(event: Dict[str, Any]) -> float:
    """Sort key: start time in seconds, with untimed rows first."""
    start = event['timestamp_start']
    return timestamp_to_seconds(start) if start else -1.0


def create_scene_table(scene: Dict[str, Any]) -> str:
    """Render one scene as an HTML heading, score line and event table.

    Rows are ordered by start time; rows without a timestamp come first.
    All metadata text is HTML-escaped.
    """
    dynamism_score = scene.get('dynamismScore') or 0
    av_correlation = scene.get('audioVisualCorrelation') or 0
    cast = ", ".join(map(str, scene.get('cast', []) or []))

    parts = [f"""
<div class="scene-container">
    <h3>Scene {_esc(scene.get('sceneId', 'Unknown'))}: {_esc(scene.get('title', ''))}</h3>
    <p class="correlation-scores">Dynamism: {score_to_emoji(dynamism_score)} Audio-visual correlation: {score_to_emoji(av_correlation)} Cast: {_esc(cast)}</p>
    <table class="metadata-table">
        <tr><th>Timestamp</th><th>Type</th><th>Description</th></tr>
"""]

    scene_events = _collect_scene_events(scene)
    # Sort events by timestamp
    scene_events.sort(key=_event_sort_key)

    for event in scene_events:
        start_time = format_timestamp(event['timestamp_start'])
        end_time = format_timestamp(event['timestamp_end'])
        start_link = f'{start_time}' if start_time else ''
        end_link = f' - {end_time}' if end_time else ''
        parts.append(f"""
        <tr>
            <td>{_esc(start_link)}{_esc(end_link)}</td>
            <td>{_esc(event['type'])}</td>
            <td>{_esc(event['description'])}</td>
        </tr>
""")

    parts.append("""
    </table>
</div>
""")
    return "".join(parts)


def create_storylines_table(storylines: Dict[str, Any]) -> str:
    """Render the storyline description and its scene list as an HTML table."""
    if not isinstance(storylines, dict):
        storylines = {}
    description = storylines.get('description', 'No description available')
    scenes = ', '.join(map(str, storylines.get('scenes', []) or []))
    return f"""
<div class="scene-container">
    <h3>Storylines</h3>
    <table class="metadata-table">
        <tr><th>Storyline</th><th>Scenes Involved</th></tr>
        <tr><td>{_esc(description)}</td><td>{_esc(scenes)}</td></tr>
    </table>
</div>
"""


def create_qa_section(qa_list: List[Dict[str, str]]) -> str:
    """Render question/answer pairs as an HTML discussion block."""
    parts = ["""
<div class="scene-container">
    <h3>Q&amp;A</h3>
    <div class="chat-discussion">
"""]
    for qa in qa_list or []:
        parts.append(f"""
        <div class="question">{_esc(qa.get('question', ''))}</div>
        <div class="answer">{_esc(qa.get('answer', ''))}</div>
""")
    parts.append("""
    </div>
</div>
""")
    return "".join(parts)


def create_trimming_suggestions(suggestions: List[Dict[str, Any]]) -> str:
    """Render trimming suggestions as an HTML table of time range and description."""
    parts = ["""
<div class="scene-container">
    <h3>Trimming Suggestions</h3>
    <table class="metadata-table">
        <tr><th>Timestamp</th><th>Description</th></tr>
"""]
    for suggestion in suggestions or []:
        start_time, end_time = _timestamp_range(suggestion, 'timestamps')
        start_formatted = format_timestamp(start_time)
        end_formatted = format_timestamp(end_time)
        time_range = start_formatted + (f' - {end_formatted}' if end_formatted else '')
        parts.append(f"""
        <tr>
            <td>{_esc(time_range)}</td>
            <td>{_esc(suggestion.get('description', ''))}</td>
        </tr>
""")
    parts.append("""
    </table>
</div>
""")
    return "".join(parts)


def create_filmstrip(scenes: List[Dict[str, Any]], video_duration: float) -> str:
    """Render scenes as horizontally positioned segments of a filmstrip.

    Each segment's offset and width are its start time and length expressed
    as a percentage of *video_duration*. Returns ``""`` when there are no
    scenes or the duration is not positive (nothing can be positioned).
    """
    if not scenes or video_duration <= 0:
        return ""

    parts = ['<div style="position: relative; width: 100%; height: 100%;">']
    for scene in scenes:
        timestamps = scene.get('timestamps')
        if not isinstance(timestamps, dict):
            timestamps = {}
        start_raw = timestamps.get('start_timestamp')
        end_raw = timestamps.get('end_timestamp')
        start_time = timestamp_to_seconds(start_raw) if start_raw else 0.0
        # A scene without an end runs to the end of the video.
        end_time = timestamp_to_seconds(end_raw) if end_raw else video_duration

        left_pos = (start_time / video_duration) * 100
        width = (max(end_time - start_time, 0) / video_duration) * 100
        title = _esc(scene.get('title', ''))
        parts.append(f'''
    <div title="{title}" style="position: absolute; left: {left_pos}%; width: {width}%; height: 100%; box-sizing: border-box; border-right: 1px solid #0066cc; background-color: rgba(0, 102, 204, 0.15); overflow: hidden;">
        <div style="font-size: 10px; padding: 2px; word-wrap: break-word;">{title}</div>
    </div>
''')
    parts.append('</div>')
    return "".join(parts)


def _create_character_table(characters: List[Dict[str, Any]]) -> str:
    """Render the character list as an HTML table of name and description."""
    parts = ["""
<div class="scene-container">
    <h3>Characters</h3>
    <table class="metadata-table">
        <tr><th>Character</th><th>Description</th></tr>
"""]
    for character in characters or []:
        parts.append(f"""
        <tr>
            <td>{_esc(character.get('name', ''))}</td>
            <td>{_esc(character.get('description', ''))}</td>
        </tr>
""")
    parts.append("""
    </table>
</div>
""")
    return "".join(parts)


def process_video(video_id: str):
    """Build the three HTML fragments shown for *video_id*.

    Returns:
        ``(video_html, filmstrip_html, metadata_html)``. On any failure the
        exception is logged and ``(None, "", "Error processing video: ...")``
        is returned instead of raising.
    """
    try:
        logger.info(f"Processing video with ID: {video_id}")
        metadata = load_metadata(video_id)

        # Always use the test URL instead of the actual video file
        video_url = f"https://huggingface.co/spaces/HuggingFaceFV/FineVideo-Explorer/resolve/main/video/{video_id}.mp4"

        # Create HTML for video player
        video_html = f"""
<div id="video-wrapper">
    <video id="video-player" controls>
        <source src="{_esc(video_url)}" type="video/mp4">
        Your browser does not support the video tag.
    </video>
</div>
"""

        character_table = _create_character_table(metadata.get('characterList', []))

        additional_data = f"""
<div class="video-info">
    <h2>{_esc(metadata.get('title', 'Untitled'))}</h2>
    <p><strong>Description:</strong> {_esc(metadata.get('description', 'No description available'))}</p>
</div>
{character_table}
"""

        scenes = metadata.get('scenes', []) or []
        scenes_output = "".join(create_scene_table(scene) for scene in scenes)

        storylines_output = create_storylines_table(metadata.get('storylines', {}))
        qa_output = create_qa_section(metadata.get('qAndA', []))
        trimming_suggestions_output = create_trimming_suggestions(metadata.get('trimmingSuggestions', []))

        # The end of the last scene is used as the video duration.
        filmstrip_html = ""
        if scenes:
            last_timestamps = scenes[-1].get('timestamps')
            if not isinstance(last_timestamps, dict):
                last_timestamps = {}
            video_duration = timestamp_to_seconds(last_timestamps.get('end_timestamp', '0:00:00'))
            filmstrip_html = create_filmstrip(scenes, video_duration)

        logger.info("Video processing completed successfully")

        return (
            video_html,
            filmstrip_html,
            additional_data + scenes_output + storylines_output + qa_output + trimming_suggestions_output,
        )
    except Exception as e:
        # Top-level UI boundary: report the failure in the page instead of crashing.
        logger.exception("Error processing video: %s", e)
        return None, "", f"Error processing video: {_esc(e)}"


css = """
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
    /* flex: 0!important; */
}
html, body, gradio-app {
    height: 100%;
    min-height: unset!important;
    max-height: unset!important;
    display: block!important;
}
.main {
    flex-grow: 1;
    flex-shrink: 1;
    overflow: hidden;
}
.main .wrap .contain {
    flex-grow: 1;
    flex-shrink: 1;
    overflow: hidden;
}
#component-0 {
    overflow: hidden;
}
/* .app {
    overflow: hidden;
} */
#top-panel {
    flex-shrink: 0;
}
body {
    margin: 0;
    padding: 0;
    font-family: Arial, sans-serif;
    overflow: hidden;
}
.container {
    display: flex;
    flex-direction: column;
    height: 100vh;
}
#header {
    display: flex;
    align-items: center;
    padding: 10px;
    background-color: white;
}
#logo {
    width: auto;
    height: 150px;
    box-shadow: none !important;
    border: none !important;
    background: none !important;
    object-fit: contain;
}
#header-content {
    flex-grow: 1;
    display: flex;
    justify-content: space-between;
    align-items: center;
}
#header-content h1 {
    margin: 0;
    font-size: 36px;
    font-weight: bold;
}
#header-content a {
    font-size: 18px;
    color: #0066cc;
    text-decoration: none;
}
#header-content a:hover {
    text-decoration: underline;
}
#top-panel {
    height: 33vh;
    display: flex;
    padding: 10px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    overflow: hidden;
}
#video-list-column {
    max-height: 80vh; /* Adjust as needed */
    overflow-y: auto;
    height: 100%;
}
#video-column {
    width: 70%;
    display: flex;
    flex-direction: column;
}
#video-wrapper {
    flex-grow: 1;
    display: flex;
    justify-content: center;
    align-items: center;
    overflow: hidden;
}
#video-player {
    width: 100%;
    max-height: calc(33vh - 120px) !important;
}
#filmstrip-container {
    width: 100%;
    height: 80px !important;
    background-color: #f0f0f0;
    position: relative;
    overflow: hidden;
    cursor: pointer;
}
#filmstrip-container > div,
#filmstrip-container > div > div,
#filmstrip-container > div > div > div {
    height: 100% !important;
}
#scrollable-content {
    overflow-y: auto;
    padding: 20px;
}
#metadata-container {
    margin-top: 20px;
}
.content-samples {
    display: flex;
    flex-direction: column;
    overflow-y: auto;
    max-height: 100%;
}
.content-samples > .wrap {
    display: flex;
    flex-direction: column;
}
.content-samples .hidden {
    display: none !important;
}
.content-samples > .wrap > .wrap {
    display: flex !important;
    flex-direction: column !important;
}
.content-samples label {
    display: block;
    padding: 10px;
    cursor: pointer;
    border-bottom: 1px solid #ddd;
    white-space: nowrap;
    overflow: hidden;
    text-overflow: ellipsis;
}
.content-samples label:hover {
    background-color: #f0f0f0;
}
.video-info {
    margin-bottom: 20px;
}
.scene-container {
    margin-bottom: 30px;
}
.metadata-table {
    width: 100%;
    border-collapse: collapse;
    margin-bottom: 20px;
}
.metadata-table th, .metadata-table td {
    border: 1px solid #ddd;
    padding: 8px;
    text-align: left;
}
.metadata-table th {
    background-color: #f2f2f2;
}
.metadata-table tr:nth-child(even) {
    background-color: #f9f9f9;
}
.timestamp-link {
    color: #0066cc;
    text-decoration: none;
    cursor: pointer;
}
.timestamp-link:hover {
    text-decoration: underline;
}
.chat-discussion {
    background-color: #f0f0f0;
    border-radius: 10px;
    padding: 15px;
    margin-bottom: 20px;
}
.question {
    font-weight: bold;
    margin-bottom: 5px;
}
.answer {
    margin-bottom: 15px;
    padding-left: 15px;
}
.correlation-scores {
    font-size: 18px;
    margin-bottom: 20px;
}
#reinitialization-overlay {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0, 0, 0, 0.5);
    display: flex;
    justify-content: center;
    align-items: center;
    z-index: 9999;
    color: white;
    font-size: 24px;
    font-weight: bold;
}
@media (max-width: 768px) {
    #header {
        flex-direction: column;
        align-items: flex-start;
        flex-wrap: nowrap;
        overflow: hidden;
        max-height: 25vh;
    }
    #header-content h1 {
        font-size: 24px;
    }
    #header-content p {
        font-size: 14px;
    }
    #logo {
        align-self: flex-end;
        margin-top: 10px;
        position: absolute;
        top: 12px;
        right: -24px;
        height: auto!important;
        width: 150px;
        min-width: 0!important;
        margin:0!important;
    }
    #logo .image-container {
        height: auto!important;
    }
    .image-frame {
        height: auto!important;
    }
    #top-panel {
        flex-direction: column;
        height: 50vh;
    }
    #video-list-column, #video-column {
        width: 100%;
    }
    #video-list-column {
        flex-grow: 7!important;
    }
}
.icon-buttons button {
    display: none !important;
}
/* Ensure one element per row in Gradio list */
#video-list-column .wrap {
    display: flex;
    flex-direction: column;
}
#video-list-column .wrap > .wrap {
    display: flex !important;
    flex-direction: column !important;
}
#video-list-column label {
    display: block;
    width: 100%;
}
"""

# Whitespace-only head snippet: no custom script is injected into the page.
js = """ """

with gr.Blocks(css=css, head=js) as iface:
    with gr.Row(elem_id="header"):
        with gr.Column(scale=1):
            gr.Image("logo.png", elem_id="logo", show_label=False, interactive=False)
            gr.Markdown("### Click a title to dive into the data:")
        with gr.Column(elem_id="header-content", scale=10):
            gr.Markdown("""
            # Exploration page
            ## [🔗 Dataset](https://huggingface.co/datasets/HuggingFaceFV/finevideo)
            """)

    with gr.Row(elem_id="top-panel"):
        with gr.Column(scale=3, elem_id="video-list-column"):
            video_list_data = load_video_list()
            video_list = gr.Radio(
                label="Content Samples",
                choices=[video["title"] for video in video_list_data],
                elem_id="video-list",
                value=None,
                container=False
            )
        with gr.Column(scale=7, elem_id="video-column"):
            video_output = gr.HTML(elem_id="video-container")
            filmstrip_output = gr.HTML(elem_id="filmstrip-container")

    with gr.Row(elem_id="scrollable-content"):
        metadata_output = gr.HTML(elem_id="metadata-container")

    # Title -> video ID. When two videos share a title the first one in the
    # list wins, matching a first-match scan over video_list_data.
    video_ids_by_title: Dict[str, str] = {}
    for video in video_list_data:
        video_ids_by_title.setdefault(video["title"], video["video_id"])

    def wrapped_process_video(title: str) -> tuple:
        """Resolve the selected *title* to a video ID and render its page.

        Returns three empty strings when nothing is selected or the title is
        not in the list.
        """
        if not title:
            return "", "", ""
        video_id = video_ids_by_title.get(title)
        if video_id is None:
            logger.warning("No video found for title: %s", title)
            return "", "", ""
        logger.info(f"Processing video with ID: {video_id}")
        video_html, filmstrip_html, metadata_html = process_video(video_id)
        return video_html, filmstrip_html, metadata_html

    video_list.change(
        fn=wrapped_process_video,
        inputs=[video_list],
        outputs=[video_output, filmstrip_output, metadata_output]
    )

if __name__ == "__main__":
    iface.launch()