import os
from pathlib import Path

import gradio as gr
from rich.console import Console
from rich.syntax import Syntax

proj_dir = Path(__file__).parent

subreddit = os.environ["SUBREDDIT"]
username = os.environ["USERNAME"]
dataset_name = f"{username}/dataset-creator-{subreddit}"


def log_file_to_html_string():
    log_file = "mylog.log"
    num_lines_visualize = 50

    console = Console(record=True, width=150)
    with open(log_file, "rt") as f:
        # Read the file and keep only the last `num_lines_visualize` lines
        lines = f.readlines()
        lines = lines[-num_lines_visualize:]

    # Syntax-highlight the tail of the log with the Python lexer and Monokai
    # theme, then export the rendered console output as HTML
    output = "".join(lines)
    syntax = Syntax(output, "python", theme="monokai", word_wrap=True)
    console.print(syntax)
    html_content = console.export_html(inline_styles=True)
    return html_content


markdown = f"""
# Reddit Scraper

This is a Reddit scraper that builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).

As shown below, this Space pulls data from pushshift.io, processes it, and pushes it to the corresponding dataset.
"""

with gr.Blocks() as demo:
    gr.Markdown(markdown)
    gr.Image(str(proj_dir / 'media' / 'reddit_scraper.drawio.png'))
    # Re-render the log tail every second
    output = gr.HTML(log_file_to_html_string, every=1)
    demo.load(None, _js="""
        () => {
            document.body.classList.toggle('dark');
            document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
        }
    """)

if __name__ == '__main__':
    demo.launch(server_name="0.0.0.0", show_error=True, server_port=7860, enable_queue=True)
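
# Note: this app only reads "mylog.log"; the scraper process is expected to
# write it. A minimal sketch of a compatible logging setup, assuming a plain
# file handler (the level and format below are illustrative assumptions, not
# part of the original code):
#
#     import logging
#
#     logging.basicConfig(
#         filename="mylog.log",
#         level=logging.INFO,
#         format="%(asctime)s %(levelname)s %(message)s",
#     )
#     logging.info("Scraping r/%s into %s", subreddit, dataset_name)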