import os
from collections import deque
from pathlib import Path

import gradio as gr
from rich.console import Console
from rich.syntax import Syntax

# Directory containing this file. Built from __file__ (the module's path), not
# __name__ (the module's name, e.g. "__main__"), so bundled assets are found
# regardless of the current working directory.
proj_dir = Path(__file__).resolve().parent

# Both environment variables are required: a missing one raises KeyError as
# soon as the module is imported.
subreddit = os.environ["SUBREDDIT"]
username = os.environ["USERNAME"]

# Identifier of the dataset, used below to build its huggingface.co link.
dataset_name = f"{username}/dataset-creator-{subreddit}"


def log_file_to_html_string():
    """Render the tail of the log file as syntax-highlighted HTML.

    Reads the last ``num_lines_visualize`` lines of ``mylog.log`` (resolved
    against the current working directory), highlights them with rich using
    the Python lexer and the Monokai theme, and exports the recorded console
    output as HTML with inline styles.

    Returns:
        The HTML string produced by ``Console.export_html``.

    Raises:
        OSError: If the log file cannot be opened (e.g. it does not exist).
    """
    log_file = "mylog.log"
    num_lines_visualize = 50

    # A bounded deque keeps only the newest lines while the file is streamed,
    # so the whole log is never held in memory at once.
    with open(log_file, "rt") as f:
        tail = deque(f, maxlen=num_lines_visualize)

    syntax = Syntax("".join(tail), "python", theme="monokai", word_wrap=True)

    # record=True makes the console keep what it prints so it can be exported.
    console = Console(record=True, width=150)
    console.print(syntax)
    return console.export_html(inline_styles=True)


# Markdown shown at the top of the page; links to the dataset named above.
markdown = f"""
# Reddit Scraper
This is a reddit scraper which builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).

As shown below this space pulls data from pushshift.io, processes it, and puts it in a corresponding dataset.
"""

# Page layout: description, diagram image, and a periodically refreshed log view.
with gr.Blocks() as demo:
    gr.Markdown(markdown)
    gr.Image(proj_dir / 'media' / 'reddit_scraper.drawio.png')
    # The value is a callable; every=1 asks Gradio to re-run it each second.
    output = gr.HTML(log_file_to_html_string, every=1)
    # No Python callback on load (fn is None): only the client-side snippet
    # runs, toggling the 'dark' class on <body> and setting the background
    # colour of the <gradio-app> element.
    demo.load(
        None,
        _js="""
        () => {
        document.body.classList.toggle('dark');
        document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
        }
        """,
    )

if __name__ == '__main__':
    # Serve on all network interfaces at port 7860 with the request queue on.
    demo.launch(server_name="0.0.0.0", show_error=True, server_port=7860, enable_queue=True)