Spaces:

reddit-tools-HF
/

dataset-creator-reddit-bestofredditorupdates

Running

App Files Files Community

dataset-creator-reddit-bestofredditorupdates / app.py

derek-thomas's picture

derek-thomas HF staff

App shows last 50 lines

e014498 over 1 year ago

1.74 kB

	import os
	from collections import deque
	from pathlib import Path

	import gradio as gr
	from rich.console import Console
	from rich.syntax import Syntax

	# Directory that contains this script. It is derived from __file__ (a real
	# filesystem path) rather than __name__ (a module name such as "__main__"),
	# so bundled assets resolve correctly regardless of the working directory.
	proj_dir = Path(__file__).resolve().parent

	# Required configuration, read from the environment in a fixed order;
	# indexing os.environ raises KeyError if either variable is missing.
	subreddit, username = (os.environ[var] for var in ("SUBREDDIT", "USERNAME"))

	# Dataset identifier in "<username>/dataset-creator-<subreddit>" form.
	dataset_name = f"{username}/dataset-creator-{subreddit}"


	def log_file_to_html_string(log_file: str = "mylog.log", num_lines_visualize: int = 50) -> str:
	    """Render the tail of a log file as syntax-highlighted HTML.

	    Args:
	        log_file: Path of the text file to read. Defaults to "mylog.log".
	        num_lines_visualize: Maximum number of trailing lines to render.
	            Defaults to 50; 0 renders no lines.

	    Returns:
	        The HTML string exported by the recording rich console, with inline styles.

	    Raises:
	        OSError: If ``log_file`` cannot be opened (e.g. it does not exist).
	        ValueError: If ``num_lines_visualize`` is negative.
	    """
	    # Stream the file through a bounded deque so only the newest lines are
	    # kept in memory, instead of materializing the whole log on every call.
	    with open(log_file, "rt") as f:
	        tail = deque(f, maxlen=num_lines_visualize)

	    # Highlight the retained lines using the Python lexer and Monokai theme.
	    syntax = Syntax("".join(tail), "python", theme="monokai", word_wrap=True)

	    # record=True makes the console remember what it printed so that it can
	    # be exported as HTML afterwards.
	    console = Console(record=True, width=150)
	    console.print(syntax)
	    return console.export_html(inline_styles=True)


	# Introductory Markdown shown at the top of the page; links to the dataset named by dataset_name.
	markdown = f"""
	# Reddit Scraper
	This is a reddit scraper which builds [{dataset_name}](https://huggingface.co/datasets/{dataset_name}).

	As shown below this space pulls data from pushshift.io, processes it, and puts it in a corresponding dataset.
	"""

	# Page layout: intro text, architecture diagram, then the log view.
	with gr.Blocks() as demo:
	    gr.Markdown(markdown)
	    gr.Image(proj_dir.joinpath('media', 'reddit_scraper.drawio.png'))
	    # Passing the callable (not its result) together with every=1 asks
	    # Gradio to re-evaluate it periodically, keeping the log view fresh.
	    output = gr.HTML(value=log_file_to_html_string, every=1)
	    # Client-side hook run on page load: toggles the 'dark' class on <body>
	    # and sets the app background to the theme's primary background colour.
	    demo.load(
	        None,
	        _js="""
	() => {
	document.body.classList.toggle('dark');
	document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)'
	}
	""",
	    )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(
	        server_name="0.0.0.0",  # listen on all network interfaces
	        server_port=7860,
	        show_error=True,
	        enable_queue=True,
	    )