import os from enum import Enum from typing import Iterator, Tuple, Union, List, Literal import cloudscraper from hbutils.system import urlsplit from pyquery import PyQuery as pq from .web import WebDataSource from ..utils import get_requests_session, srequest class OrderBy(str, Enum): STAR_DATE = "stars_date" DATE = "date" DATE_REVERS = "date_r" RATING = "rating" DOWNLOADS = "views" SIZE = "size" TAG_COUNT = "tag_num" class Period(str, Enum): ANYTIME = "0" PAST_DAY = "3" PAST_WEEK = "1" PAST_MONTH = "2" PAST_6_MONTHS = "4" PAST_YEAR = "5" PAST_2_YEARS = "6" PAST_3_YEARS = "7" class AnimePicturesSource(WebDataSource): __root__ = 'https://anime-pictures.net' def __init__(self, tags: List[str], tag_mode: Literal['or', 'and'] = 'and', denied_tags: List[str] = None, denied_tag_mode: Literal['or', 'and'] = 'or', order_by: OrderBy = OrderBy.RATING, period: Period = Period.ANYTIME, select: Literal['thumbnail', 'preview', 'original'] = 'original', group_name: str = 'anime_pictures', download_silent: bool = True, **kwargs): WebDataSource.__init__( self, group_name, get_requests_session(session=cloudscraper.create_scraper()), download_silent, ) self.tags, self.tag_mode = tags, tag_mode self.denied_tags, self.denied_tag_mode = (denied_tags or []), denied_tag_mode self.tag_mode = tag_mode self.order_by = order_by self.period = period self.select = select self.kwargs = kwargs def _params(self, page): params = { 'order_by': self.order_by.value, 'ldate': self.period.value, 'lang': 'en', 'page': str(page), } if self.tag_mode == 'and': params['search_tag'] = '&&'.join(self.tags) else: params['search_tag'] = '||'.join(self.tags) if self.denied_tags: if self.denied_tag_mode == 'and': params['denied_tags'] = '&&'.join(self.denied_tags) else: params['denied_tags'] = '||'.join(self.denied_tags) return {**params, **self.kwargs} def _get_url(self, post, resp): id_, md5 = post['id'], post['md5'] if self.select == 'thumbnail': return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_bp.jpg' elif self.select == 'preview': return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_cp.jpg' elif self.select == 'original': return pq(resp.text)('#rating a.download_icon').attr('href') else: raise ValueError(f'Invalid image selection - {self.select!r}.') def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: page = 0 while True: resp = srequest(self.session, 'GET', f'{self.__root__}/api/v3/posts', params=self._params(page)) resp.raise_for_status() posts = resp.json()['posts'] if not posts: break for post in posts: resp_page = srequest(self.session, 'GET', f'{self.__root__}/posts/{post["id"]}?lang=en') resp_page.raise_for_status() url = self._get_url(post, resp_page) tags = [item.text().replace(' ', '_') for item in pq(resp_page.text)('ul.tags li > a').items()] _, ext_name = os.path.splitext(urlsplit(url).filename) filename = f'{self.group_name}_{post["id"]}{ext_name}' meta = { 'anime_pictures': post, 'group_id': f'{self.group_name}_{post["id"]}', 'filename': filename, 'tags': {key: 1.0 for key in tags} } yield post['id'], url, meta page += 1