{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "91225aac-0181-478d-bb6a-2ad7512ec0ca",
   "metadata": {
    "tags": []
   },
   "source": [
    "### 更改设置\n",
    "更改并运行下方单元格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c42eb88-737c-4ead-b954-df29809e0076",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%writefile danbooru_crawler/settings.py\n",
    "# Scrapy settings for the danbooru_crawler project.\n",
    "# This file is overwritten every time the notebook cell that contains it is run,\n",
    "# so change the values in the notebook rather than in the generated file.\n",
    "\n",
    "BOT_NAME = \"danbooru_crawler\"\n",
    "\n",
    "# --- Crawl options (project-specific) ---\n",
    "SEARCH_TAG = \"cat girl\"  # Tag(s) to search for\n",
    "SEARCH_TYPE = 1  # 0 / 1: which image size to fetch; 0 = thumbnails, 1 = original images\n",
    "# True / False: chained search, i.e. also fetch child images. Useful when training an art\n",
    "# style; when training a concept it can pull in images that do not match the tag and\n",
    "# pollute the training set.\n",
    "SEARCH_LINK = False\n",
    "SEARCH_TAG_TXT = True  # True / False: also save each image's own tags; recommended\n",
    "\n",
    "# --- Scrapy framework settings ---\n",
    "SPIDER_MODULES = [\"danbooru_crawler.spiders\"]  # Modules Scrapy searches for spiders\n",
    "NEWSPIDER_MODULE = \"danbooru_crawler.spiders\"  # Module where `scrapy genspider` puts new spiders\n",
    "IMAGES_STORE = \"./pics\"  # Image output directory, relative to the working directory\n",
    "ROBOTSTXT_OBEY = False  # robots.txt rules are not honoured\n",
    "ITEM_PIPELINES = {\n",
    "    # The integer is the pipeline order (0-1000); lower values run first.\n",
    "    \"danbooru_crawler.pipelines.PicsDownloadPipeline\": 1,\n",
    "}\n",
    "REQUEST_FINGERPRINTER_IMPLEMENTATION = \"2.7\"  # Request-fingerprinting algorithm version\n",
    "TWISTED_REACTOR = \"twisted.internet.asyncioreactor.AsyncioSelectorReactor\"  # asyncio-based reactor\n",
    "FEED_EXPORT_ENCODING = \"utf-8\"  # Encoding used for exported feeds"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97c71a2b-9474-462e-9910-3fcbea431f53",
   "metadata": {
    "tags": []
   },
   "source": [
    "### 运行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b398c0d-effe-4891-94cf-2b4148b149bb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Run the project's entry script in a subprocess.\n",
    "# NOTE(review): main.py is not visible from this notebook; presumably it starts the crawl\n",
    "# using danbooru_crawler/settings.py written by the cell above - confirm.\n",
    "!python main.py"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}