{"id":88,"date":"2024-02-06T01:19:16","date_gmt":"2024-02-05T17:19:16","guid":{"rendered":"https:\/\/www.nickchan.cn\/?p=88"},"modified":"2024-07-10T03:21:49","modified_gmt":"2024-07-09T19:21:49","slug":"bilibili%e7%bd%91%e7%ab%99%e5%9f%ba%e4%ba%8e%e5%85%b3%e9%94%ae%e5%ad%97%e7%88%ac%e5%8f%96%e6%95%b0%e6%8d%ae","status":"publish","type":"post","link":"https:\/\/www.nickchan.cn\/index.php\/2024\/02\/06\/bilibili%e7%bd%91%e7%ab%99%e5%9f%ba%e4%ba%8e%e5%85%b3%e9%94%ae%e5%ad%97%e7%88%ac%e5%8f%96%e6%95%b0%e6%8d%ae\/","title":{"rendered":"Bilibili\u7f51\u7ad9\u57fa\u4e8e\u5173\u952e\u5b57\u722c\u53d6\u6570\u636e"},"content":{"rendered":"<h1>\u529f\u80fd\uff1a<\/h1>\n<p>\u901a\u8fc7\u8f93\u5165\u6307\u5b9a\u5173\u952e\u5b57\u3001\u89c6\u9891\u9875\u6570\u53ca\u4fdd\u5b58\u6587\u4ef6\u540d\uff0c\u722c\u53d6\u89c6\u9891\u7f51\u7ad9\u4e0a\u6307\u5b9a\u8303\u56f4\u5185\u7684\u89c6\u9891\u6570\u636e\uff0c\u5982\u540d\u79f0\u3001\u94fe\u63a5\u3001\u64ad\u653e\u91cf\uff0c\u8bc4\u8bba\u91cf\u7b49<\/p>\n<h1>\u4ee3\u7801\uff1a<\/h1>\n<pre><code class=\"language-python\">import requests\nfrom lxml import etree\nimport time\nimport random\nimport csv\nimport pandas as pd\n\ndef get_target(keyword, page,saveName):\n    result = pd.DataFrame()\n\n    for i in range(1, page + 1):\n        headers = {\n          &#039;user-agent&#039;: &#039;Mozilla\/5.0 (Windows NT 10.0; WOW64) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/80.0.3987.116 Safari\/537.36&#039;\n        }\n\n        url = &#039;https:\/\/search.bilibili.com\/all?keyword={}&amp;from_source=nav_suggest_new0&amp;page={}&#039;.format(keyword, i)\n        html = requests.get(url.format(i), headers=headers)\n        bs = etree.HTML(html.text)\n        items = bs.xpath(&#039;\/\/li[@class = &quot;video-item matrix&quot;]&#039;)\n        for item in items:\n            video_url = item.xpath(&#039;div[@class = &quot;info&quot;]\/div\/a\/@href&#039;)[0].replace(&quot;\/\/&quot;,&quot;&quot;)                   #\u6bcf\u4e2a\u89c6\u9891\u7684\u6765\u6e90\u5730\u5740\n            title = item.xpath(&#039;div[@class = &quot;info&quot;]\/div\/a\/@title&#039;)[0]                  #\u6bcf\u4e2a\u89c6\u9891\u7684\u6807\u9898\n            region = item.xpath(&#039;div[@class = &quot;info&quot;]\/div[1]\/span[1]\/text()&#039;)[0].strip(&#039;\\n        &#039;)          #\u6bcf\u4e2a\u89c6\u9891\u7684\u5206\u7c7b\u7248\u5757\u5982\u52a8\u753b\n            view_num = item.xpath(&#039;div[@class = &quot;info&quot;]\/div[3]\/span[1]\/text()&#039;)[0].strip(&#039;\\n        &#039;)         #\u6bcf\u4e2a\u89c6\u9891\u7684\u64ad\u653e\u91cf\n            danmu = item.xpath(&#039;div[@class = &quot;info&quot;]\/div[3]\/span[2]\/text()&#039;)[0].strip(&#039;\\n        &#039;)         #\u5f39\u5e55\n            upload_time  = item.xpath(&#039;div[@class = &quot;info&quot;]\/div[3]\/span[3]\/text()&#039;)[0].strip(&#039;\\n        &#039;)  # \u4e0a\u4f20\u65e5\u671f\n            up_author = item.xpath(&#039;div[@class = &quot;info&quot;]\/div[3]\/span[4]\/a\/text()&#039;)[0].strip(&#039;\\n        &#039;)          #up\u4e3b\n\n            df = pd.DataFrame({&#039;region&#039;: [region],&#039;title&#039;: [title], &#039;view_num&#039;: [view_num], &#039;danmu&#039;: [danmu], &#039;upload_time&#039;: [upload_time], &#039;up_author&#039;: [up_author], &#039;video_url&#039;: [video_url]})\n            result = pd.concat([result, df])\n\n        time.sleep(random.random() + 1)\n        print(&#039;\u5df2\u7ecf\u5b8c\u6210b\u7ad9\u7b2c {} \u9875\u722c\u53d6&#039;.format(i))\n    saveName = saveName + &quot;.csv&quot;\n    result.to_csv(saveName, encoding=&#039;utf-8-sig&#039;,index=False)  # \u4fdd\u5b58\u4e3acsv\u683c\u5f0f\u7684\u6587\u4ef6\n    return result\n\nif __name__ == &quot;__main__&quot;:\n    keyword = input(&quot;\u8bf7\u8f93\u5165\u8981\u641c\u7d22\u7684\u5173\u952e\u8bcd\uff1a&quot;)\n    page = int(input(&quot;\u8bf7\u8f93\u5165\u8981\u722c\u53d6\u7684\u9875\u6570\uff1a&quot;))\n    saveName = input(&quot;\u8bf7\u8f93\u5165\u8981\u4fdd\u5b58\u7684\u6587\u4ef6\u540d\uff1a&quot;)\n    get_target(keyword, page,saveName)<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u529f\u80fd\uff1a \u901a\u8fc7\u8f93\u5165\u6307\u5b9a\u5173\u952e\u5b57\u3001\u89c6\u9891\u9875\u6570\u53ca\u4fdd\u5b58\u6587\u4ef6\u540d\uff0c\u722c\u53d6\u89c6\u9891\u7f51\u7ad9\u4e0a\u6307\u5b9a\u8303\u56f4\u5185\u7684\u89c6\u9891\u6570\u636e\uff0c\u5982\u540d\u79f0\u3001\u94fe\u63a5\u3001\u64ad\u653e\u91cf\uff0c\u8bc4 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"https:\/\/www.nickchan.cn\/index.php\/2024\/02\/06\/bilibili%e7%bd%91%e7%ab%99%e5%9f%ba%e4%ba%8e%e5%85%b3%e9%94%ae%e5%ad%97%e7%88%ac%e5%8f%96%e6%95%b0%e6%8d%ae\/\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u201cBilibili\u7f51\u7ad9\u57fa\u4e8e\u5173\u952e\u5b57\u722c\u53d6\u6570\u636e\u201d<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[138],"tags":[],"_links":{"self":[{"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/posts\/88"}],"collection":[{"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/comments?post=88"}],"version-history":[{"count":1,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/posts\/88\/revisions"}],"predecessor-version":[{"id":89,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/posts\/88\/revisions\/89"}],"wp:attachment":[{"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/media?parent=88"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/categories?post=88"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.nickchan.cn\/index.php\/wp-json\/wp\/v2\/tags?post=88"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}