Python3爬取妹子图的测试源码+Vim配置源码(图)

import re

import requests


def get_url(url, headers, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can wait indefinitely on a stalled server.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def get_img_page(html):
    """Extract one link per ``<figure ...>...</figure>`` span in ``html``.

    For every non-overlapping match, the value of the first ``href="..."``
    that follows the ``<figure`` opening is captured. Matching spans
    newlines (``re.S``).

    Args:
        html: Page markup as a string.

    Returns:
        A list of the captured ``href`` values, in document order; empty
        when nothing matches.
    """
    figure_link = re.compile('<figure.*?href="(.*?)".*?</figure>', re.S)
    return figure_link.findall(html)


def parse_img_page(item):
    """Download the page at ``item`` and return the first image address on it.

    Args:
        item: URL of the page to fetch; it is also sent as the ``Referer``
            request header.

    Returns:
        The ``src`` value captured from the first
        ``<figure ... src="..." ... </figure>`` match in the page.

    Raises:
        IndexError: If the page contains no matching ``<figure>`` image.
    """
    # Send the headers built here (including the Referer) instead of
    # depending on a module-level ``headers`` name happening to exist.
    headers = {"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",'Referer':item}
    page_html = get_url(item, headers)

    pattern = re.compile('<figure.*?src="(.*?)".*?</figure>', re.S)
    match = pattern.search(page_html)
    if match is None:
        # Same exception type as indexing an empty result list, but with
        # a message that names the page that failed.
        raise IndexError('no <figure> image found on page: %s' % item)
    return match.group(1)


def save_img(dow_addr, headers, timeout=10):
    """Download ``dow_addr`` and write it to a file in the working directory.

    The file name is the text after the last ``/`` of ``dow_addr``.

    Args:
        dow_addr: URL of the resource to download.
        headers: Mapping of HTTP request headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can wait indefinitely on a stalled server.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(dow_addr, headers=headers, timeout=timeout)
    filename = dow_addr.split('/')[-1]

    # The ``with`` block closes the file on exit; no explicit close needed.
    with open(filename, 'wb') as f:
        f.write(response.content)


def main():
    """Fetch the site index page and iterate over the links found on it.

    The loop body is still a placeholder, so nothing is downloaded or
    returned by this function yet.
    """
    headers = {"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36"}

    # get_url needs both the address and the request headers, and it returns
    # raw page text; the links have to be extracted before iterating.
    html = get_url('http://m.mzitu.com/', headers)
    img_urls = get_img_page(html)

    for img_url in img_urls:
        pass


if __name__ == '__main__':
    # Crawl the index page, then visit pages 2..34 of every link extracted
    # from it and save the image found on each of those pages.
    user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36"

    url = 'http://m.mzitu.com/' 
    headers = {"User-Agent": user_agent}

    html = get_url(url, headers)
    items = get_img_page(html)
    print(items)

    for item in items:
        for i in range(2, 35):
            img_addr = '/'.join((item, str(i)))
            print(img_addr)

            dow_addr = parse_img_page(img_addr)

            # The page just visited is sent as the Referer for the download.
            headers = {"User-Agent": user_agent, 'Referer': img_addr}
            save_img(dow_addr, headers)





Vim:

看不清?看下面(AwA嘻嘻)


感谢启明星的勾引提供的部分支持


本文禁止转载或摘编

-- --
  • 投诉或建议
评论