# -*- coding: UTF-8 -*-
"""Scrape the text of every ``name`` element from a names.org page.

The script downloads one page, looks inside the elements whose class is
``container page-section`` for elements with class ``name``, strips their
markup, and writes the resulting strings to ``test.txt`` (one per line).
"""
import re
import time
from urllib.request import urlretrieve

import requests
from bs4 import BeautifulSoup

# Page to scrape.
URL = 'https://www.names.org/n/kevin/about'

# Request headers: send a browser User-Agent with the request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
}

# File the extracted names are written to.
OUTPUT_PATH = 'test.txt'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Matches any HTML tag so that only the text between tags is kept.
_TAG_RE = re.compile(r'<[^>]+>')
# Matches the tab and newline characters left over from the page layout.
_TAB_NEWLINE_RE = re.compile(r'[\t\n]')


def _strip_markup(fragment):
    """Return *fragment* with HTML tags, tabs and newlines removed."""
    return _TAB_NEWLINE_RE.sub("", _TAG_RE.sub("", fragment))


def main():
    """Download the page, extract the names, and save them to OUTPUT_PATH."""
    req = requests.get(url=URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors rather than scraping an error page and
    # reporting success.
    req.raise_for_status()
    req.encoding = 'utf-8'
    html = req.text

    # First narrow the document down to the page sections, then search the
    # serialized sections for the elements carrying the names.
    bf = BeautifulSoup(html, 'html.parser')
    targets_url_1 = bf.find_all(class_='container page-section')
    bf = BeautifulSoup(str(targets_url_1), 'html.parser')
    targets_url_2 = bf.find_all(class_='name')

    # Keep only the text of each matched element.
    list_url = [_strip_markup(str(each)) for each in targets_url_2]

    # The context manager guarantees the file is flushed and closed; the
    # explicit encoding keeps non-ASCII names from failing on platforms
    # whose default encoding is not UTF-8.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        for each in list_url:
            f.write(each)
            f.write('\n')

    print(list_url)
    print('下载完成!')


if __name__ == '__main__':
    main()
No comments:
Post a Comment