Thursday, 29 October 2020

Python---获取div标签中的文字

 

<span style="color:#000000"># -*- coding: UTF-8 -*-
import requests
import time
import re
from bs4 import BeautifulSoup
from urllib.request import urlretrieve

# Patterns used to reduce a serialized tag to its bare text. Compiled once
# at module level so they are not looked up again for every scraped element.
TAG_PATTERN = re.compile(r'<[^>]+>')
TAB_NEWLINE_PATTERN = re.compile('[\t\n]')


def strip_markup(fragment: str) -> str:
    """Return *fragment* with HTML tags, tabs and newlines removed.

    Spaces and any other characters are left untouched.
    """
    return TAB_NEWLINE_PATTERN.sub("", TAG_PATTERN.sub("", fragment))


def fetch_names(url: str, headers: dict, timeout: float = 10) -> list:
    """Download *url* and return the text of every ``name`` element.

    Only elements with class ``name`` located inside an element whose class
    attribute is ``container page-section`` are collected.

    Args:
        url: Page to download.
        headers: HTTP request headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of strings, one per matching element, with markup, tabs and
        newlines stripped.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # Without a timeout, requests may block forever on an unresponsive host.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of scraping an error page and reporting success.
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    names = []
    # Search inside each matched section directly rather than serializing
    # the result list to a string and parsing it a second time.
    for section in soup.find_all(class_='container page-section'):
        for tag in section.find_all(class_='name'):
            names.append(strip_markup(str(tag)))
    return names


def save_lines(path: str, lines) -> None:
    """Write each item of *lines* to *path*, one per line, as UTF-8.

    The file is created or truncated. It is closed (and therefore flushed)
    even if writing fails part-way through.
    """
    # An explicit encoding keeps non-ASCII names from failing on platforms
    # whose default locale encoding is not UTF-8.
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(line + '\n' for line in lines)


if __name__ == '__main__':
    url = 'https://www.names.org/n/kevin/about'
    # Request headers: present a browser User-Agent to the server.
    headers = {
        "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }

    # Collect the text of the name elements and save them one per line.
    list_url = fetch_names(url, headers)
    save_lines('test.txt', list_url)

    print(list_url)

    print('下载完成!')
          </span>

No comments:

Post a Comment

zomok E-commerce system plan. Choose your online ordering system. No-risk 30 day free trial. Then USD 9/month. No credit card required.

zomok E-commerce system plan. Choose your online ordering system. No-risk 30 day free trial. Then USD 9/month. No credit card required. h...