Python学习笔记(文件存储:TXT存储)

open()方法常用写入方式:w(覆盖写入,打开时先清空文件)、a(追加写入,在文件末尾继续写)、w+ / a+(可读可写)、wb / ab(以二进制方式写入)。

Python学习笔记(文件存储:TXT存储)

 

 

import requests
from pyquery import PyQuery as pq

# Page to download: the Zhihu "explore" listing.
url = "https://www.zhihu.com/explore"

# HTTP request headers handed to requests.get() further down. Long values are
# split across adjacent string literals purely for readability; the resulting
# strings are unchanged.
headers = {
    "authority": "www.zhihu.com",
    "pragma": "no-cache",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.87 Safari/537.36"
    ),
    "sec-fetch-dest": "document",
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "sec-fetch-site": "same-origin",
    "sec-fetch-mode": "navigate",
    "sec-fetch-user": "?1",
    # The referer is the same address as the page being requested.
    "referer": url,
    "accept-language": "zh-CN,zh;q=0.9",
    # One "name=value" pair per entry, joined back into a single Cookie header.
    # NOTE(review): this looks like a session cookie captured from a logged-in
    # browser -- presumably expired by now, and credentials should not live in
    # source; confirm and consider loading it from the environment instead.
    "cookie": "; ".join((
        "_zap=dc4571ea-3dad-4be8-b997-b3f57c056769",
        'd_c0="ACBk5gUOoBCPTno3T_J5hZOFl6gFOL5RL00=|1578399001"',
        "_xsrf=cF5MTFbH2ZH0NYo7vOlMp2FyoDlDnMdh",
        'capsion_ticket="2|1:0|10:1580187626|14:capsion_ticket|44:YjAzNDM2YTMwZjlhNDM4YjljMTNiYmUwNDM1MTc5MWI=|87966451bb39d5c88e16124664850fe5b1ecb16311389b1d23757509220268e1"',
        'z_c0="2|1:0|10:1580187646|4:z_c0|92:Mi4xWDgwdkF3QUFBQUFBSUdUbUJRNmdFQ1lBQUFCZ0FsVk5fZzBkWHdCZDZQa1VVenYxQ2F6bG5SS0F0UmRmdXdRaVp3|d47338e36bea227d304085d548d1dbb137faf10d9e1519b19d8749a77fadf91d"',
        "tst=r",
        "Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1580636214,1580636604,1580636674,1581255318",
        "SL_GWPT_Show_Hide_tmp=1",
        "SL_wptGlobTipTmp=1",
        "Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1581255336",
        "KLBRSID=fb3eda1aa35a9ed9f88f346a7a3ebe83|1581255336|1581255316",
    )),
}

# Download the explore page. A timeout is passed explicitly because requests
# does not time out on its own, so a stalled connection would otherwise hang
# the script forever.
html = requests.get(url, headers=headers, timeout=10).text
print(html)  # dump the raw HTML so the selectors below can be checked by eye
doc = pq(html)

# Build one text record per feed item: question, author and answer on separate
# lines, followed by a separator line of 50 '=' characters.
items = doc('.explore-tab .feed-item').items()
records = []
for item in items:
    question = item.find('h2').text()
    author = item.find('.author-link-line').text()
    # The answer's inner HTML is re-parsed so that .text() returns it without
    # markup. html() can come back as None (no '.content' element matched);
    # use an empty answer in that case rather than handing None to pq().
    content_html = item.find('.content').html()
    answer = pq(content_html).text() if content_html else ''
    records.append('\n'.join([question, author, answer]) + '\n' + '=' * 50 + '\n')

# Open the output file once instead of once per item. Mode 'a' appends to what
# is already there ('w' would truncate the file first), and the with-statement
# closes the file, so no explicit close() call is needed. The file is left
# untouched when no items were found, as with the per-item writes this replaces.
if records:
    with open('D:/explore.txt', 'a', encoding='utf-8') as file:
        file.writelines(records)