Traceback (most recent call last):
  File "requests\adapters.py", line 439, in send
  File "urllib3\connectionpool.py", line 785, in urlopen
  File "urllib3\util\retry.py", line 592, in increment
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='www.jpmn8.cc', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1124)')))
Python3 requests 下载进度条
代码:
def proxy_get_content_stream(url):
    """Start a streaming GET request for *url*, optionally through SOCKS5.

    When the module-level flag ``is_use_proxy`` is truthy, the default PySocks
    proxy is set to ``PROXY_HOST:PROXY_PORT`` and ``socket.socket`` is replaced
    with ``socks.socksocket``. That replacement is process-wide and is not
    undone here.

    :param url: address to download.
    :return: the ``requests`` response opened with ``stream=True``; the caller
        is responsible for closing it.
    """
    if is_use_proxy:
        socks.set_default_proxy(socks.SOCKS5, PROXY_HOST, PROXY_PORT)
        socket.socket = socks.socksocket
    return requests.get(url, headers=HEADERS, stream=True, timeout=300)


def save_image_from_url_with_progress(url, cnt):
    """Download *url* into the file *cnt* while printing a progress line.

    The HTTP status code is not checked, so whatever body the server returns
    is written to the file.

    :param url: address of the resource to download.
    :param cnt: path of the output file; it is opened in ``"wb"`` mode.
    """
    with closing(proxy_get_content_stream(url)) as response:
        chunk_size = 1024  # maximum number of bytes read per iteration
        # The header may be absent (e.g. chunked transfer) or zero; treat both
        # as "total size unknown" instead of failing with KeyError or
        # ZeroDivisionError.
        content_size = int(response.headers.get('content-length') or 0)
        data_count = 0
        with open(cnt, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                data_count += len(data)
                if content_size > 0:
                    now_position = (data_count / content_size) * 100
                    # The decoded body can be larger than the advertised
                    # length, so cap the bar at 100 cells.
                    filled = min(100, int(now_position))
                    print("\r[D] 下载进度: %s %d%%(%d/%d)" % (
                        filled * '▊' + (100 - filled) * ' ',
                        now_position,
                        data_count,
                        content_size,
                    ), end=" ")
                else:
                    # Total size unknown: report only the bytes received.
                    print("\r[D] 下载进度: %d bytes" % data_count, end=" ")
    # Finish the progress line that was being rewritten with "\r".
    print('')
BeautifulSoup4 中文乱码
使用 BeautifulSoup4 解析页面时,发现有一部分内容是乱码。刚开始还以为是 PyCharm 的问题,后来发现问题可能不在 PyCharm 上,因为普通的 print 打印中文是没有问题的。测试代码如下:
def proxy_get(url):
    """Fetch *url* and return the response body as text.

    When the module-level flag ``is_use_proxy`` is truthy, the default PySocks
    proxy is set to ``PROXY_HOST:PROXY_PORT`` and ``socket.socket`` is replaced
    with ``socks.socksocket`` before the request is made.

    :param url: address to fetch.
    :return: ``Response.text`` of the GET request.
    """
    if is_use_proxy:
        socks.set_default_proxy(socks.SOCKS5, PROXY_HOST, PROXY_PORT)
        socket.socket = socks.socksocket
    # NOTE(review): ``.text`` is decoded with the encoding requests picks for
    # the response; presumably this is where the garbled Chinese originates.
    return requests.get(url, headers=HEADERS).text


def get_sub_pages_test(url):
    """Print the title of every list entry found on a listing page.

    Example page: http://www.meitulu.cn/t/shishen/

    :param url: address of the listing page to parse.
    :return: None
    """
    soup = BeautifulSoup(proxy_get(url), "html.parser")
    container = soup.find('div', class_='boxs')
    entries = container.find_all('li')
    log_text('PAGE', '开始分析页面链接', is_begin=True)
    for entry in entries:
        title = entry.find('p', class_='p_title')
        print(title.text)
Python requests socks代理
将 requests 更新为支持 SOCKS 的版本:
pip install -U requests[socks]
测试代码:
import requests

# Browser-like User-Agent sent with both requests.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
}

# The keys are the schemes of the target URL ('http' / 'https'); the SOCKS5
# proxy itself is named in the value, not in the key.
proxies = {
    'http': 'socks5://127.0.0.1:8889',
    'https': 'socks5://127.0.0.1:8889',
}

url = 'http://nkm3s8.xyz/'

# Issue the same request twice, first through the proxy and then directly,
# so the two outcomes can be compared. Passing proxies=None is the same as
# omitting the argument.
for label, proxy_config in (('Proxy:', proxies), ('No Proxy:', None)):
    try:
        print(label)
        res = requests.get(url, headers=headers, proxies=proxy_config, timeout=10)
        print(res)
    except Exception as e:
        # Diagnostic script: report the failure and carry on with the next case.
        print(e)