requests is a simple HTTP library for Python that is much easier to use than urllib.
Because it is a third-party library, it needs to be installed from the command line before use:

  • pip install requests
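
To confirm the installation worked, the installed version can be printed from Python (a quick sanity check, nothing more):

import requests
print(requests.__version__)  # Prints the installed requests version if the install succeeded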

Basic usage:

requests.get() sends a request to the target URL and returns a Response object.

import requests

response = requests.get('http://www.baidu.com')
print(response.status_code)  # Print status code
print(response.url)          # Print request url
print(response.headers)      # Print header information
print(response.cookies)      # Print cookie information
print(response.text)  # Print the page source as text
print(response.content)  # Print the response body as bytes

Sample output:
status code: 200
url: http://www.baidu.com/
headers: the response header information (followed by the cookies, the page text, and the raw bytes)
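
One detail worth noting: response.text is decoded using response.encoding, which requests guesses from the response headers. If a page comes back garbled, you can set the encoding yourself before reading text (a minimal sketch, assuming the page is actually UTF-8):

import requests

response = requests.get('http://www.baidu.com')
print(response.encoding)       # The encoding requests guessed from the response headers
response.encoding = 'utf-8'    # Override the guess (assumption: the page is actually UTF-8)
print(response.text[:200])     # First 200 characters, decoded with the new encoding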

The various request methods:

import requests

requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
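
All of these helpers are thin wrappers around requests.request(), and each returns the same Response object; a minimal sketch showing the equivalence (the method name is simply passed as a string):

import requests

# requests.get(url) is shorthand for requests.request('GET', url)
response = requests.request('GET', 'http://httpbin.org/get')
print(response.status_code)
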
Basic GET request
import requests

response = requests.get('http://httpbin.org/get')
print(response.text)

GET requests with parameters:

The first way is to put the parameters directly in the URL:

import requests

response = requests.get('http://httpbin.org/get?name=gemey&age=22')
print(response.text)

The other way is to build a dict first and pass it as the params argument when making the request:

import requests

data = {
    'name': 'tom',
    'age': 20
}

response = requests.get('http://httpbin.org/get', params=data)
print(response.text)
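
With params, requests URL-encodes the dict and appends it to the query string itself, which you can confirm by printing response.url (a small self-contained check, same data as above):

import requests

data = {'name': 'tom', 'age': 20}
response = requests.get('http://httpbin.org/get', params=data)
print(response.url)  # http://httpbin.org/get?name=tom&age=20 -- built from the dict
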
Parsing JSON
import requests

response = requests.get('http://httpbin.org/get')
print(response.text)
print(response.json())  # response.json() is equivalent to json.loads(response.text)
print(type(response.json()))
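
If the body is not valid JSON, response.json() raises an exception instead of returning None, so it can be worth guarding the call. A minimal sketch; ValueError is used here because the concrete JSON decode error class differs between requests versions but is always a subclass of it:

import requests

response = requests.get('http://httpbin.org/html')  # This endpoint returns HTML, not JSON
try:
    data = response.json()
except ValueError:  # Raised when the body cannot be parsed as JSON
    data = None
    print('Response body was not JSON')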

Saving a binary file
The binary content is available as response.content.

import requests

response = requests.get('http://img.ivsky.com/img/tupian/pre/201708/30/kekeersitao-002.jpg')
b = response.content
with open('F://fengjing.jpg','wb') as f:
    f.write(b)
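
For large files, response.content loads the whole body into memory at once. A hedged alternative is to stream the download with stream=True and write it chunk by chunk via iter_content() (a sketch; the URL and local filename are just placeholders):

import requests

url = 'http://img.ivsky.com/img/tupian/pre/201708/30/kekeersitao-002.jpg'  # Placeholder image URL
response = requests.get(url, stream=True)  # The body is not read into memory immediately
with open('fengjing.jpg', 'wb') as f:
    for chunk in response.iter_content(chunk_size=1024):  # Write 1 KB at a time
        if chunk:
            f.write(chunk)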

Add header information to your request

import requests
heads = {}
heads['User-Agent'] = 'Mozilla/5.0 ' \
                          '(Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 ' \
                          '(KHTML, like Gecko) Version/5.1 Safari/534.50'
response = requests.get('http://www.baidu.com', headers=heads)
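
An easy way to confirm the header was really sent is to request httpbin.org/get, which echoes back the request headers it received (a small sketch reusing the same User-Agent):

import requests

heads = {'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'}
response = requests.get('http://httpbin.org/get', headers=heads)
print(response.json()['headers']['User-Agent'])  # httpbin echoes the headers it received
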
Using a proxy

As with headers, the proxies parameter should also be a dict.

Here we use requests to crawl the IPs, ports, and proxy types from a free proxy-listing site.

Because these proxies are free, the addresses will stop working quickly.

import requests
import re

def get_html(url):
    proxy = {
        'http': '120.25.253.234:812',
        'https': '163.125.222.244:8123'
    }
    heads = {}
    heads['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
    req = requests.get(url, headers=heads,proxies=proxy)
    html = req.text
    return html

def get_ipport(html):
    regex = r'<td data-title="IP">(.+)</td>'
    iplist = re.findall(regex, html)
    regex2 = r'<td data-title="PORT">(.+)</td>'
    portlist = re.findall(regex2, html)
    regex3 = r'<td data-title="type">(.+)</td>'
    typelist = re.findall(regex3, html)
    sumray = []
    # Pair each IP with its port and type by position instead of nesting loops
    for i, p, t in zip(iplist, portlist, typelist):
        sumray.append(t + ',' + i + ':' + p)
    print('High-anonymity proxies')
    print(sumray)


if __name__ == '__main__':
    url = 'http://www.kuaidaili.com/free/'
    get_ipport(get_html(url))
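
To check whether a proxy is actually in use, httpbin.org/ip returns the origin IP of the request; with a working proxy it should differ from your own address (a sketch; the proxy below is a placeholder and will almost certainly be dead by the time you run it):

import requests

proxy = {'http': 'http://120.25.253.234:812'}  # Placeholder proxy, replace with a live one
response = requests.get('http://httpbin.org/ip', proxies=proxy, timeout=5)
print(response.json())  # {'origin': ...} should show the proxy address if it is working
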
Basic POST requests:
import requests

data = {'name':'tom','age':'22'}

response = requests.post('http://httpbin.org/post', data=data)
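
data= sends the dict as a form-encoded body. If the endpoint expects a JSON body instead, requests can serialize the dict for you via the json= keyword (a minimal sketch; httpbin echoes back what it received):

import requests

payload = {'name': 'tom', 'age': 22}
response = requests.post('http://httpbin.org/post', json=payload)  # Sent as a JSON body, not a form
print(response.json()['json'])  # httpbin echoes the parsed JSON back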

Getting cookies

#Get cookie
import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
print(type(response.cookies))
for k,v in response.cookies.items():
    print(k+':'+v)
Session maintenance
import requests

session = requests.Session()
session.get('http://httpbin.org/cookies/set/number/12345')
response = session.get('http://httpbin.org/cookies')
print(response.text)
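
For contrast, doing the same two calls without a Session shows why it matters: each plain requests.get() uses a fresh cookie jar, so the cookie set by the first request is gone by the second (sketch):

import requests

requests.get('http://httpbin.org/cookies/set/number/12345')  # Cookie is set, then discarded
response = requests.get('http://httpbin.org/cookies')  # Separate request, no cookies carried over
print(response.text)  # {"cookies": {}}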
Certificate Verification Settings
import requests
from requests.packages import urllib3

urllib3.disable_warnings()  # Suppress the certificate warnings from urllib3
response = requests.get('https://www.12306.cn', verify=False)  # Disable certificate verification
print(response.status_code)

Print result: 200
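
Instead of switching verification off, verify can also point at a trusted CA bundle (a hedged sketch; the file path below is only a placeholder):

import requests

# verify accepts a path to a CA bundle file as well as True/False (placeholder path)
response = requests.get('https://www.12306.cn', verify='/path/to/ca-bundle.crt')
print(response.status_code)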

Catching timeout exceptions
import requests
from requests.exceptions import ReadTimeout

try:
    res = requests.get('http://httpbin.org', timeout=0.1)
    print(res.status_code)
except ReadTimeout:
    print('timeout')
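
timeout also accepts a (connect, read) tuple if you want to bound the two phases separately (a minimal sketch; the values are arbitrary):

import requests
from requests.exceptions import ConnectTimeout, ReadTimeout

try:
    # Allow up to 3 seconds to connect and up to 7 seconds to read the response
    res = requests.get('http://httpbin.org', timeout=(3, 7))
    print(res.status_code)
except (ConnectTimeout, ReadTimeout):
    print('timeout')
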
Exception handling

When you're not sure what might go wrong, wrap the request in try...except to catch exceptions.
All of the exceptions that requests raises are defined in requests.exceptions:

import requests
from requests.exceptions import ReadTimeout,HTTPError,RequestException

try:
    response = requests.get('http://www.baidu.com',timeout=0.5)
    print(response.status_code)
except ReadTimeout:
    print('timeout')
except HTTPError:
    print('httperror')
except RequestException:
    print('reqerror')
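
One more useful pattern: by default requests does not raise on 4xx/5xx status codes, and response.raise_for_status() turns them into an HTTPError so the except blocks above actually fire (a minimal sketch using an httpbin endpoint that always returns 404):

import requests
from requests.exceptions import HTTPError, RequestException

try:
    response = requests.get('http://httpbin.org/status/404')
    response.raise_for_status()  # Raises HTTPError because the status code is 404
except HTTPError:
    print('httperror')
except RequestException:
    print('reqerror')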