Created by Frank Yang
>>> import requests
>>> req = requests.get('http://example.com')
>>> req.text
>>> payload = {'key1': 'value1', 'key2': 'value2'}
>>> req = requests.post("http://httpbin.org/post", data=payload)
>>> req.text
>>> from bs4 import BeautifulSoup
>>> req = requests.get('https://raw.githubusercontent.com/FrankYang0529/Parser-Tutorial/master/parse.html')
>>> soup = BeautifulSoup(req.text, 'html.parser')
>>> div = soup.find("div")
...
>>> div.a['href']
... 'http://google.com'
>>> div_all = soup.find_all("div", {"class": "item"})
... [<div class="item">...</div>, <div class="item">...</div>]
>>> div_all[1].a.span.text
... 'Facebook'
async def coro_function(delay=3):
    """Print "sleeping", pause for *delay* seconds, then print "Done".

    Modernized from the ``@asyncio.coroutine`` / ``yield from`` style,
    which was deprecated in Python 3.8 and removed in 3.11.

    Args:
        delay: Seconds to sleep. Defaults to 3 to match the original
            hard-coded example; exposed as a parameter so callers can
            shorten the pause.

    Returns:
        None.
    """
    print("sleeping")
    await asyncio.sleep(delay)
    print("Done")
# Run both coroutines concurrently. asyncio.run() (Python 3.7+) creates,
# runs, and closes the event loop in one call. Note: passing bare
# coroutines to asyncio.wait() was deprecated in 3.8 and removed in 3.11,
# so asyncio.gather() is used instead.
async def _main():
    await asyncio.gather(coro_function(), coro_function())

asyncio.run(_main())
sleeping
sleeping
Done
Done
async def get(*args, **kwargs):
    """Perform an HTTP GET via aiohttp and return the raw body as bytes.

    Modernized from the ``@asyncio.coroutine`` / ``yield from`` style
    (removed in Python 3.11) to ``async``/``await``.

    Args:
        *args: Positional arguments forwarded to ``aiohttp.request``
            (typically just the URL).
        **kwargs: Keyword arguments forwarded to ``aiohttp.request``
            (e.g. ``compress=True`` as used by the caller below).

    Returns:
        bytes: The full response body.
    """
    # In current aiohttp, request() is an async context manager; the
    # ``async with`` block releases the connection when we are done.
    async with aiohttp.request('GET', *args, **kwargs) as response:
        return await response.read()
def first_magnet(page):
    """Parse *page* as HTML and return its ``<title>`` tag.

    Despite the name (a leftover from a torrent-scraping example), this
    simply returns the BeautifulSoup Tag for the document title, or
    None when the page has no <title>.
    """
    document = bs4.BeautifulSoup(page, 'html.parser')
    return document.title
async def print_magnet(query):
    """Download *query* (a URL) and print ``"<url>: <title tag>"``.

    Modernized from the ``@asyncio.coroutine`` / ``yield from`` style
    (removed in Python 3.11) to ``async``/``await``.

    Args:
        query: The URL to fetch.

    Returns:
        None; output goes to stdout.
    """
    url = query
    # compress=True asks the server for a compressed response body.
    page = await get(url, compress=True)
    magnet = first_magnet(page)
    print('{}: {}'.format(query, magnet))
# URLs to fetch concurrently (variable name is a leftover from a
# Linux-distro example in the original tutorial).
distros = [
    'http://www.bbc.com/news/election-us-2016-35760148',
    'http://www.bbc.com/news/world-europe-35760985',
    'http://www.bbc.com/news/world-asia-35760797',
]

# asyncio.wait() no longer accepts bare coroutines (removed in Python
# 3.11); asyncio.gather() inside asyncio.run() is the modern equivalent
# of the old get_event_loop()/run_until_complete() pair.
async def _fetch_all():
    await asyncio.gather(*(print_magnet(d) for d in distros))

asyncio.run(_fetch_all())
$ time python3 async_parse.py
0.38s user 0.04s system 59% cpu 0.701 total
$ time python3 general_parse.py
0.42s user 0.05s system 16% cpu 2.790 total