IPython 中下划线的特殊用法

Created: November-22, 2018
from urllib.request import urlopen
from collections import Counter
import re

conn = urlopen('http://textfiles.com/100/dodontae.hum')
lines = conn.readlines()
conn.close()

# readlines() returns byte strings
data = ''.join([line.decode('utf-8') for line in lines]) 

# replace non-letters with a space
re.sub('[^A-Za-z]', ' ', data) 

# condense successive whitespace into a single space
# the underscore retrieves the most recent output 
re.sub('\s+', ' ', _)

# normalize the text by lowercasing and removing leading and trailing whitespace
_.lower().strip()

# split into words on space
words = _.split(' ')

from collections import Counter
word_count = Counter()

for word in words:
    word_count[word[0]] += 1

word_count.most_common()