我想说 urllib2 确实很难用,直到有人跟我提到了 requests 这个库——还是去用 requests 吧。(2014.10.19)

连通性测试(保存为 prepare.py,供后面的测试代码 import):

#!/usr/bin/env python
#coding=utf-8
import urllib2

def check(url = 'http://m.baidu.com' ):

request = urllib2.Request(url)

try:
urllib2.urlopen(request)

except urllib2.URLError , e:
if hasattr(e, 'code'):
print 'error code:' , e.code
if hasattr(e, 'reason'):
print 'error reason:', e.reason

测试代码:

#!/usr/bin/env python
#coding:utf-8
import cookielib
import sys
import urllib
import urllib2

import prepare

prepare.check()

#urllib2
url = 'http://m.baidu.com'
queries = {
'name': 'rming',
'passwd': 'password',
}

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
data = urllib.urlencode(queries)

headers = {
'User-Agent': user_agent
}
request = urllib2.Request(url, data, headers)
#add header
request.add_header('Accept', 'text/html')
request.add_header('Referer','http://mail.qq.com')
print request.headers

responce = urllib2.urlopen(request , timeout=10)
html = responce.read()

print "info:",responce.info()

print "geturl:",responce.geturl()

print "length:",len(html)

pos = html.find('href')

print html[pos:pos+100]


# debug: build an opener whose HTTP and HTTPS handlers have debuglevel=1
httpHandler = urllib2.HTTPHandler(debuglevel=1)
httpsHandler = urllib2.HTTPSHandler(debuglevel=1)
opener = urllib2.build_opener(httpHandler, httpsHandler)
# install_opener() makes every later urllib2.urlopen() call use this opener.
urllib2.install_opener(opener)
response = urllib2.urlopen('http://m.qq.com')
# The body is not needed for this demo; release the connection.
response.close()


#cookielib
cookie = cookielib.CookieJar()

opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

responce = opener.open('http://www.baidu.com')

for item in cookie:
print item.name,'=>',item.value

#utf8编码
reload(sys)
sys.setdefaultencoding("utf8")
print sys.getdefaultencoding();

#raw_input
name = str(raw_input(u'请输入你的名字:\n'))
age = int(raw_input(u'请输入你的年龄:\n'))

print u"名字:",name
print u"年龄:",age

QA:

1. 编码错误,报错:Non-ASCII character '\xe8' in file

  原因:源文件开头缺少编码声明 #coding:utf-8

2. 编码错误,报错:'ascii' codec can't encode characters in position

  如果已经设置了上面的编码声明,则在中文字符串(u"字符串")前省略 "u" 试试。
  如果还不行,则尝试使用 string 的 decode() 和 encode() 方法显式转换编码。