导出 CNZZ 当天所有关键字的 Python 程序
发布: 2009-12-30 20:47 | 作者: phpsir | 查看: 45次
源代码如下:
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 from twill.commands import *
def login(username, password):
    """Sign in to cnzz.com using the twill command-level browser.

    Opens http://www.cnzz.com, fills the "username" and "password" fields
    of the form identified by 1 (presumably the first form on the page --
    confirm against the live site), and submits it.

    Args:
        username: Value written into the "username" form field.
        password: Value written into the "password" form field.
    """
    go("http://www.cnzz.com")
    credentials = (("username", username), ("password", password))
    for field_name, field_value in credentials:
        fv(1, field_name, field_value)
    submit()
def getkey(siteid, date, lastpage):
    """Print every keyword CNZZ reports for one site on one day.

    Fetches keyword-report pages 1 through ``lastpage`` (inclusive) with the
    browser returned by ``get_browser()`` and prints the ``title`` attribute
    of every matching ``<td>`` cell, UTF-8 encoded, one per line.

    NOTE(review): presumably needs an authenticated session -- the script's
    entry point calls ``login()`` before this function.

    Args:
        siteid: Site id as a string; concatenated into the report URL.
        date: Report day, sent as both the ``st`` and ``et`` query
            parameters (the usage example passes ``2009-12-30``).
        lastpage: Number of the last page to fetch; anything ``int()``
            accepts (the command line passes a string).

    Raises:
        ValueError: If ``lastpage`` is a string that is not an integer.
    """
    page_count = int(lastpage)

    # Compiled once, outside the page loop. The backslash is doubled so the
    # regex text stays exactly "<\/td>" without relying on an invalid string
    # escape sequence.
    keyword_cell = re.compile(u"<td title='(.+?)'>.+?<\\/td>")

    browser = get_browser()
    base_url = ("http://new.cnzz.com/v1/main.php?siteid=" + siteid
                + "&s=key&st=" + date + "&et=" + date + "&page=")

    # range() excludes its stop value, so add 1 to include the last page.
    for page in range(1, page_count + 1):
        browser.go(base_url + str(page))
        # The report pages are decoded as GBK; undecodable bytes are dropped.
        html = browser.get_html().decode('gbk', 'ignore')
        for keyword in keyword_cell.findall(html):
            # Single-argument print(...) behaves like the Python 2 print
            # statement while remaining valid syntax on Python 3.
            print(keyword.encode('utf-8'))
22
if __name__ == "__main__":
    # Command line: username password siteid date pages
    # Fail with a readable usage message instead of an IndexError traceback
    # when arguments are missing. sys.exit() with a string prints it to
    # stderr and exits with status 1.
    if len(sys.argv) < 6:
        sys.exit("usage: %s <username> <password> <siteid> <date> <pages>"
                 % sys.argv[0])

    # Extra trailing arguments are ignored, as before.
    username, password, siteid, today, page = sys.argv[1:6]

    login(username, password)
    getkey(siteid, today, page)
~
下载
cnzzip.rar (557 B)
用法: python cnzzip.py cnzzusername cnzzpassword siteid 日期 页数
比如
python cnzzip.py username password 1234567 2009-12-30 30

