2008年9月16日 星期二

MP3下載:baidu top100

#!/usr/bin/python

import urllib
import urlparse
import htmllib
import formatter
import string
import os
import sys
import thread
#import threading

class Parser(htmllib.HTMLParser):
#return a dictionary mapping anchor texts to lists of associated hyperlinks

def __init__(self, verbose=0):
self.anchors = {}
f = formatter.NullFormatter()
htmllib.HTMLParser.__init__(self, f, verbose)
def anchor_bgn(self, href, name, type):
self.save_bgn()
self.anchor = href

def anchor_end(self):
text = string.strip(self.save_end())
if self.anchor and text:
self.anchors[text] = self.anchors.get(text, []) + [self.anchor]

# Download the network resource object addressed by url.
# The target file name is the last "/"-separated segment of url, prefixed
# with path and a backslash. The url is passed through UncodeUrl first, and
# 0 is returned when the result does not contain ".mp3".
def SaveFile(url, path):
try:
seps = url.split("/")
size = len(seps)
name = seps[size-1]
#print url
name = path + "\\" + name

url = UncodeUrl(url)
if url.find(".mp3") == -1:
return 0;
content = DownObjectByUrl(url)
# NOTE(review): the next line is damaged. Text between '<' and '>' characters
# appears to have been lost (presumably stripped as HTML when the script was
# published; confirm against the original). It fuses the rest of SaveFile, a
# URL-building fragment using rn/pn/ln, DownObjectByUrl, and the start of a
# link-collecting function (presumably GetNextRankLinks, which is called
# elsewhere but has no visible def). Restore it from the original script
# before relying on any of these functions.
if len(content) < i =" 1" list =" name.split(" name =" list[0]" name =" list[0]" op =" open(name," rn=" + repr(rn) + " pn=" + repr(pn) + " ln=" + repr(ln) pn += 18 return url def DownObjectByUrl(url): fp = urllib.urlopen(url) content = " s =" fp.read(8192)" links =" []" html =" DownObjectByUrl(url)" p =" Parser()" cnt =" 0">", v
#print "----------------------------"
#for item in links:
# print item
# Tail of the link-collecting function: hands back the accumulated list.
return links


#-------- Undo baidu's URL obfuscation: recover the real mp3 link from the encrypted url --------------

def N(S,P,Q):
    """Register the index range [S, P) in the global tables K and H.

    For every R in range(S, P) this stores the forward entry K[R] = R + Q
    and the matching inverse entry H[R + Q] = R. Nothing is returned; an
    empty range (P <= S) leaves both tables untouched.

    K and H are module-level containers defined outside this function; they
    must support item assignment at the indices used here.
    """
    for index in range(S, P):
        shifted = index + Q
        K[index] = shifted
        H[shifted] = index

# Walks the characters of Q one by one, accumulating into S.
# For ASCII letters and digits the visible code looks up ord(char) in the
# global table H and subtracts M; the rest of the transformation is lost in
# the damaged line below.
def A(Q):
P=len(Q)
S=""
for R in range(0, P):
T=Q[R]
# NOTE(review): the next line is damaged. Text between '<' and '>' characters
# appears to have been lost (presumably stripped as HTML when the script was
# published; confirm against the original). It fuses the rest of A, a decoder
# that calls A(L) / A(J), the code that extracts F, L and J from a downloaded
# page (returning 0 when a marker is missing), the module-level F/L/J/M/K/H
# definitions, and the start of a percent-escaping function (presumably
# UncodeUrl, which is called elsewhere but has no visible def). Restore it
# from the original script before relying on any of these functions.
if T >= 'A' and T <= 'Z' or T >= 'a' and T <= 'z' or T >= '0' and T <= '9': i = ord(Q[R]) U = H[i] - M if U < t =" chr(K[U])" o="" e="" m="string.atoi(F)%26" m =" 1" o="A(L)" l ="="" e =" O" e =" A(J)" k =" content.find(" f=") if k == -1: return 0 k += len(" f=") str = content[k:-1] k = str.find(" k ="="" f =" str[:k]" k =" str.find(" i=") if k == -1: return 0 k += len(" l=") + 1 str2 = str[k:-1] k = str2.find(" k ="="" l =" str2[:k]" k =" str2.find(" j=") if k == -1: return 0 k += len(" j=") + 1 str3 = str2[k:-1] k = str3.find(" k ="="" j =" str3[:k]" f=", F #print " l=", L #print " j=", J #baidu mp3 url 鏈接變形元素 F = " l = "" j = "" m =" 0" k="[]" h="[]" url2 = "" b =" 0" i =" ord(c)" b ="="" b =" 0" s =" repr(hex(i))">= 0x80:
# Tail of the percent-escaping function: character codes >= 0x80 are emitted
# as "%" followed by two hex digits.
url2 += "%"
# repr(hex(i)) is a quoted string such as "'0xe4'", so s[-3] and s[-2] are
# the two hex digits right before the closing quote (assumes i < 0x100,
# TODO confirm).
s = repr(hex(i))
url2 += s[-3] + s[-2]
b = 1
# A space is escaped as "%20"; every other character is copied unchanged.
elif c == ' ':
url2 += "%20"
else:
url2 += c
return url2


# Entry page of the crawl; the top-level loop below passes it to GetNextRankLinks.
url1 = "http://list.mp3.baidu.com/list/newhits.html?top1"

def DownMp3(item, save_dir="c:\\baidump3"):
    """Try the candidate pages behind one link until one mp3 has been saved.

    item: URL handed to GetNextRankLinks to obtain the candidate page links
        (presumably a search-result page for one song; verify against caller).
    save_dir: directory passed to SaveFile as its path argument. Defaults to
        the location that used to be hard-coded here.

    Returns None. The loop stops at the first candidate for which SaveFile
    reports success (returns 1); candidates that cannot be downloaded or
    decoded are skipped.
    """
    for candidate in GetNextRankLinks(item):
        content = DownObjectByUrl(UncodeUrl(candidate))
        if not content:
            continue
        # GetBaiduFLJ returns 0 when the page lacks the fields needed to
        # rebuild the real link.
        if GetBaiduFLJ(content) == 0:
            continue

        # NOTE(review): DisUrl takes no arguments, so it presumably reads
        # state left behind by GetBaiduFLJ; verify this is safe when several
        # DownMp3 threads run at once.
        mp3url = DisUrl()
        if not mp3url:
            continue
        if SaveFile(mp3url, save_dir) == 1:
            break

# Start one download thread for every link on the entry page that contains
# both "http://" and "word=".
links = GetNextRankLinks(url1)
for item in links:
    if "http://" in item and "word=" in item:
        print(item)
        thread.start_new_thread(DownMp3, (item,))

while True: