本帖最后由 iamOk 于 15-12-2013 18:02 编辑
挺有意思的,这两天在学 Python,也写了个完成同样功能的脚本。装了 Python 的可以直接拿来用,没有装的可以先装一个 Python。
# copyright, IamOk@freeOZ
- from xml.dom import minidom;
- import re;
- import urllib2;
- import os;
- from HTMLParser import HTMLParser
# RSS feed whose <link> entries are scanned for pages to process.
xmlurl = "http://www.abc.net.au/news/feed/54536/rss.xml"
# Download directory.  File names are built by plain string concatenation
# (prefix + name), so the trailing slash is required.
prefix = "download/"
class MP3Parser(HTMLParser):
    """HTML parser that saves the MP3 files and transcript text of a page.

    Text inside ``<script>`` elements is scanned for ``.mp3`` URLs, which are
    downloaded into the module-level ``prefix`` directory.  Text inside a
    ``<div>`` whose ``class`` contains ``media-transcript`` is appended to
    ``<mp3 file name>.txt`` next to the most recently handled MP3.
    """

    # Parser state:
    #   0 - nothing of interest seen yet
    #   1 - inside a <script> element
    #   2 - a <script> or <div> element has just been closed
    #   3 - inside a <div> whose class contains "media-transcript"
    inscript = 0
    # Path of the transcript file for the current MP3, or 0 when no
    # transcript should be written.
    scriptname = 0

    def handle_starttag(self, tag, attrs):
        """Update the parser state when a script or transcript div opens."""
        if tag == 'script':
            self.inscript = 1
        if tag == 'div':
            for name, value in attrs:
                # ``value`` is None for an attribute written without a value
                # (e.g. ``<div class>``), so it must be checked first.
                if name == 'class' and value and 'media-transcript' in value:
                    self.inscript = 3

    def handle_endtag(self, tag):
        """Leave the script/transcript state when a script or div closes."""
        if tag == 'script' or tag == 'div':
            self.inscript = 2

    def handle_data(self, data):
        """Download MP3s found in script text; append transcript text."""
        if self.inscript == 1:
            self._download_mp3s(data)
        if self.inscript == 3 and self.scriptname != 0:
            # Append: one transcript usually arrives as many data chunks.
            with open(self.scriptname, "a") as transcript:
                transcript.write(data)

    def _download_mp3s(self, script_text):
        """Download every ``.mp3`` URL mentioned in a block of script text.

        The text is split on commas; in each fragment that mentions ``.mp3``
        the second whitespace-separated token, minus its first and last
        character (assumed to be surrounding quotes), is taken as the URL.
        Scanning stops at the first MP3 that already exists locally or whose
        download fails.
        """
        for fragment in script_text.split(','):
            if '.mp3' not in fragment:
                continue
            tokens = fragment.split()
            if len(tokens) < 2:
                # Not in the expected "key 'url'" shape; nothing to extract.
                continue
            url = tokens[1][1:-1]
            filename = prefix + url.split('/')[-1]
            self.scriptname = filename + '.txt'
            if os.path.isfile(filename):
                # Already downloaded: suppress the transcript as well.
                self.scriptname = 0
                break
            print("downloading " + url)
            try:
                response = urllib2.urlopen(url)
                try:
                    payload = response.read()
                finally:
                    response.close()
            except (ValueError, IOError):
                # ValueError: malformed URL.  IOError: network/HTTP failure
                # (urllib2.URLError derives from IOError).
                print("failed to download " + url)
                break
            # Written only after a complete read, so a failed download never
            # leaves a truncated file behind.
            with open(filename, "wb") as audio:
                audio.write(payload)
def main():
    """Fetch the RSS feed and run MP3Parser over every linked page.

    The feed at ``xmlurl`` is saved to ``result.xml`` in the current
    directory and parsed from there; the ``prefix`` download directory is
    created if it is missing.
    """
    response = urllib2.urlopen(xmlurl)
    try:
        feed_xml = response.read()
    finally:
        response.close()
    # Binary mode stores the downloaded bytes exactly as received.
    with open("result.xml", "wb") as feed_file:
        feed_file.write(feed_xml)
    xmldoc = minidom.parse('result.xml')

    if not os.path.exists(prefix):
        os.makedirs(prefix)

    for node in xmldoc.getElementsByTagName('link'):
        if not node.childNodes:
            # Empty <link/> element: there is no URL to read.
            continue
        mylink = node.childNodes[0].nodeValue
        # Only links containing a digit are followed -- presumably article
        # URLs carry a numeric id while the channel's own link does not;
        # verify against the feed.
        if not mylink or not re.search('[0-9]', mylink):
            continue
        print("opening " + mylink)
        try:
            page = urllib2.urlopen(mylink)
            try:
                html = page.read()
            finally:
                page.close()
        except (ValueError, IOError):
            # One unreachable page should not abort the whole batch.
            print("skipping " + mylink)
            continue
        # A fresh parser per page keeps state from one page (current
        # transcript file, open-tag state) from leaking into the next.
        parser = MP3Parser()
        parser.feed(html)
        # NOTE(review): indentation was lost in the original paste; this
        # message is assumed to belong to the per-page branch.
        print("processing next link\r\n")


if __name__ == "__main__":
    main()
-
复制代码 |