#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Author: Huang Jiahua <jhuangjiahua(at)gmail.com>
# License: GNU LGPL
#
# getlyric.py
# 百度歌词 下载工具
# 用法,将本脚本放在 ~/bin/getlyric.py
# 加上可执行权限
# 执行 getlyric.py 歌名
# 就可以显示 lrc 格式歌词
#
# getlyric.py -p 歌名
# 表示去掉 lrc 标记
#
# getlyric.py -c 歌名
# 表示重新下载歌词
#
"""Get the Lyric from web
Calling getlyric.getlyric(keyword) returns the lyric found for keyword;
the keyword can be a title, an artist, etc.
"""
__revision__ = '0.1'
import os,sys,re,urllib
# Per-user cache directory: each downloaded lyric is stored in it as
# <keyword>.lrc so that later lookups can be served without the network.
# expanduser('~') is used instead of os.environ['HOME'] so that a missing
# HOME variable does not abort the import with a KeyError.
homedir = os.path.expanduser('~')
lyricdir = os.path.join(homedir, '.lyrics')
try:
    os.mkdir(lyricdir)
except OSError:
    # An already existing directory is the normal case.  Anything else
    # (no permission, a regular file in the way, ...) is re-raised so the
    # failure is not hidden.
    if not os.path.isdir(lyricdir):
        raise
## gbk2utf8(docurl(mkurl(urllib.quote(keyword.decode('utf8').encode('gbk')))))
def _gbk2utf8(stri):
    '''Re-encode a GB18030 byte string as UTF-8.

    Both the decode and the encode step use the 'replace' error handler,
    so input that cannot be converted is substituted instead of raising.
    '''
    as_unicode = stri.decode('gb18030', 'replace')
    return as_unicode.encode('utf8', 'replace')
def _utf82gbk(stri):
    '''Re-encode a UTF-8 byte string as GB18030.

    Both the decode and the encode step use the 'replace' error handler,
    so input that cannot be converted is substituted instead of raising.
    '''
    as_unicode = stri.decode('utf8', 'replace')
    return as_unicode.encode('gb18030', 'replace')
def _docurl(url):
    '''Fetch *url* and return the response body.

    The body is returned exactly as read; no decoding is applied here.
    Errors raised by urllib.urlopen() or by read() propagate unchanged.
    '''
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Close explicitly, also when read() fails, instead of leaving the
        # open handle to the garbage collector.
        response.close()
def _txt2url(txt):
    '''Percent-quote *txt* for use inside a URL.

    Example: 'abc def' -> 'abc%20def'
    '''
    quoted = urllib.quote(txt)
    return quoted
def get_baidulrc_list(keyword):
    '''Search mp3.baidu.com for lyrics matching *keyword*.

    keyword is converted from UTF-8 to GB18030 and percent-quoted before
    it is placed in the query string.

    Returns the list of ".lrc" URLs found in the result page, in the order
    they appear there; the list is empty when the page contains none.
    '''
    query = _txt2url(_utf82gbk(keyword))
    url = 'http://mp3.baidu.com/m?f=ms&tn=baidump3lyric&ct=150994944&lf=2&rn=10&word='
    url = url + query + '&lm=-1'
    page = _docurl(url)
    # Each match runs from "http" to the nearest ".lrc" (non-greedy).
    # The pattern used to start with a stray backslash (r'\http'); '\h' is
    # not a defined escape -- older `re` versions read it as a plain 'h',
    # newer ones reject the whole pattern.
    return re.findall(r'http\S*?\.lrc', page)
def get_netbaidulrc(keyword):
    '''Fetch the first lyric that mp3.baidu.com lists for *keyword*.

    Returns the downloaded lyric converted from GB18030 to UTF-8, or ''
    when the search produced no .lrc URL.
    '''
    candidates = get_baidulrc_list(keyword)
    if not candidates:
        return ''
    # Only the first hit is tried.
    first_url = candidates[0]
    return _gbk2utf8(_docurl(first_url))
def get_netlyric(keyword):
    '''Download the lyric for *keyword* from the network.

    This simply delegates to get_netbaidulrc() and returns its result
    unchanged ('' when nothing was found).
    '''
    lyric = get_netbaidulrc(keyword)
    return lyric
def getlyric(keyword):
    '''Return the LRC lyric for *keyword*, preferring the local cache.

    The cache file is <lyricdir>/<keyword>.lrc.  If it exists, its content
    is returned and the network is not touched.  Otherwise the lyric is
    fetched with get_netlyric() and, when non-empty, written to the cache
    file before being returned.

    Returns '' (and writes nothing) when no lyric could be downloaded.
    '''
    lyricfile = os.path.join(lyricdir, keyword + '.lrc')
    if os.path.isfile(lyricfile):
        # `with` closes the handle deterministically; the bare
        # open(...).read() left that to the garbage collector.
        with open(lyricfile) as cached:
            return cached.read()
    lyric = get_netlyric(keyword)
    if lyric == '':
        return ''
    with open(lyricfile, 'w') as cache:
        cache.write(lyric)
    return lyric
def getlyric_plain(keyword):
    '''Return the lyric for *keyword* with the LRC tags stripped.

    The lyric is obtained through getlyric(), so the local cache is used
    first.  Bracketed tags such as [00:12.53][03:45.26] are removed.
    '''
    lyric = getlyric(keyword)
    # Remove one bracketed tag at a time.  The former greedy r'\[.*\]' ran
    # from the first '[' to the last ']' of a line and therefore also
    # deleted any lyric text standing between two brackets.
    lyric = re.sub(r'\[[^\]\n]*\]', '', lyric)
    # NOTE(review): this deletes every "\r\n\r\n" outright instead of
    # collapsing it to a single line break, so the text on either side is
    # joined -- kept as is, confirm that this is intended.
    return re.sub('\r\n\r\n', '', lyric)
def regetlyric(keyword):
    '''Re-download the lyric for *keyword* and refresh the local cache.

    On a successful download the cache file <lyricdir>/<keyword>.lrc is
    overwritten and the new LRC text is returned.  When the download
    yields '', the cache is left untouched and the result of
    getlyric(keyword) is returned instead.
    '''
    lyric = get_netlyric(keyword)
    if lyric == '':
        return getlyric(keyword)
    lyricfile = os.path.join(lyricdir, keyword + '.lrc')
    # `with` closes (and thereby flushes) the file deterministically.
    with open(lyricfile, 'w') as cache:
        cache.write(lyric)
    return lyric
def regetlyric_plain(keyword):
    '''Re-download the lyric for *keyword* and return it without LRC tags.

    The lyric is obtained through regetlyric(), so the network is tried
    first and the local cache is refreshed.  Bracketed tags such as
    [00:12.53][03:45.26] are removed from the result.
    '''
    # This used to call getlyric(), which made the function identical to
    # getlyric_plain() and never re-downloaded anything.
    lyric = regetlyric(keyword)
    # Remove one bracketed tag at a time; a greedy r'\[.*\]' would also
    # delete lyric text standing between two brackets on the same line.
    lyric = re.sub(r'\[[^\]\n]*\]', '', lyric)
    # NOTE(review): this deletes every "\r\n\r\n" outright instead of
    # collapsing it to a single line break -- kept as is, confirm intent.
    return re.sub('\r\n\r\n', '', lyric)
## test command
def _main(argv):
    '''Command-line entry point; *argv* is the full argument vector.

    Supported forms:
        PROG KEYWORD      print the lyric (local cache first)
        PROG -p KEYWORD   print the lyric without LRC tags
        PROG -c KEYWORD   re-download the lyric, then print it
        PROG -h           print help

    Returns the exit status: 0 on success, 2 when the keyword is missing
    (the usage text is then written to stderr).
    '''
    # The usage text used to start with a stray quote character
    # (''''[options] ...), which was printed literally.
    usage = ('Usage: ' + argv[0] + ' [options] KEYWORD\noptions:\n'
             '-h print help\n'
             '-p plain formatted\n'
             '-c reget lyric from network')
    args = argv[1:]
    if not args:
        # Previously an IndexError traceback.
        sys.stderr.write(usage + '\n')
        return 2
    option = args[0]
    if option == '-h':
        print('Get the Lyric from web')
        print(usage)
        return 0
    if option in ('-p', '-c'):
        if len(args) < 2:
            # Option given without a keyword: previously an IndexError.
            sys.stderr.write(usage + '\n')
            return 2
        keyword = args[1]
        if option == '-p':
            print(getlyric_plain(keyword))
        else:
            print(regetlyric(keyword))
        return 0
    # No option: the first argument is the keyword itself.
    print(getlyric(option))
    return 0


## test command
if __name__ == "__main__":
    sys.exit(_main(sys.argv))