作者 Huahua [actionscript] 2007-02-25 22:17 (点击下载)

  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. # Author: Huang Jiahua <jhuangjiahua(at)gmail.com>
  4. # License: GNU LGPL
  5. #
  6. # getlyric.py
  7. # 百度歌词 下载工具
  8. # 用法,将本脚本放在 ~/bin/getlyric.py
  9. # 加上可执行权限
  10. # 执行 getlyric.py 歌名
  11. # 就可以显示 lrc 格式歌词
  12. #
  13. # getlyric.py -p 歌名
  14. # 表示去掉 lrc 标记
  15. #
  16. # getlyric.py -c 歌名
  17. # 表示重新下载歌词
  18. #
  19.  
  20. """Get lyrics from the web.
  21. Calling getlyric.getlyric(keyword) returns the lyric for keyword;
  22. the keyword can be a title, an artist, etc.
  23. """
  24. __revision__ = '0.1'
  25.  
  26. import os,sys,re,urllib
  27.  
  28.  
  29. homedir = os.environ['HOME']
  30. lyricdir = os.path.join(homedir,'.lyrics')
  31. if not os.path.isdir(lyricdir): os.mkdir(lyricdir)
  32.  
  33.  
  34.  
  35. ## gbk2utf8(docurl(mkurl(urllib.quote(keyword.decode('utf8').encode('gbk')))))
  36.  
  37. def _gbk2utf8(stri):
  38. '''iconv gbk -> utf8
  39. errors be given to set 'ignore' or 'replace' handling scheme
  40. '''
  41. stri = stri.decode('gb18030','replace').encode('utf8','replace')
  42. return stri
  43.  
  44. def _utf82gbk(stri):
  45. '''iconv utf8 -> gb18030
  46. errors be given to set 'ignore' or 'replace' handling scheme
  47. '''
  48. stri = stri.decode('utf8','replace').encode('gb18030','replace')
  49. return stri
  50.  
  51. def _docurl(url):
  52. '''get content from url
  53. '''
  54. return urllib.urlopen(url).read()
  55.  
  56. def _txt2url(txt):
  57. '''urllib.quote('abc def') -> 'abc%20def'
  58. '''
  59. return urllib.quote(txt)
  60.  
  61. def get_baidulrc_list(keyword):
  62. '''Get lyric list form mp3.baidu.com
  63. return lyric url list
  64. '''
  65. url = 'http://mp3.baidu.com/m?f=ms&tn=baidump3lyric&ct=150994944&lf=2&rn=10&word='
  66. url = url + _txt2url(_utf82gbk(keyword)) + '&lm=-1'
  67. cont = _docurl(url)
  68. ## find http://XXXX/XXXX.lrc from html
  69. lrc_list=re.findall(r'\http\S*?\.lrc',cont)
  70. return lrc_list
  71.  
  72.  
  73. def get_netbaidulrc(keyword):
  74. '''Get lyric form mp3.baidu.com
  75. return lyric formed
  76. '''
  77. lrc_list = get_baidulrc_list(keyword)
  78. if (len(lrc_list) > 0):
  79. url = lrc_list[0]
  80. lyric = _gbk2utf8(_docurl(lrc_list[0]))
  81. return lyric
  82.  
  83. return ''
  84.  
  85. def get_netlyric(keyword):
  86. '''Get lyric content from network
  87. return lyric formed
  88. '''
  89. return get_netbaidulrc(keyword)
  90.  
  91. def getlyric(keyword):
  92. '''Get lyric content, with local cache
  93. it will use local cache first
  94. return lyric formed
  95. '''
  96. lyricfile = os.path.join(lyricdir, keyword+'.lrc')
  97. if os.path.isfile(lyricfile):
  98. return open(lyricfile).read()
  99. lyric = get_netlyric(keyword)
  100. if (lyric==''): return ''
  101. open(lyricfile,'w').write(lyric)
  102. return lyric
  103.  
  104. def getlyric_plain(keyword):
  105. '''get plain formatted lyric
  106. the content removed [00:12.53][03:45.26]
  107. it will use local cache first
  108. return lyric plain
  109. '''
  110. lyric = getlyric(keyword)
  111. lyric = re.sub(r'\[.*\]','',lyric)
  112. return re.sub('\r\n\r\n','',lyric)
  113.  
  114. def regetlyric(keyword):
  115. '''Reget lyric from network
  116. it will reget and sync local cache
  117. return lyric plain
  118. '''
  119. lyricfile = os.path.join(lyricdir, keyword+'.lrc')
  120. lyric = get_netlyric(keyword)
  121. if (lyric==''): return getlyric(keyword)
  122. open(lyricfile,'w').write(lyric)
  123. return lyric
  124.  
  125. def regetlyric_plain(keyword):
  126. '''Reget lyric from network
  127. the content removed [00:12.53][03:45.26]
  128. it will use local cache first
  129. return get plain formatted lyric
  130. '''
  131. lyric = getlyric(keyword)
  132. lyric = re.sub(r'\[.*\]','',lyric)
  133. return re.sub('\r\n\r\n','',lyric)
  134.  
  135. ## test command
  136. if __name__=="__main__":
  137. if (sys.argv[1] == '-h'):
  138. print 'Get the Lyric from web'
  139. print 'Usage:', sys.argv[0], ''''[options] KEYWORD\noptions:
  140. -h print help
  141. -p plain formatted
  142. -c reget lyric from network'''
  143. elif (sys.argv[1] == '-p'):
  144. print getlyric_plain(sys.argv[2])
  145. elif (sys.argv[1] == '-c'):
  146. print regetlyric(sys.argv[2])
  147. else:
  148. print getlyric(sys.argv[1])
  149. keyword = sys.argv[1]

提交下面的校正或者修改. (点击这里开始一个新的帖子)
姓名: 在 cookie 中记住我的名字

屏幕抓图:(jpeg 或 png)