作者 Huahua [actionscript] 2006-12-02 23:16 (点击下载)

  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. # Author: Huang Jiahua <jhuangjiahuaATgmail.com>
  4. # License: GNU LGPL
  5. # Last modified:
  6.  
  7. """Get lyrics from the web.
  8. Calling getlyric.getlyric(keyword) returns the lyric for keyword.
  9. The keyword can be a title, an artist, etc.
  10. """
  11. __revision__ = '0.1'
  12.  
  13. import os,sys,re,urllib
  14.  
  15.  
  16. homedir = os.environ['HOME']
  17. lyricdir = os.path.join(homedir,'.lyrics')
  18. if not os.path.isdir(lyricdir): os.mkdir(lyricdir)
  19.  
  20.  
  21.  
  22. ## gbk2utf8(docurl(mkurl(urllib.quote(keyword.decode('utf8').encode('gbk')))))
  23.  
  24. def _gbk2utf8(stri):
  25. '''iconv gbk -> utf8
  26. errors be given to set 'ignore' or 'replace' handling scheme
  27. '''
  28. stri = stri.decode('gb18030','replace').encode('utf8','replace')
  29. return stri
  30.  
  31. def _utf82gbk(stri):
  32. '''iconv utf8 -> gb18030
  33. errors be given to set 'ignore' or 'replace' handling scheme
  34. '''
  35. stri = stri.decode('utf8','replace').encode('gb18030','replace')
  36. return stri
  37.  
  38. def _docurl(url):
  39. '''get content from url
  40. '''
  41. return urllib.urlopen(url).read()
  42.  
  43. def _txt2url(txt):
  44. '''urllib.quote('abc def') -> 'abc%20def'
  45. '''
  46. return urllib.quote(txt)
  47.  
  48. def get_baidulrc_list(keyword):
  49. '''Get lyric list form mp3.baidu.com
  50. return lyric url list
  51. '''
  52. url = 'http://mp3.baidu.com/m?f=ms&tn=baidump3lyric&ct=150994944&lf=2&rn=10&word='
  53. url = url + _txt2url(_utf82gbk(keyword)) + '&lm=-1'
  54. cont = _docurl(url)
  55. ## find http://XXXX/XXXX.lrc from html
  56. lrc_list=re.findall(r'\http\S*?\.lrc',cont)
  57. return lrc_list
  58.  
  59.  
  60. def get_netbaidulrc(keyword):
  61. '''Get lyric form mp3.baidu.com
  62. return lyric formed
  63. '''
  64. lrc_list = get_baidulrc_list(keyword)
  65. if (len(lrc_list) > 0):
  66. url = lrc_list[0]
  67. lyric = _gbk2utf8(_docurl(lrc_list[0]))
  68. return lyric
  69.  
  70. return ''
  71.  
  72. def get_netlyric(keyword):
  73. '''Get lyric content from network
  74. return lyric formed
  75. '''
  76. return get_netbaidulrc(keyword)
  77.  
  78. def getlyric(keyword):
  79. '''Get lyric content, with local cache
  80. it will use local cache first
  81. return lyric formed
  82. '''
  83. lyricfile = os.path.join(lyricdir, keyword+'.lrc')
  84. if os.path.isfile(lyricfile):
  85. return open(lyricfile).read()
  86. lyric = get_netlyric(keyword)
  87. if (lyric==''): return ''
  88. open(lyricfile,'w').write(lyric)
  89. return lyric
  90.  
  91. def getlyric_plain(keyword):
  92. '''get plain formatted lyric
  93. the content removed [00:12.53][03:45.26]
  94. it will use local cache first
  95. return lyric plain
  96. '''
  97. lyric = getlyric(keyword)
  98. lyric = re.sub(r'\[.*\]','',lyric)
  99. return re.sub('\r\n\r\n','',lyric)
  100.  
  101. def regetlyric(keyword):
  102. '''Reget lyric from network
  103. it will reget and sync local cache
  104. return lyric plain
  105. '''
  106. lyricfile = os.path.join(lyricdir, keyword+'.lrc')
  107. lyric = get_netlyric(keyword)
  108. if (lyric==''): return getlyric(keyword)
  109. open(lyricfile,'w').write(lyric)
  110. return lyric
  111.  
  112. def regetlyric_plain(keyword):
  113. '''Reget lyric from network
  114. the content removed [00:12.53][03:45.26]
  115. it will use local cache first
  116. return get plain formatted lyric
  117. '''
  118. lyric = getlyric(keyword)
  119. lyric = re.sub(r'\[.*\]','',lyric)
  120. return re.sub('\r\n\r\n','',lyric)
  121.  
  122. ## test command
  123. if __name__=="__main__":
  124. if (sys.argv[1] == '-h'):
  125. print 'Get the Lyric from web'
  126. print 'Usage:', sys.argv[0], ''''[options] KEYWORD\noptions:
  127. -h print help
  128. -p plain formatted
  129. -c reget lyric from network'''
  130. elif (sys.argv[1] == '-p'):
  131. print getlyric_plain(sys.argv[2])
  132. elif (sys.argv[1] == '-c'):
  133. print regetlyric(sys.argv[2])
  134. else:
  135. print getlyric(sys.argv[1])
  136. keyword = sys.argv[1]

提交下面的校正或者修改. (点击这里开始一个新的帖子)
姓名: 在 cookie 中记住我的名字

屏幕抓图:(jpeg 或 png)