From 2a9a0320eb3239b278e6ff75e85c39cd137f5050 Mon Sep 17 00:00:00 2001
From: Zhouzuo <2232969133@qq.com>
Date: Mon, 17 Apr 2017 22:42:55 +0800
Subject: [PATCH 1/2] Update SBB.py

Port SBB.py from Python 2 to Python 3 (urllib2 -> urllib.request, print() calls, explicit UTF-8 decode/encode).
---
 SBB.py | 50 ++++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)
diff --git a/SBB.py b/SBB.py
index 13ff48a..6683ec4 100644
--- a/SBB.py
+++ b/SBB.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-__version__ = '0.01'
+__version__ = '0.02'
 __author__  = 'Julien G. (@bfishadow)'
 
 '''
@@ -10,20 +10,20 @@
 Or simply save them anywhere as archives.
 '''
 
-import sys, urllib2
+import sys, urllib.request 
 from time import strftime
 
 def getBetween(str, str1, str2):
   strOutput = str[str.find(str1)+len(str1):str.find(str2)]
   return strOutput
 
-strUsage = "Usage: SBB.py <Sina blog URL> [asc]\n\nExample:\nSBB.py http://blog.sina.com.cn/gongmin desc\nSBB.py http://blog.sina.com.cn/u/1239657051\n"
+strUsage =  "Usage: SBB.py <Sina blog URL> [asc]\n\nExample:\nSBB.py http://blog.sina.com.cn/gongmin desc\nSBB.py http://blog.sina.com.cn/u/1239657051\n"
 
 #Step 0: get target blog homepage URL
 try :
-  strUserInput = sys.argv[1]
+  strUserInput =sys.argv[1]
 except :
-  print strUsage
+  print (strUsage)
   sys.exit(0)
 
 try :
@@ -33,18 +33,20 @@ def getBetween(str, str1, str2):
 
 #The URL *must* start with http://blog.sina.com.cn/, otherwise the universe will be destroied XD
 if strUserInput.find("http://blog.sina.com.cn/") == -1 or len(strUserInput) <= 24 :
-  print strUsage
+  print(strUsage)
+  print(strUserInput)
   sys.exit(0)
 
 #Get UID for the blog, UID is critical.
-objResponse = urllib2.urlopen(strUserInput)
-strResponse = objResponse.read()
+objResponse = urllib.request.urlopen(strUserInput)
+strResponse = objResponse.read().decode('utf-8')
 objResponse.close()
 
 strUID = getBetween(getBetween(strResponse, "format=html5;", "format=wml;"), "/blog/u/", '">')
-
+print('用户ID '+strUID)
+ 
 if len(strUID) > 10 :
-  print strUsage
+  print(strUsage)
   sys.exit(0)
 
 #Here's the UID. Most of the UID is a string of ten digits.
@@ -54,8 +56,8 @@ def getBetween(str, str1, str2):
 #Step 1: get list for first page and article count
 strTargetBlogListURL = "http://blog.sina.com.cn/s/articlelist_" + strTargetUID + "_0_1.html"
 
-objResponse = urllib2.urlopen(strTargetBlogListURL)
-strResponse = objResponse.read()
+objResponse = urllib.request.urlopen(strTargetBlogListURL)
+strResponse = objResponse.read().decode('utf-8')
 objResponse.close()
 
 strBlogPostList = getBetween(getBetween(strResponse,"$blogArticleSortArticleids","$blogArticleCategoryids"), " : [", "],")
@@ -71,8 +73,8 @@ def getBetween(str, str1, str2):
 #Step 2: get list for the rest of pages
 for intCurrentPage in range(intPageCount - 1) :
   strTargetBlogListURL = "http://blog.sina.com.cn/s/articlelist_" + strTargetUID + "_0_" + str(intCurrentPage + 2) + ".html"
-  objResponse = urllib2.urlopen(strTargetBlogListURL)
-  strResponse = objResponse.read()
+  objResponse = urllib.request.urlopen(strTargetBlogListURL)
+  strResponse = objResponse.read().decode('utf-8')
   strBlogPostList = getBetween(getBetween(strResponse,"$blogArticleSortArticleids","$blogArticleCategoryids"), " : [", "],")
   strBlogPostID = strBlogPostID + "," + strBlogPostList
   objResponse.close()
@@ -93,8 +95,8 @@ def getBetween(str, str1, str2):
 for strCurrentBlogPostID in arrBlogPost :
   intCounter  = intCounter + 1
   strTargetBlogPostURL = "http://blog.sina.com.cn/s/blog_" + strCurrentBlogPostID + ".html"
-  objResponse = urllib2.urlopen(strTargetBlogPostURL)
-  strPageCode = objResponse.read()
+  objResponse = urllib.request.urlopen(strTargetBlogPostURL)
+  strPageCode = objResponse.read().decode('utf-8')
   objResponse.close()
 
   #Parse blog title
@@ -113,17 +115,17 @@ def getBetween(str, str1, str2):
 
   #Write into local file
   strLocalFilename = "Post_" + str(intCounter) + "_" + strCurrentBlogPostID + ".html"
-  strHTML4Post = "<html>\n<head>\n<meta http-equiv=""Content-Type"" content=""text/html; charset=utf-8"" />\n<title>" + strBlogPostTitle + "</title>\n<link href=""http://simg.sinajs.cn/blog7style/css/conf/blog/article.css"" type=""text/css"" rel=""stylesheet"" />\n</head>\n<body>\n<h2>" + strBlogPostTitle + "</h2>\n<p>By: <em>" + strBlogName + "</em> 原文发布于：<em>" + strBlogPostTime + "</em></p>\n" + strBlogPostBody + "\n<p><a href=""index.html"">返回目录</a></p>\n</body>\n</html>"
-  objFileArticle = open(strLocalFilename, "w")
-  objFileArticle.write(strHTML4Post);
+  strHTML4Post = '<html>\n<head>\n<meta charset="utf-8" />\n<title>' + strBlogPostTitle + '</title>\n<link href="http://simg.sinajs.cn/blog7style/css/conf/blog/article.css" type="text/css" rel="stylesheet" />\n</head>\n<body>\n<h2>' + strBlogPostTitle + "</h2>\n<p>By: <em>" + strBlogName + "</em> 原文发布于：<em>" + strBlogPostTime + "</em></p>\n" + strBlogPostBody + '\n<p><a href="index.html">返回目录</a></p>\n</body>\n</html>\n'
+  objFileArticle = open(strLocalFilename, "wb")
+  objFileArticle.write(strHTML4Post.encode('utf-8'));
   objFileArticle.close
 
   strHTML4Index = strHTML4Index + '<li><a href="' + strLocalFilename + '">' + strBlogPostTitle + '</a></li>\n'
 
-  print intCounter , "/", intBlogPostCount
+  print (intCounter , "/", intBlogPostCount)
 
 strCurrentTimestamp = str(strftime("%Y-%m-%d %H:%M:%S"))
-strHTML4Index = "<html>\n<head>\n<meta http-equiv=""Content-Type"" content=""text/html; charset=utf-8"" />\n<title>" + strBlogName + "博客文章汇总</title>\n</head>\n<body>\n<h2>新浪博客：" + strBlogName + "</h2>\n<p>共" + str(intBlogPostCount) + "篇文章，最后更新：<em>" + strCurrentTimestamp + "</em></p>\n<ol>\n" + strHTML4Index + "\n</ol>\n</body>\n</html>"
-objFileIndex = open("index.html", "w")
-objFileIndex.write(strHTML4Index);
+strHTML4Index = '<html>\n<head>\n<meta charset="utf-8" />\n<title>' + strBlogName + "博客文章汇总</title>\n</head>\n<body>\n<h2>新浪博客：" + strBlogName + "</h2>\n<p>共" + str(intBlogPostCount) + "篇文章，最后更新：<em>" + strCurrentTimestamp + "</em></p>\n<ol>\n" + strHTML4Index + "\n</ol>\n</body>\n</html>\n"
+objFileIndex = open("index.html", "wb")
+objFileIndex.write(strHTML4Index.encode('utf-8'));
 objFileIndex.close

From 9b636e1059da9a62c4546a5612c92ccbc8b0fb11 Mon Sep 17 00:00:00 2001
From: Zhouzuo <2232969133@qq.com>
Date: Mon, 17 Apr 2017 22:49:06 +0800
Subject: [PATCH 2/2] Update Readme.md

---
 Readme.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Readme.md b/Readme.md
index 6375430..d0ccf26 100644
--- a/Readme.md
+++ b/Readme.md
@@ -4,7 +4,7 @@
 
 Based on these downloaded HTML files, you may generate an ebook by importing into [Calibre](http://calibre-ebook.com/). Or, you can simply save them anywhere as archives.
 
-Tested with Python 2.7.8
+Tested with Python 3.5.2
 
 ## Usage
 SBB.py (Sina Blog URL) (asc|desc)
@@ -27,7 +27,7 @@ Licensed under the Apache License, Version 2.0
 
 ##Change log
 
-###Feb 15, 2015
+### April 17, 2017
 
 - [ADDED] timestamp for index and articles.
 - [ADDED] sort option. Ascending by default.
@@ -38,7 +38,7 @@ Licensed under the Apache License, Version 2.0
 
 基于这些下载来的 HTML 文件，您可以借助 [Calibre](http://calibre-ebook.com/) 来生成电子书，或者当作存档。
 
-请在 Python 2.7.8 下使用。
+请在 Python 3.5.2 下使用。
 
 ## 用法
 SBB.py (新浪博客地址) (desc|asc)
@@ -61,7 +61,7 @@ Licensed under the Apache License, Version 2.0
 
 ##升级日志
 
-###2015年2月15日
+### 2017年4月17日
 
 - [增加] 索引页面和文章页面增加时间戳。
 - [增加] 文章排序选项，默认按发表时间顺序排列。