这是今天自己折腾的一个项目：从数据采集、Google 翻译到 WordPress 发布，全部用 Python 打通了。
最终代码如下:
import csv
import json
from urllib.parse import quote

import requests
import xlwt
from bs4 import BeautifulSoup
from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm
from wordpress_xmlrpc.methods import taxonomies
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo
10 |
def getHTMLText(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The response text on success, or ``0`` when the request fails
        (connection problem, timeout after 30 seconds, invalid URL, or an
        error status reported by ``raise_for_status``). The falsy ``0``
        sentinel is kept so callers that test ``if html:`` keep working.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only request/HTTP failures are treated as "fetch failed"; anything
        # else (KeyboardInterrupt, programming errors) must propagate instead
        # of being silently turned into a failed download.
        print("Get HTML Text Failed!")
        return 0
    return r.text
19 |
20 |
def google_translate_EtoC(to_translate, from_language="en", to_language="ch-CN"):
    """Translate text by scraping the Google Translate mobile web page.

    Args:
        to_translate: Text to translate.
        from_language: Source language code, sent as the ``sl`` query
            parameter.
        to_language: Target language code, sent as the ``hl`` query
            parameter.

    Returns:
        The text of the first ``<div class="t0">`` element in the response,
        or ``""`` (after printing ``Translation Failed!``) when the page
        could not be fetched or contains no such element.
    """
    # NOTE(review): the default "ch-CN" does not look like a standard language
    # tag (Chinese is normally "zh-CN"); it is kept unchanged for backward
    # compatibility -- confirm against the service and fix the default if so.

    # Build the request URL from the parameters. The text is percent-encoded
    # so that spaces, "&", "#", "+" and non-ASCII characters cannot truncate
    # or corrupt the query string.
    base_url = "https://translate.google.cn/m?hl={}&sl={}&ie=UTF-8&q={}"
    encoded_text = quote(str(to_translate), safe="")
    url = base_url.format(to_language, from_language, encoded_text)

    # Fetch the page; getHTMLText returns a falsy value on failure.
    html = getHTMLText(url)
    if not html:
        print("Translation Failed!")
        return ""

    # Parse the page and pull out the translation result.
    soup = BeautifulSoup(html, "html.parser")
    matches = soup.find_all("div", {"class": "t0"})
    if not matches:
        print("Translation Failed!")
        return ""
    return matches[0].text
39 |
# HTTP request headers imitating a desktop Chrome browser issuing an
# XMLHttpRequest to www.zalora.com.hk.
# NOTE(review): 'sdch' is advertised in Accept-Encoding; confirm the HTTP
# client in use can decode it, otherwise drop it from the list.
headers = {
    'Host': 'www.zalora.com.hk',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Accept': 'text/html, */*; q=0.01',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
    'DNT': '1',
    'Referer': 'http://example.com/',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8,ja;q=0.6',
}
51 |
# Spreadsheet handle. Nothing in this script writes cells to it or saves it.
file = xlwt.Workbook()
table = file.add_sheet('info', cell_overwrite_ok=True)
x = 0

# XML-RPC client for the target WordPress site.
# NOTE(review): the URL and credentials look like placeholders -- replace
# them with real values before running.
wp = Client('http://www.xxx.cc/xmlrpc.php', 'user', 'pass')

# NOTE(review): ``base_url`` is never assigned at module scope in this script
# (the only assignment is a local variable inside google_translate_EtoC), so
# the loop below raises NameError until the product-API URL prefix is defined.
with open("urllist2.txt", "r") as f:
    for line in f:
        # Each line carries its trailing newline; strip it so it does not end
        # up inside the request URL, and skip blank lines.
        line = line.strip()
        if not line:
            continue

        url = base_url + line
        res = requests.get(url, headers=headers, timeout=30)
        res.raise_for_status()
        json_data = json.loads(res.text)
        data = json_data['data']

        # Product fields. Only name, catogery, price and productDesc are used
        # for the post below; the rest are extracted but currently unused.
        sku = data['sku_config']
        name = data['product_name']
        catogery = data['bread_crumb'][2]['value']
        price = data['price'].replace("HK$", "")
        productDesc = data['short_description']
        color = data['attributes'][1]['value']
        Care_label = data['attributes'][2]['value']
        model_body = data['size_attributes'][0]['value']
        model_garment = data['size_attributes'][1]['value']
        size = data['size_attributes'][2]['value']
        image = data['product_images'][3]['product_image']
        returnable = data['return_info_text']

        # Publish the blog post: translated title, and a body made of the
        # category, the price and the translated description concatenated.
        post = WordPressPost()
        post.title = google_translate_EtoC(name)
        post.content = catogery + price + google_translate_EtoC(productDesc)
        post.post_status = 'publish'
        post.terms_names = {
            'post_tag': ['test', 'firstpost'],
            'category': [' Titika', 'Titika'],
        }

        wp.call(NewPost(post))
本文暂无标签