Format: 代码格式化

master
chenjiandongx 5 years ago
parent bef835cf38
commit 7bad5c325c

@ -1,9 +1,15 @@
#!/usr/bin/env python
# coding=utf-8
import csv
import json
from pprint import pprint
import requests
class Cups():
class Cups:
def __init__(self, url, page, path):
self._url = url
@ -12,10 +18,14 @@ class Cups():
self._result = set()
def run(self):
""" 启动爬虫 """
headers = {'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/56.0.2924.87 Safari/537.36'}
"""
启动爬虫
"""
headers = {
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
'(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}
urls = []
urls.extend([self._url.format(1, p, 0) for p in range(1, self._page)])
urls.extend([self._url.format(1, p, 1) for p in range(1, self._page)])
@ -25,8 +35,11 @@ class Cups():
try:
j = json.loads(requests.get(u, headers=headers, timeout=2).text[15:])
for i, v in enumerate(j['rateList']):
goods = (v['rateDate'], v['auctionSku'],
v['rateContent'].replace("<b>", "").replace("</b>", "").replace("&hellip", ""))
goods = (
v['rateDate'],
v['auctionSku'],
v['rateContent'].replace("<b>", "").replace("</b>", "").replace("&hellip", "")
)
self._result.add(goods)
print(i)
except Exception as e:
@ -36,18 +49,23 @@ class Cups():
self.save()
def save(self):
    """
    Save the collected data to a local CSV file.

    Writes every tuple held in ``self._result`` as one CSV row to the
    file at ``self._path``, replacing any previous content. Because
    ``self._result`` is a set, the order of the rows in the file is
    not guaranteed.
    """
    # newline="" hands line-ending control to the csv module; without it,
    # platforms that translate "\n" turn the writer's "\r\n" terminator
    # into "\r\r\n", which shows up as a blank line after every row.
    with open(self._path, "w+", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(self._result)
def clear(self, out_path="cup_all.csv"):
    """
    Deduplicate the rows of the saved CSV file.

    Reads every row of the CSV file at ``self._path``, drops exact
    duplicates, and writes the remaining rows to ``out_path``. Rows pass
    through a set, so their order in the output is not guaranteed.

    :param out_path: destination CSV file; defaults to ``"cup_all.csv"``,
        the file name this method has always written to.
    """
    # newline="" on both files lets the csv module handle line endings:
    # on read it keeps line breaks inside quoted fields intact, on write
    # it avoids the doubled "\r\r\n" terminator on translating platforms.
    with open(self._path, "r", encoding="utf-8", newline="") as f:
        # csv.reader yields lists; tuples are hashable and can go in a set.
        unique_rows = {tuple(row) for row in csv.reader(f)}

    with open(out_path, "w+", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(unique_rows)
@ -55,7 +73,9 @@ class Cups():
@staticmethod
def extract():
""" 提取数据 """
"""
提取数据
"""
# datelst, size_colorlst, commentlst = [], [], []
# with open("cup_all.csv", "r", encoding="utf-8") as f:
# fin_csv = csv.reader(f)
@ -67,6 +87,7 @@ class Cups():
# with open("comment.txt", "w+", encoding="utf-8") as f:
# for r in commentlst:
# f.write(r + "\n")
with open(r"data_/size_color.txt", "r", encoding="utf-8") as fin:
rows = fin.readlines()
lst = []
@ -75,6 +96,7 @@ class Cups():
with open(r"data_/size.txt", "w+", encoding="utf-8") as fout:
fout.writelines(lst)
if __name__ == "__main__":
url = "https://rate.tmall.com/list_detail_rate.htm?itemId=37457670144&spuId=249827344&" \
@ -82,4 +104,4 @@ if __name__ == "__main__":
cups = Cups(url, 101, "cups.csv")
# cups.run()
# cups.clear()
cups.extract()
cups.extract()

Loading…
Cancel
Save