import re
import csv
with open('source.txt', 'r', encoding="utf-8") as f:
source = f.read()
result_list = []
username_list = re.findall("username='(.*?)'", source, re.S)
content_list = re.findall('j_d_post_content">(.*?)<', source, re.S)
reply_time_list =re.findall('class="tail-info">(2021.*?)<', source, re.S)
for i in range(len(username_list)):
result = {'username': username_list[i],
'content': content_list[i],
'reply_time': reply_time_list[1]}
result_list.append(result)
with open('tieba.csv', 'w', encoding='uft-8')as f:
writer = csv.DictWriter(f, fieldnames=['username', 'content', 'reply_time'])
writer.writeheader()
writer.writerows(result_list)
every_reply = re.findall('l_post l_post_bright j_l_post clearfix"(.*?)"p_props_tail props_appraise_wrap', source, re.S)
for each in every_reply:
result = {}
result_list.append(result)
with open('tieba.csv', 'w', encoding='UTF-8')as f:
writer = csv.DictWriter(f, fieldnames=['username', 'content', 'reply_time'])
writer.writeheader()
writer.writerows(result_list)
报错区
C\Users\12235\AppData\Local\Programs\Python\Python310\C:/Users/12235/AppData/Local/TempTraceback (most recent call last):
File "C:\Users\12235\AppData\Local\Temp\tieba.py", line 4, in <module>
source = f.read()
File "C:\Users\12235\AppData\Local\Programs\Python\Python310\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 78: invalid start byte
Process finished with exit code 1
import csv
with open('source.txt', 'r', encoding="utf-8") as f:
source = f.read()
result_list = []
username_list = re.findall("username='(.*?)'", source, re.S)
content_list = re.findall('j_d_post_content">(.*?)<', source, re.S)
reply_time_list =re.findall('class="tail-info">(2021.*?)<', source, re.S)
for i in range(len(username_list)):
result = {'username': username_list[i],
'content': content_list[i],
'reply_time': reply_time_list[1]}
result_list.append(result)
with open('tieba.csv', 'w', encoding='uft-8')as f:
writer = csv.DictWriter(f, fieldnames=['username', 'content', 'reply_time'])
writer.writeheader()
writer.writerows(result_list)
every_reply = re.findall('l_post l_post_bright j_l_post clearfix"(.*?)"p_props_tail props_appraise_wrap', source, re.S)
for each in every_reply:
result = {}
result_list.append(result)
with open('tieba.csv', 'w', encoding='UTF-8')as f:
writer = csv.DictWriter(f, fieldnames=['username', 'content', 'reply_time'])
writer.writeheader()
writer.writerows(result_list)
报错区
C\Users\12235\AppData\Local\Programs\Python\Python310\C:/Users/12235/AppData/Local/TempTraceback (most recent call last):
File "C:\Users\12235\AppData\Local\Temp\tieba.py", line 4, in <module>
source = f.read()
File "C:\Users\12235\AppData\Local\Programs\Python\Python310\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 78: invalid start byte
Process finished with exit code 1