1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| from urllib.request import HTTPError
# Scrape posts 0-9 of the blog and append the text of each post's body
# elements to 'tistory_all.txt', one element per line.

# Elements whose text is extracted from the article body (loop-invariant).
tags = ['h2', 'h3', 'h4', 'li', 'p', 'blockquote', 'code']

for post_number in range(10):
    url = f'https://minyeamer.tistory.com/{post_number}'
    try:
        # Close the HTTP response as soon as the page has been parsed.
        with urlopen(url) as web:
            source = BeautifulSoup(web, 'html.parser')
    except HTTPError:
        # Report the failing post and move on to the next one. Falling
        # through here would reuse `source` from the previous iteration
        # (or hit an unbound name on the first one).
        print(f'{post_number}번 글에서 에러가 발생했습니다.')
        continue

    all_text = source.find('article', {'class': 'content'})
    if all_text is None:
        # find() returns None when the page has no matching <article>;
        # there is nothing to extract from such a page.
        continue

    article = all_text.find_all(tags)
    # Append mode keeps the text already written for earlier posts.
    with open('tistory_all.txt', 'a', encoding='utf-8') as f:
        for content in article:
            f.write(content.get_text() + '\n')
|