import requests
from bs4 import BeautifulSoup

# Listing endpoint; pages are selected with a ``start`` offset query parameter.
TOP250_URL = 'https://movie.douban.com/top250'
# Offset step between consecutive listing pages.
PAGE_SIZE = 25
# File the collected titles are appended to.
OUTPUT_PATH = '豆瓣电影TOP250.txt'


def get_movies(pages=10):
    """Collect movie titles from the Douban Top 250 listing pages.

    Requests ``pages`` consecutive listing pages (offsets 0, 25, 50, ...),
    prints the HTTP status code of each response, and extracts one title
    per ``<div class="hd">`` element: the stripped text of the first
    ``<span>`` inside the element's first ``<a>``.

    Args:
        pages: Number of listing pages to fetch. Defaults to 10.

    Returns:
        A list of title strings in page order.

    Raises:
        requests.RequestException: Network errors and timeouts are not
            caught here and propagate to the caller.
    """
    # Browser-like headers; presumably the site rejects the default
    # python-requests User-Agent -- TODO confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Host': 'movie.douban.com',
    }
    movie_list = []
    for i in range(pages):
        link = TOP250_URL + '?start=' + str(i * PAGE_SIZE)
        r = requests.get(link, headers=headers, timeout=10)
        # Best-effort: report the status and keep going; a page without
        # matching elements simply contributes no titles.
        print(str(i + 1), "page status_code:", r.status_code)
        soup = BeautifulSoup(r.text, 'lxml')
        for each in soup.find_all('div', class_='hd'):
            anchor = each.a
            title_span = anchor.span if anchor is not None else None
            if title_span is None:
                # Unexpected markup: skip this entry instead of raising
                # AttributeError and losing everything collected so far.
                continue
            movie_list.append(title_span.text.strip())
    return movie_list


# NOTE: as in the original script, scraping and writing run at module level
# (i.e. also on import). Wrap in ``if __name__ == '__main__':`` if this file
# is ever imported as a library.
movies = get_movies()
# Explicit UTF-8 so non-ASCII titles are written the same way on every
# platform; ``with`` guarantees the file is closed even if a write fails.
with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
    f.writelines(movie + '\n' for movie in movies)