How Long is Too Long?

# Endpoint and request headers for the Discover API (api.themoviedb.org).
# The bearer token is read from `access_token`, which must already be defined.
url = 'https://api.themoviedb.org/3/discover/movie'
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json;charset=utf-8',
}
def get_num_pages(url, headers, start_date, end_date):
    """
    Return the number of result pages the API reports for a date range.

    Takes as input an API url, headers containing authentication
    information, a start date, and an end date. Sends a single GET
    request filtered by ``release_date.gte`` / ``release_date.lte`` and
    returns the ``total_pages`` field of the JSON response.

    Raises requests.HTTPError if the API answers with an error status.
    """
    params = {'release_date.gte': start_date,
              'release_date.lte': end_date}
    response = requests.get(url=url, headers=headers, params=params)
    # Fail loudly on an HTTP error instead of going on to index the body;
    # presumably an error payload has no 'total_pages' key and would
    # otherwise only show up as an unhelpful KeyError.
    response.raise_for_status()
    return response.json()['total_pages']
def get_movies_data(start_date, end_date, url, headers):
    """
    Return a dataframe of movie information between start and end date.

    Takes a start date, end date, API url, and headers with
    authentication information. Uses get_num_pages to check how many
    pages the API reports for the date range, then requests every page
    (pages are numbered from 1) and stacks each page's ``results`` list
    into one dataframe.

    The row index is not reset, so index labels repeat from page to
    page. An empty dataframe is returned when the API reports zero pages.

    Raises requests.HTTPError if a page request gets an error status.
    """
    num_pages = get_num_pages(url, headers, start_date, end_date)
    page_frames = []
    for page in range(1, num_pages + 1):
        # The upper-bound key must be spelled 'release_date.lte'; it has to
        # match the filter get_num_pages used, or the page count and the
        # paged results describe different queries.
        parameters = {'release_date.gte': start_date,
                      'release_date.lte': end_date,
                      'page': page}
        response = requests.get(url, headers=headers, params=parameters)
        response.raise_for_status()
        page_frames.append(pd.DataFrame(response.json()['results']))
    # pd.concat rejects an empty list, so handle "no pages" explicitly.
    if not page_frames:
        return pd.DataFrame()
    # Concatenate once at the end rather than re-copying a growing
    # dataframe on every page.
    return pd.concat(page_frames, sort=False)
def create_quarter_date_list(year):
    """
    Return the quarterly start dates and end dates for a year.

    Takes a year (e.g. the int 2000) and returns a tuple of two lists of
    'YYYY-MM-DD' strings: the first day of each calendar quarter and the
    last day of each calendar quarter, both in quarter order.
    """
    # (first day, last day) of each quarter as 'MM-DD'; none of these
    # boundaries falls in February, so leap years need no special case.
    quarter_bounds = (
        ('01-01', '03-31'),
        ('04-01', '06-30'),
        ('07-01', '09-30'),
        ('10-01', '12-31'),
    )
    start_dates = [f'{year}-{first_day}' for first_day, _ in quarter_bounds]
    end_dates = [f'{year}-{last_day}' for _, last_day in quarter_bounds]
    return start_dates, end_dates
# Create quarterly start and end dates for 2000 through 2020 (inclusive).
start_dates = []
end_dates = []
for year in range(2000, 2021):
    # NOTE: despite the singular names, start_date and end_date are each a
    # list of that year's four quarterly dates.
    start_date, end_date = create_quarter_date_list(year)
    start_dates.extend(start_date)
    end_dates.extend(end_date)
# Get movie data for every quarter and combine it into one dataframe.
url = 'https://api.themoviedb.org/3/discover/movie'
# Make one set of API calls per (start, end) quarter pair. The per-quarter
# frames are collected first and concatenated once at the end, so the
# growing dataframe is not re-copied on every iteration.
quarterly_frames = []
for start_date, end_date in zip(start_dates, end_dates):
    temp_df = get_movies_data(start_date=start_date,
                              end_date=end_date,
                              url=url,
                              headers=headers)
    quarterly_frames.append(temp_df)
# pd.concat rejects an empty list, so fall back to an empty dataframe.
if quarterly_frames:
    df = pd.concat(quarterly_frames, sort=False)
else:
    df = pd.DataFrame()
DataFrame showing results of first API calls
# Base URL of the per-movie details endpoint; the movie ID is appended below.
url = 'https://api.themoviedb.org/3/movie'
movie_details = []
# Loop through all movie IDs, request details, and append to movie_details.
movie_ids = movies['id']
total_movies = len(movie_ids)  # hoisted: does not change during the loop
for completed, movie_id in enumerate(movie_ids, start=1):
    response = requests.get(f'{url}/{movie_id}',
                            headers=headers).json()
    movie_details.append(response)
    # Report completed/total so the last iteration reports 1.0; a zero-based
    # index would stop at (n - 1) / n.
    # NOTE(review): assumes update_progress takes the completed fraction in
    # [0, 1] - confirm against its definition.
    update_progress(completed / total_movies)
# Build one dataframe row per movie-details response and print a summary.
movies_df = pd.DataFrame(movie_details)
movies_df.info()
# Assign the filled columns back explicitly. Calling
# fillna(..., inplace=True) on a column selection is a chained in-place
# update: recent pandas versions warn about it, and under Copy-on-Write it
# leaves movies_df unchanged.
movies_df['imdb_id'] = movies_df['imdb_id'].fillna('missing')
# Missing runtimes are replaced with the median of the known runtimes.
movies_df['runtime'] = movies_df['runtime'].fillna(
    movies_df['runtime'].median())
# Keep only rows with positive revenue, then split them into runtime bands.
runtime_df = runtime_df.loc[runtime_df['revenue'] > 0]

# Taken after the revenue filter so the masks line up with the filtered rows.
runtimes = runtime_df['runtime']

# Each band excludes its lower bound and includes its upper bound, so
# despite the "lt" names a runtime of exactly 30 lands in movies_lt_30, etc.
movies_lt_30 = runtime_df.loc[runtimes.le(30)]
movies_lt_60 = runtime_df.loc[runtimes.gt(30) & runtimes.le(60)]
movies_lt_90 = runtime_df.loc[runtimes.gt(60) & runtimes.le(90)]
movies_lt_120 = runtime_df.loc[runtimes.gt(90) & runtimes.le(120)]
movies_lt_150 = runtime_df.loc[runtimes.gt(120) & runtimes.le(150)]
movies_lt_180 = runtime_df.loc[runtimes.gt(150) & runtimes.le(180)]
movies_gt_180 = runtime_df.loc[runtimes.gt(180)]

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store