-
Notifications
You must be signed in to change notification settings - Fork 674
Open
Labels
Description
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import os
from datetime import datetime, timedelta
def generate_date_range(start_date, end_date):
    """Return dates from start_date to end_date as 'YYYY-MM-DD' strings.

    Dates advance seven days at a time starting at ``start_date``. When the
    weekly steps do not land exactly on ``end_date``, ``end_date`` itself is
    appended so that both ends of the range are included.

    Args:
        start_date: First date of the range (must support ``strftime`` and
            ``timedelta`` addition, e.g. ``datetime``).
        end_date: Last date of the range, same type as ``start_date``.

    Returns:
        A list of date strings; empty when ``start_date`` is after
        ``end_date``.
    """
    date_list = []
    current_date = start_date
    while current_date <= end_date:
        date_list.append(current_date.strftime('%Y-%m-%d'))
        current_date += timedelta(days=7)
    # An empty list means start_date > end_date; indexing [-1] would raise
    # IndexError there, so only top up the range when it has entries.
    if date_list and date_list[-1] != end_date.strftime('%Y-%m-%d'):
        date_list.append(end_date.strftime('%Y-%m-%d'))
    return date_list
def fetch_data(date):
    """Load the Billboard Hot 100 page for ``date`` and return its HTML.

    A fresh headless Chrome session is started for the request and is always
    shut down before returning, including when navigation or the wait fails.

    Args:
        date: Chart date string inserted into the chart URL.

    Returns:
        The page source as a string, or ``None`` when the chart title
        elements did not appear within 20 seconds.
    """
    url = f'https://www.billboard.com/charts/hot-100/{date}/'
    print(f'Fetching data for {date}...')
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    # try/finally guarantees the browser process is released on every path,
    # including exceptions raised by get() or page_source.
    try:
        driver.get(url)
        try:
            # Block until at least one chart title is present in the DOM.
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'li.o-chart-results-list__item h3.c-title')
                )
            )
        except Exception as e:
            # Best-effort: report the failure and let the caller skip this date.
            print(f"Page did not load in time: {e}")
            return None
        return driver.page_source
    finally:
        driver.quit()
def parse_data(html, tracked_artist):
    """Extract the tracked artist's songs and positions from chart HTML.

    Each ``h3.c-title`` inside a ``li.o-chart-results-list__item`` is treated
    as one chart entry, and the next ``span.c-label`` after it as the artist.
    The position is the entry's 1-based index among the matched titles
    (assumes the page lists entries in chart order -- TODO confirm).

    Args:
        html: Page source of a chart page.
        tracked_artist: Artist name to look for; matched case-insensitively
            as a substring of the entry's artist text.

    Returns:
        A list of ``[position, "Title (Artist)"]`` pairs, in page order.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Lower-case the search key once instead of on every chart entry.
    artist_key = tracked_artist.lower()
    songs = []
    chart_items = soup.select('li.o-chart-results-list__item h3.c-title')
    print(f"Found {len(chart_items)} chart items.")
    for position, item in enumerate(chart_items, start=1):
        title = item.text.strip()
        artist_element = item.find_next('span', class_='c-label')
        artist = artist_element.text.strip() if artist_element else ''
        if artist_key in artist.lower():
            full_title = f"{title} ({artist})"
            songs.append([position, full_title])
            print(f"Found song: {full_title} at position {position}")
    return songs
def save_to_excel(all_songs, date_range):
    """Write the tracked songs' chart positions per date to an Excel file.

    The sheet has a 'Chart Date' column followed by one column per distinct
    song title, in the order the titles were first found. Each row holds one
    date's positions, with 'N/A' where a song was not found for that date.
    The file is written to ``~/Desktop/Tracked_Songs_Hot_100.xlsx``; a failure
    while writing is printed rather than raised.

    Args:
        all_songs: One list per chart date, each containing
            ``[position, title]`` entries.
        date_range: Chart dates, paired positionally with ``all_songs``.
    """
    desktop_path = os.path.expanduser('~/Desktop')
    filename = os.path.join(desktop_path, 'Tracked_Songs_Hot_100.xlsx')
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column layout is the same on every run (a set's order is not).
    tracked_headers = list(
        dict.fromkeys(song[1] for songs in all_songs for song in songs)
    )
    columns = ['Chart Date'] + tracked_headers
    all_data = []
    for date, songs in zip(date_range, all_songs):
        row = {'Chart Date': date}
        # Start every song at 'N/A', then overwrite the ones found this date.
        song_positions = dict.fromkeys(tracked_headers, 'N/A')
        for song in songs:
            song_positions[song[1]] = song[0]
        row.update(song_positions)
        all_data.append(row)
    df = pd.DataFrame(all_data, columns=columns)
    if df.empty:
        print("No data to save.")
        return
    try:
        df.to_excel(filename, index=False)
        print(f'Data saved to {filename}')
    except Exception as e:
        # Best-effort save: report the problem instead of aborting the run.
        print(f'Error saving file: {e}')
def main():
    """Scrape the configured date range and export the tracked artist's songs.

    For each date from December 14, 2024 to June 21, 2025, the chart page is
    fetched and parsed for "Sabrina Carpenter" entries. Dates whose page
    could not be fetched contribute an empty result so that results stay
    aligned with the date list. The spreadsheet is written only when at
    least one song was found.
    """
    start_date = datetime(2024, 12, 14)
    end_date = datetime(2025, 6, 21)
    date_range = generate_date_range(start_date, end_date)
    tracked_artist = "Sabrina Carpenter"
    all_songs = []
    for date in date_range:
        html_data = fetch_data(date)
        # Keep one entry per date, even on failure, so zip() pairing holds.
        all_songs.append(parse_data(html_data, tracked_artist) if html_data else [])
    # all_songs has one (possibly empty) list per date, so test the contents:
    # a bare truthiness check would be true even when nothing was found.
    if any(all_songs):
        save_to_excel(all_songs, date_range)
    else:
        print("No songs by the artist were found in any chart entry.")
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()