from pathlib import Path
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
# Apply seaborn's default plotting theme to every figure drawn afterwards.
# `set_theme` is the preferred name; `sns.set` is only an alias for it.
sns.set_theme()
# Locate the local data directory relative to the current working directory
# and create it (with any missing parents) if it is not there yet.
here = Path(".")
data = here / "data"
data.mkdir(parents=True, exist_ok=True)

# Load the songs table into a pandas DataFrame (the pandas counterpart of an
# R tibble).
songs_csv = data / "spotify_songs.csv"
spotify_songs = pd.read_csv(songs_csv)
# NOTE(review): this writes the frame straight back to the file it was just
# read from. It looks like a leftover from when the data came from another
# location -- confirm whether the write-back is still wanted.
spotify_songs.to_csv(songs_csv, index=False)
spotify_songs
| | track_id | track_name | track_artist | track_popularity | track_album_id | track_album_name | track_album_release_date | playlist_name | playlist_id | playlist_genre | ... | key | loudness | mode | speechiness | acousticness | instrumentalness | liveness | valence | tempo | duration_ms |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 6f807x0ima9a1j3VPbc7VN | I Don't Care (with Justin Bieber) - Loud Luxur... | Ed Sheeran | 66 | 2oCs0DGTsRO98Gh5ZSl2Cx | I Don't Care (with Justin Bieber) [Loud Luxury... | 2019-06-14 | Pop Remix | 37i9dQZF1DXcZDD7cfEKhW | pop | ... | 6 | -2.634 | 1 | 0.0583 | 0.102000 | 0.000000 | 0.0653 | 0.5180 | 122.036 | 194754 |
| 1 | 0r7CVbZTWZgbTCYdfa2P31 | Memories - Dillon Francis Remix | Maroon 5 | 67 | 63rPSO264uRjW1X5E6cWv6 | Memories (Dillon Francis Remix) | 2019-12-13 | Pop Remix | 37i9dQZF1DXcZDD7cfEKhW | pop | ... | 11 | -4.969 | 1 | 0.0373 | 0.072400 | 0.004210 | 0.3570 | 0.6930 | 99.972 | 162600 |
| 2 | 1z1Hg7Vb0AhHDiEmnDE79l | All the Time - Don Diablo Remix | Zara Larsson | 70 | 1HoSmj2eLcsrR0vE9gThr4 | All the Time (Don Diablo Remix) | 2019-07-05 | Pop Remix | 37i9dQZF1DXcZDD7cfEKhW | pop | ... | 1 | -3.432 | 0 | 0.0742 | 0.079400 | 0.000023 | 0.1100 | 0.6130 | 124.008 | 176616 |
| 3 | 75FpbthrwQmzHlBJLuGdC7 | Call You Mine - Keanu Silva Remix | The Chainsmokers | 60 | 1nqYsOef1yKKuGOVchbsk6 | Call You Mine - The Remixes | 2019-07-19 | Pop Remix | 37i9dQZF1DXcZDD7cfEKhW | pop | ... | 7 | -3.778 | 1 | 0.1020 | 0.028700 | 0.000009 | 0.2040 | 0.2770 | 121.956 | 169093 |
| 4 | 1e8PAfcKUYoKkxPhrHqw4x | Someone You Loved - Future Humans Remix | Lewis Capaldi | 69 | 7m7vv9wlQ4i0LFuJiE2zsQ | Someone You Loved (Future Humans Remix) | 2019-03-05 | Pop Remix | 37i9dQZF1DXcZDD7cfEKhW | pop | ... | 1 | -4.672 | 1 | 0.0359 | 0.080300 | 0.000000 | 0.0833 | 0.7250 | 123.976 | 189052 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32828 | 7bxnKAamR3snQ1VGLuVfC1 | City Of Lights - Official Radio Edit | Lush & Simon | 42 | 2azRoBBWEEEYhqV6sb7JrT | City Of Lights (Vocal Mix) | 2014-04-28 | ♥ EDM LOVE 2020 | 6jI1gFr6ANFtT8MmTvA2Ux | edm | ... | 2 | -1.814 | 1 | 0.0936 | 0.076600 | 0.000000 | 0.0668 | 0.2100 | 128.170 | 204375 |
| 32829 | 5Aevni09Em4575077nkWHz | Closer - Sultan & Ned Shepard Remix | Tegan and Sara | 20 | 6kD6KLxj7s8eCE3ABvAyf5 | Closer Remixed | 2013-03-08 | ♥ EDM LOVE 2020 | 6jI1gFr6ANFtT8MmTvA2Ux | edm | ... | 0 | -4.462 | 1 | 0.0420 | 0.001710 | 0.004270 | 0.3750 | 0.4000 | 128.041 | 353120 |
| 32830 | 7ImMqPP3Q1yfUHvsdn7wEo | Sweet Surrender - Radio Edit | Starkillers | 14 | 0ltWNSY9JgxoIZO4VzuCa6 | Sweet Surrender (Radio Edit) | 2014-04-21 | ♥ EDM LOVE 2020 | 6jI1gFr6ANFtT8MmTvA2Ux | edm | ... | 6 | -4.899 | 0 | 0.0481 | 0.108000 | 0.000001 | 0.1500 | 0.4360 | 127.989 | 210112 |
| 32831 | 2m69mhnfQ1Oq6lGtXuYhgX | Only For You - Maor Levi Remix | Mat Zo | 15 | 1fGrOkHnHJcStl14zNx8Jy | Only For You (Remixes) | 2014-01-01 | ♥ EDM LOVE 2020 | 6jI1gFr6ANFtT8MmTvA2Ux | edm | ... | 2 | -3.361 | 1 | 0.1090 | 0.007920 | 0.127000 | 0.3430 | 0.3080 | 128.008 | 367432 |
| 32832 | 29zWqhca3zt5NsckZqDf6c | Typhoon - Original Mix | Julian Calor | 27 | 0X3mUOm6MhxR7PzxG95rAo | Typhoon/Storm | 2014-03-03 | ♥ EDM LOVE 2020 | 6jI1gFr6ANFtT8MmTvA2Ux | edm | ... | 5 | -4.571 | 0 | 0.0385 | 0.000133 | 0.341000 | 0.7420 | 0.0894 | 127.984 | 337500 |
32833 rows × 23 columns
## How many songs are in each genre?
# Number of rows in each `playlist_genre` group.
songs_by_genre = spotify_songs.groupby("playlist_genre")
songs_by_genre.size()
playlist_genre edm 6043 latin 5155 pop 5507 r&b 5431 rap 5746 rock 4951 dtype: int64
## What are the average values of energy and acousticness in the latin genre in this dataset?
# Keep only the rows whose playlist genre is "latin", then take the column
# means of the two features asked about.
is_latin = spotify_songs["playlist_genre"] == "latin"
latin_features = spotify_songs.loc[is_latin, ["energy", "acousticness"]]
latin_features.mean()
## Calculate the average song duration (in minutes) for each subgenre. Which subgenre has the longest songs on average?
# Mean `duration_ms` per playlist subgenre, sorted longest first, then
# converted from milliseconds to minutes (60000 ms per minute).
mean_duration_ms = spotify_songs.groupby("playlist_subgenre")["duration_ms"].mean()
mean_duration_ms.sort_values(ascending=False) / 60000
# Draw two side-by-side boxplots of danceability, one per tempo class.
# A song is "fast" when its tempo is strictly above the median tempo of the
# whole table; every other row is labelled "slow" (this includes rows with a
# missing tempo, for which the comparison evaluates to False).
median = spotify_songs['tempo'].median()
# Adds a `tempo_type` column to the shared frame; the plot reads it below.
spotify_songs['tempo_type'] = np.where(spotify_songs['tempo'] > median, 'fast', 'slow')
# Pass the frame explicitly as `data=` so the call does not depend on the
# positional-argument order of the installed seaborn release.
sns.boxplot(data=spotify_songs, x='tempo_type', y='danceability')
<AxesSubplot: xlabel='tempo_type', ylabel='danceability'>
# Same comparison as a violin plot: danceability split by the `tempo_type`
# column. The frame is passed as `data=` so the call does not depend on the
# positional-argument order of the installed seaborn release.
sns.violinplot(data=spotify_songs, x='tempo_type', y='danceability')
<AxesSubplot: xlabel='tempo_type', ylabel='danceability'>
# Load the `pbmc3k` dataset through scanpy's `datasets` module and echo the
# resulting object so the notebook displays its summary.
# NOTE(review): the recorded output shows a download progress bar, so this
# presumably fetches the file over the network on first use -- confirm.
adata = sc.datasets.pbmc3k()
adata
100%|██████████| 5.58M/5.58M [00:01<00:00, 5.19MB/s]
AnnData object with n_obs × n_vars = 2700 × 32738
var: 'gene_ids'