# for dataframe wrangling
import pandas as pd

# download the three survey tables (answers, color_ranks, users) from the
# TidyTuesday 2025-07-08 release; the tables are read in that order
answers, color_ranks, users = (
    pd.read_csv(
        'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/'
        f'{table}.csv'
    )
    for table in ('answers', 'color_ranks', 'users')
)

# tally how often each value of the colorblind flag occurs among users
users['colorblind'].value_counts()

# join every answer to the color_ranks row with the same rank; the hex column
# of color_ranks is dropped first so it does not collide with the answers' hex
answers_w_colors = answers.merge(
    color_ranks.drop(columns='hex'),
    how="inner",
    on='rank',
)

# attach the per-user attributes to each answer
users_answers = answers_w_colors.merge(users, on='user_id')

# preview the merged table
users_answers.head()

# summary statistics of the spam probability, used to pick a cutoff below
users_answers['spam_prob'].describe()

# keep only the rows whose spam probability is below the dataset-wide mean
spam_cutoff = users_answers['spam_prob'].mean()
# .copy() makes the filtered table an independent DataFrame, so the columns
# added to it later are written to this frame itself and pandas has no reason
# to raise SettingWithCopyWarning
users_answers_clean = users_answers[users_answers['spam_prob'] < spam_cutoff].copy()

# NOTE: len() counts rows of the merged table (a user can appear on several
# rows), even though the message below calls them "users"
print("This reduces the dataset from ", len(users_answers), "users to ", len(users_answers_clean), "users.")

This reduces the dataset from  1058211 users to  672374 users.

# for HEX to RGB
from PIL import ImageColor
# for cleaner appearance: silence warnings for the rest of the run
# (filterwarnings('ignore') is called without a category, so it is not
# limited to one kind of warning)
# NOTE(review): this also hides any pandas warnings raised by the column
# assignments further down - confirm that is intended
import warnings
warnings.filterwarnings('ignore')

# convert HEX to RGB: each hex string becomes an (R, G, B) tuple
users_answers_clean['rgb'] = users_answers_clean['hex'].apply(ImageColor.getcolor, mode="RGB")

# split apart the RGB tuple into separate variables in a single pass;
# expanding the tuples into a three-column frame replaces one positional
# .iloc lookup per row per channel
rgb_channels = pd.DataFrame(
    users_answers_clean['rgb'].tolist(),
    index=users_answers_clean.index,  # same index, so the assignments below align row by row
    columns=['red', 'green', 'blue'],
)
users_answers_clean['red'] = rgb_channels['red']
users_answers_clean['green'] = rgb_channels['green']
users_answers_clean['blue'] = rgb_channels['blue']

# convert colorblind to category type variable
users_answers_clean['colorblind'] = users_answers_clean['colorblind'].astype("category")

# preview the table with the new rgb / red / green / blue columns
users_answers_clean.head()

# for ternary plots
import plotly.express as px
import plotly.io as pio
# set the default renderer that plotly uses when a figure is shown
# NOTE(review): per the Plotly renderer docs, "notebook_connected" loads
# plotly.js from a CDN, so viewing the figures needs internet access - confirm
pio.renderers.default = "notebook_connected"

# draw five rows at random (fixed seed so the same rows are picked every run)
five_colors = users_answers_clean.sample(n=5, random_state=12)
# add a label variable to allow color mapping in plotly
five_colors["label"] = [f"color{position}" for position in range(1, 6)]

# each point is drawn in its own color: the tuple's text form "(r, g, b)"
# loses its closing parenthesis and gains an alpha of 1.0, giving
# "rgba(r, g, b, 1.0)"
fig = px.scatter_ternary(
    five_colors,
    a="red",
    b="blue",
    c="green",
    color="label",
    color_discrete_map={
        label: f"rgba{str(rgb)[:-1]}, 1.0)"
        for label, rgb in zip(five_colors["label"], five_colors["rgb"])
    },
    title="Ternary Plot of Five Random RGB Colors",
)
# the labels are only a device for the color mapping, so hide the legend
fig.update_layout(showlegend=False)
fig.show()

# how many answers fall under each color name
users_answers_clean['color'].value_counts()


def _plot_label_by_colorblind(color_name, marker_colors, title):
    """Show a ternary plot of the RGB values that were labeled ``color_name``.

    The rows of ``users_answers_clean`` whose ``color`` equals ``color_name``
    are plotted with red / blue / green on the three axes, and the points are
    colored by the ``colorblind`` column.

    Args:
        color_name: Value of the ``color`` column to keep (e.g. ``"green"``).
        marker_colors: Dict mapping each ``colorblind`` value (``1`` and ``0``)
            to the rgba string used to draw its points.
        title: Title shown above the figure.

    Returns:
        A ``(subset, fig)`` tuple: the filtered rows and the figure that was
        displayed.
    """
    subset = users_answers_clean[users_answers_clean['color'] == color_name]
    fig = px.scatter_ternary(subset, a="red", b="blue", c="green", color="colorblind",
                             color_discrete_map=marker_colors,
                             title=title)
    fig.show()
    return subset, fig


# one figure per color name; each call keeps the subset under its original
# variable name and leaves the most recent figure in `fig`
greens, fig = _plot_label_by_colorblind(
    "green",
    {1: "rgba(130, 200, 50, 0.5)", 0: "rgba(50, 90, 50, 1.0)"},
    "RGB Colors Labeled as Green by Colorblind and Non-Colorblind Users",
)

blues, fig = _plot_label_by_colorblind(
    "blue",
    {1: "rgba(90, 230, 230, 0.5)", 0: "rgba(50, 50, 90, 1.0)"},
    "RGB Colors Labeled as Blue by Colorblind and Non-Colorblind Users",
)

purples, fig = _plot_label_by_colorblind(
    "purple",
    {1: "rgba(220, 170, 250, 0.5)", 0: "rgba(120, 10, 150, 1.0)"},
    "RGB Colors Labeled as Purple by Colorblind and Non-Colorblind Users",
)

pinks, fig = _plot_label_by_colorblind(
    "pink",
    {1: "rgba(255, 195, 240, 0.5)", 0: "rgba(255, 70, 210, 1.0)"},
    "RGB Colors Labeled as Pink by Colorblind and Non-Colorblind Users",
)

browns, fig = _plot_label_by_colorblind(
    "brown",
    {1: "rgba(180, 125, 40, 0.5)", 0: "rgba(100, 65, 10, 1.0)"},
    "RGB Colors Labeled As Brown By Colorblind And Non-Colorblind Users",
)

	user_id	hex	rank	color	monitor	y_chromosome	spam_prob
0	1	#8240EA	1	purple	LCD	1.0	0.002088
1	2	#4B31EA	3	blue	LCD	1.0	0.074577
2	2	#584601	5	brown	LCD	1.0	0.074577
3	2	#DA239C	4	pink	LCD	1.0	0.074577
4	2	#B343E5	1	purple	LCD	1.0	0.074577

	spam_prob
count	1.058211e+06
mean	1.259867e-01
std	2.704004e-01
min	7.416590e-05
25%	3.713345e-02
50%	9.042199e-02
75%	1.634402e-01
max	8.428621e+00

	user_id	hex	rank	color	monitor	y_chromosome	spam_prob	rgb	red	green	blue
0	1	#8240EA	1	purple	LCD	1.0	0.002088	(130, 64, 234)	130	64	234
1	2	#4B31EA	3	blue	LCD	1.0	0.074577	(75, 49, 234)	75	49	234
2	2	#584601	5	brown	LCD	1.0	0.074577	(88, 70, 1)	88	70	1
3	2	#DA239C	4	pink	LCD	1.0	0.074577	(218, 35, 156)	218	35	156
4	2	#B343E5	1	purple	LCD	1.0	0.074577	(179, 67, 229)	179	67	229

Tidy Tuesday for July 8th, 2025¶

The XKCD Color Survey¶

Irene Morse¶

Setup and Introduction¶

Step 1: Merge Datasets and Remove Spam¶

Step 2: Convert HEX Color Codes to RGB Color Codes for Better Spatial Mapping¶

Step 3: Visualize Perceptual Differences Between Colorblind Users and Non-Colorblind Users¶

RGB Colors Labeled as Green¶

RGB Colors Labeled as Blue¶

RGB Colors Labeled as Purple¶

RGB Colors Labeled as Pink¶

RGB Colors Labeled as Brown¶

Conclusion¶