Adidas Sales Analysis - Project Code

Project Report for STAT 303-1 Fall 2025

Author

Elisa Duan

Published

December 9, 2025

Abstract
This file contains the code for the project.

3) Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
adidas = pd.read_csv('Adidas US Sales Datasets.csv', skiprows = 3, header = 0)
adidas.head()
Retailer Retailer ID Invoice Date Region State City Product Price per Unit Units Sold Total Sales Operating Profit Operating Margin Sales Method
0 Foot Locker 1185732 1/1/20 Northeast New York New York Men's Street Footwear $50.00 1,200 $600,000 $300,000 50% In-store
1 Foot Locker 1185732 1/2/20 Northeast New York New York Men's Athletic Footwear $50.00 1,000 $500,000 $150,000 30% In-store
2 Foot Locker 1185732 1/3/20 Northeast New York New York Women's Street Footwear $40.00 1,000 $400,000 $140,000 35% In-store
3 Foot Locker 1185732 1/4/20 Northeast New York New York Women's Athletic Footwear $45.00 850 $382,500 $133,875 35% In-store
4 Foot Locker 1185732 1/5/20 Northeast New York New York Men's Apparel $60.00 900 $540,000 $162,000 30% In-store
adidas.shape
(9648, 13)
adidas.isnull().sum()
Retailer            0
Retailer ID         0
Invoice Date        0
Region              0
State               0
City                0
Product             0
Price per Unit      0
Units Sold          0
Total Sales         0
Operating Profit    0
Operating Margin    0
Sales Method        0
dtype: int64
#adidas.dtypes
adidas
Retailer Retailer ID Invoice Date Region State City Product Price per Unit Units Sold Total Sales Operating Profit Operating Margin Sales Method
0 Foot Locker 1185732 1/1/20 Northeast New York New York Men's Street Footwear $50.00 1,200 $600,000 $300,000 50% In-store
1 Foot Locker 1185732 1/2/20 Northeast New York New York Men's Athletic Footwear $50.00 1,000 $500,000 $150,000 30% In-store
2 Foot Locker 1185732 1/3/20 Northeast New York New York Women's Street Footwear $40.00 1,000 $400,000 $140,000 35% In-store
3 Foot Locker 1185732 1/4/20 Northeast New York New York Women's Athletic Footwear $45.00 850 $382,500 $133,875 35% In-store
4 Foot Locker 1185732 1/5/20 Northeast New York New York Men's Apparel $60.00 900 $540,000 $162,000 30% In-store
... ... ... ... ... ... ... ... ... ... ... ... ... ...
9643 Foot Locker 1185732 1/24/21 Northeast New Hampshire Manchester Men's Apparel $50.00 64 $3,200 $896 28% Outlet
9644 Foot Locker 1185732 1/24/21 Northeast New Hampshire Manchester Women's Apparel $41.00 105 $4,305 $1,378 32% Outlet
9645 Foot Locker 1185732 2/22/21 Northeast New Hampshire Manchester Men's Street Footwear $41.00 184 $7,544 $2,791 37% Outlet
9646 Foot Locker 1185732 2/22/21 Northeast New Hampshire Manchester Men's Athletic Footwear $42.00 70 $2,940 $1,235 42% Outlet
9647 Foot Locker 1185732 2/22/21 Northeast New Hampshire Manchester Women's Street Footwear $29.00 83 $2,407 $650 27% Outlet

9648 rows × 13 columns

adidas.columns = adidas.columns.str.strip()
adidas['Units Sold'] = pd.to_numeric(adidas['Units Sold'].replace(',', '', regex=True))
adidas['Price per Unit'] = pd.to_numeric(adidas['Price per Unit'].replace(r'[\$]', '', regex=True))
adidas['Total Sales'] = pd.to_numeric(adidas['Total Sales'].replace(r'[\$,]', '', regex=True))
adidas['Operating Profit'] = pd.to_numeric(adidas['Operating Profit'].replace(r'[\$,]', '', regex=True))
adidas['Operating Margin'] = pd.to_numeric(adidas['Operating Margin'].replace(r'[\%]', '', regex=True)) / 100
adidas['Invoice Date'] = pd.to_datetime(adidas['Invoice Date'].str.strip(),format='%m/%d/%y')
adidas.head()
Retailer Retailer ID Invoice Date Region State City Product Price per Unit Units Sold Total Sales Operating Profit Operating Margin Sales Method
0 Foot Locker 1185732 2020-01-01 Northeast New York New York Men's Street Footwear 50.0 1200 600000 300000 0.50 In-store
1 Foot Locker 1185732 2020-01-02 Northeast New York New York Men's Athletic Footwear 50.0 1000 500000 150000 0.30 In-store
2 Foot Locker 1185732 2020-01-03 Northeast New York New York Women's Street Footwear 40.0 1000 400000 140000 0.35 In-store
3 Foot Locker 1185732 2020-01-04 Northeast New York New York Women's Athletic Footwear 45.0 850 382500 133875 0.35 In-store
4 Foot Locker 1185732 2020-01-05 Northeast New York New York Men's Apparel 60.0 900 540000 162000 0.30 In-store
adidas.dtypes
Retailer                    object
Retailer ID                  int64
Invoice Date        datetime64[ns]
Region                      object
State                       object
City                        object
Product                     object
Price per Unit             float64
Units Sold                   int64
Total Sales                  int64
Operating Profit             int64
Operating Margin           float64
Sales Method                object
dtype: object

5) Data Cleaning

a) Cleaning - Question 1

clean1 = adidas[['City', 'State', 'Total Sales', 'Operating Profit', 'Sales Method']]
numcols = clean1.select_dtypes(include=[np.number]).columns #find numeric columns
catcols = clean1.select_dtypes(exclude=[np.number]).columns
outliersc = {}

for col in numcols:
    Q1 = adidas[col].quantile(0.25)
    Q3 = adidas[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    outliers = ((adidas[col] < lower) | (adidas[col] > upper)).sum()
    outliersc[col] = outliers

print("Outliers:", outliersc)
Outliers: {'Total Sales': 653, 'Operating Profit': 706}
catnum = {}

for col in catcols:
    unique_count = adidas[col].nunique()
    catnum[col] = unique_count

print("# of categories for each cat var:", catnum)

for col in catcols:
    print(f"\nCategory counts for {col}:")
    print(adidas[col].value_counts())
# of categories for each cat var: {'City': 52, 'State': 50, 'Sales Method': 3}

Category counts for City:
City
Portland          360
Charleston        288
Orlando           216
Salt Lake City    216
Houston           216
Boise             216
Phoenix           216
Albuquerque       216
Atlanta           216
New York          216
Jackson           216
Little Rock       216
Oklahoma City     216
Hartford          216
Providence        216
Boston            216
Burlington        216
Richmond          216
New Orleans       216
Manchester        216
Dallas            216
Philadelphia      216
Knoxville         216
Birmingham        216
Las Vegas         216
Los Angeles       216
San Francisco     216
Chicago           144
Newark            144
Baltimore         144
Indianapolis      144
Milwaukee         144
Des Moines        144
Fargo             144
Sioux Falls       144
Wichita           144
Wilmington        144
Honolulu          144
Albany            144
Louisville        144
Columbus          144
Charlotte         144
Seattle           144
Miami             144
Minneapolis       144
Billings          144
Omaha             144
St. Louis         144
Detroit           144
Anchorage         144
Cheyenne          144
Denver            144
Name: count, dtype: int64

Category counts for State:
State
California        432
Texas             432
New York          360
Florida           360
Mississippi       216
Oregon            216
Louisiana         216
Idaho             216
New Mexico        216
Georgia           216
Arkansas          216
Virginia          216
Oklahoma          216
Connecticut       216
Rhode Island      216
Massachusetts     216
Vermont           216
Utah              216
Arizona           216
New Hampshire     216
Pennsylvania      216
Nevada            216
Alabama           216
Tennessee         216
South Dakota      144
Illinois          144
Colorado          144
New Jersey        144
Delaware          144
Maryland          144
West Virginia     144
Indiana           144
Wisconsin         144
Iowa              144
North Dakota      144
Michigan          144
Kansas            144
Missouri          144
Minnesota         144
Montana           144
Kentucky          144
Ohio              144
North Carolina    144
South Carolina    144
Nebraska          144
Maine             144
Alaska            144
Hawaii            144
Wyoming           144
Washington        144
Name: count, dtype: int64

Category counts for Sales Method:
Sales Method
Online      4889
Outlet      3019
In-store    1740
Name: count, dtype: int64
incorrectval = {}
for col in numcols:
    neg = adidas[(adidas[col] < 0)]  # negative values problem
    if len(neg) > 0:
        incorrectval[col] = neg

print("Incorrect numeric values:", incorrectval)
Incorrect numeric values: {}

b) Cleaning - Question 2

clean2 = adidas[['Retailer', 'City', 'State', 'Total Sales', 'Operating Profit']]
numcols2 = clean2.select_dtypes(include=[np.number]).columns #find numeric columns
catcols2 = clean2.select_dtypes(exclude=[np.number]).columns
outliersc2 = {}

for col in numcols2:
    Q1 = adidas[col].quantile(0.25)
    Q3 = adidas[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    outliers = ((adidas[col] < lower) | (adidas[col] > upper)).sum()
    outliersc2[col] = outliers

print("Outliers:", outliersc2)
Outliers: {'Total Sales': 653, 'Operating Profit': 706}
catnum2 = {}

for col in catcols2:
    unique_count = adidas[col].nunique()
    catnum2[col] = unique_count

print("# of categories for each cat var:", catnum2)

for col in catcols2:
    print(f"\nCategory counts for {col}:")
    print(adidas[col].value_counts())
# of categories for each cat var: {'Retailer': 6, 'City': 52, 'State': 50}

Category counts for Retailer:
Retailer
Foot Locker      2637
West Gear        2374
Sports Direct    2032
Kohl's           1030
Amazon            949
Walmart           626
Name: count, dtype: int64

Category counts for City:
City
Portland          360
Charleston        288
Orlando           216
Salt Lake City    216
Houston           216
Boise             216
Phoenix           216
Albuquerque       216
Atlanta           216
New York          216
Jackson           216
Little Rock       216
Oklahoma City     216
Hartford          216
Providence        216
Boston            216
Burlington        216
Richmond          216
New Orleans       216
Manchester        216
Dallas            216
Philadelphia      216
Knoxville         216
Birmingham        216
Las Vegas         216
Los Angeles       216
San Francisco     216
Chicago           144
Newark            144
Baltimore         144
Indianapolis      144
Milwaukee         144
Des Moines        144
Fargo             144
Sioux Falls       144
Wichita           144
Wilmington        144
Honolulu          144
Albany            144
Louisville        144
Columbus          144
Charlotte         144
Seattle           144
Miami             144
Minneapolis       144
Billings          144
Omaha             144
St. Louis         144
Detroit           144
Anchorage         144
Cheyenne          144
Denver            144
Name: count, dtype: int64

Category counts for State:
State
California        432
Texas             432
New York          360
Florida           360
Mississippi       216
Oregon            216
Louisiana         216
Idaho             216
New Mexico        216
Georgia           216
Arkansas          216
Virginia          216
Oklahoma          216
Connecticut       216
Rhode Island      216
Massachusetts     216
Vermont           216
Utah              216
Arizona           216
New Hampshire     216
Pennsylvania      216
Nevada            216
Alabama           216
Tennessee         216
South Dakota      144
Illinois          144
Colorado          144
New Jersey        144
Delaware          144
Maryland          144
West Virginia     144
Indiana           144
Wisconsin         144
Iowa              144
North Dakota      144
Michigan          144
Kansas            144
Missouri          144
Minnesota         144
Montana           144
Kentucky          144
Ohio              144
North Carolina    144
South Carolina    144
Nebraska          144
Maine             144
Alaska            144
Hawaii            144
Wyoming           144
Washington        144
Name: count, dtype: int64
incorrectval2 = {}
for col in numcols2:
    neg2 = adidas[(adidas[col] < 0)]  # negative values problem
    if len(neg) > 0:
        incorrectval2[col] = neg2

print("Incorrect numeric values:", incorrectval2)
Incorrect numeric values: {}

c) Cleaning - Question 3

clean3 = adidas[['Product', 'City', 'State', 'Total Sales', 'Operating Profit']]
numcols3 = clean3.select_dtypes(include=[np.number]).columns #find numeric columns
catcols3 = clean3.select_dtypes(exclude=[np.number]).columns
outliersc3 = {}

for col in numcols3:
    Q1 = adidas[col].quantile(0.25)
    Q3 = adidas[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    outliers = ((adidas[col] < lower) | (adidas[col] > upper)).sum()
    outliersc3[col] = outliers

print("Outliers:", outliersc3)
Outliers: {'Total Sales': 653, 'Operating Profit': 706}
catnum3 = {}

for col in catcols3:
    unique_count = adidas[col].nunique()
    catnum3[col] = unique_count

print("# of categories for each cat var:", catnum3)

for col in catcols3:
    print(f"\nCategory counts for {col}:")
    print(adidas[col].value_counts())
# of categories for each cat var: {'Product': 6, 'City': 52, 'State': 50}

Category counts for Product:
Product
Men's Street Footwear        1610
Men's Athletic Footwear      1610
Women's Street Footwear      1608
Women's Apparel              1608
Women's Athletic Footwear    1606
Men's Apparel                1606
Name: count, dtype: int64

Category counts for City:
City
Portland          360
Charleston        288
Orlando           216
Salt Lake City    216
Houston           216
Boise             216
Phoenix           216
Albuquerque       216
Atlanta           216
New York          216
Jackson           216
Little Rock       216
Oklahoma City     216
Hartford          216
Providence        216
Boston            216
Burlington        216
Richmond          216
New Orleans       216
Manchester        216
Dallas            216
Philadelphia      216
Knoxville         216
Birmingham        216
Las Vegas         216
Los Angeles       216
San Francisco     216
Chicago           144
Newark            144
Baltimore         144
Indianapolis      144
Milwaukee         144
Des Moines        144
Fargo             144
Sioux Falls       144
Wichita           144
Wilmington        144
Honolulu          144
Albany            144
Louisville        144
Columbus          144
Charlotte         144
Seattle           144
Miami             144
Minneapolis       144
Billings          144
Omaha             144
St. Louis         144
Detroit           144
Anchorage         144
Cheyenne          144
Denver            144
Name: count, dtype: int64

Category counts for State:
State
California        432
Texas             432
New York          360
Florida           360
Mississippi       216
Oregon            216
Louisiana         216
Idaho             216
New Mexico        216
Georgia           216
Arkansas          216
Virginia          216
Oklahoma          216
Connecticut       216
Rhode Island      216
Massachusetts     216
Vermont           216
Utah              216
Arizona           216
New Hampshire     216
Pennsylvania      216
Nevada            216
Alabama           216
Tennessee         216
South Dakota      144
Illinois          144
Colorado          144
New Jersey        144
Delaware          144
Maryland          144
West Virginia     144
Indiana           144
Wisconsin         144
Iowa              144
North Dakota      144
Michigan          144
Kansas            144
Missouri          144
Minnesota         144
Montana           144
Kentucky          144
Ohio              144
North Carolina    144
South Carolina    144
Nebraska          144
Maine             144
Alaska            144
Hawaii            144
Wyoming           144
Washington        144
Name: count, dtype: int64
incorrectval3 = {}
for col in numcols3:
    neg3 = adidas[(adidas[col] < 0)]  # negative values problem
    if len(neg) > 0:
        incorrectval3[col] = neg3

print("Incorrect numeric values:", incorrectval3)
Incorrect numeric values: {}

6) Data Analysis

a) Analysis 1

nationalsm = adidas.groupby('Sales Method').agg(Total_Sales=('Total Sales', 'sum'),
                                                Total_Operating_Profit=('Operating Profit', 'sum')).reset_index()

nationalsm = nationalsm.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})
nationalsm #compare sales method disregarding city/state
#in-store are most popular
Sales Method Total Sales Total Operating Profit
0 In-store 356643750 127591382
1 Online 247672882 96555337
2 Outlet 295585493 107988403
profits = nationalsm.set_index('Sales Method')['Total Operating Profit'].to_dict()

comparisons = {"In-store vs Online": profits['In-store'] - profits['Online'], 
"In-store vs Outlet": profits['In-store'] - profits['Outlet'],
"Online vs Outlet": profits['Online'] - profits['Outlet']} 

#turn it in to a df
comparisons_df = pd.DataFrame(list(comparisons.items()), columns=["Comparison", "Total Operating Profit Difference"])
comparisons_df
Comparison Total Operating Profit Difference
0 In-store vs Online 31036045
1 In-store vs Outlet 19602979
2 Online vs Outlet -11433066
labels = list(comparisons.keys())
values = list(comparisons.values())

plt.bar(labels, values, color=["#BDD7EE", "#C9E2F4", "#D6EAF8"])

plt.axhline(0, color='black', linewidth=1)
plt.yticks(values, [f"{v:,}" for v in values])   #give exact values
plt.ylabel("Total Operating Profit Difference ($)")
plt.title("Total Operating Profit Differences Between Sales Methods")

plt.tight_layout()
plt.show()

combosm = adidas.groupby(['Sales Method', 'City', 'State']).agg(Total_Sales=('Total Sales', 'sum'), 
Total_Operating_Profit=('Operating Profit', 'sum')).reset_index()
#.reset_index() to make the multiindex --> columns

combosm = combosm.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})
combosm = combosm.sort_values("State").reset_index(drop=True)

combosm.head(10)
Sales Method City State Total Sales Total Operating Profit
0 Outlet Birmingham Alabama 17191868 8863634
1 Online Birmingham Alabama 441556 283947
2 Outlet Anchorage Alaska 14380750 4340225
3 Online Anchorage Alaska 372353 158134
4 Outlet Phoenix Arizona 356709 135066
5 Online Phoenix Arizona 15425512 5209612
6 Outlet Little Rock Arkansas 284365 122671
7 Online Little Rock Arkansas 12354982 4793032
8 Online Los Angeles California 629877 296413
9 Outlet Los Angeles California 25005036 8748518
best_city_state = (
    combosm.loc[combosm.groupby(["City", "State"])["Total Operating Profit"].idxmax()].sort_values(["State", "City"])
    .reset_index(drop=True))

best_city_state #for every city/state, only kept the sales method that has greated Total Operating Profit
Sales Method City State Total Sales Total Operating Profit
0 Outlet Birmingham Alabama 17191868 8863634
1 Outlet Anchorage Alaska 14380750 4340225
2 Online Phoenix Arizona 15425512 5209612
3 Online Little Rock Arkansas 12354982 4793032
4 Outlet Los Angeles California 25005036 8748518
5 Outlet San Francisco California 33680352 9900809
6 In-store Denver Colorado 20475000 7458942
7 In-store Hartford Connecticut 11030000 3911402
8 In-store Wilmington Delaware 11988750 4373366
9 In-store Miami Florida 30807500 11765821
10 In-store Orlando Florida 20997500 6544939
11 Online Atlanta Georgia 18575819 6723339
12 Outlet Honolulu Hawaii 21720000 5633381
13 Online Boise Idaho 18840711 7920946
14 Outlet Chicago Illinois 9548250 3790803
15 In-store Indianapolis Indiana 8612750 3266316
16 In-store Des Moines Iowa 7238000 2566213
17 Online Wichita Kansas 9972864 3510159
18 Online Louisville Kentucky 10072848 3935833
19 Online New Orleans Louisiana 19837823 7841032
20 Outlet Portland Maine 8962500 3077681
21 In-store Baltimore Maryland 7563250 2664616
22 In-store Boston Massachusetts 10553750 3151648
23 In-store Detroit Michigan 18153500 7872830
24 Outlet Minneapolis Minnesota 5355500 1928218
25 Online Jackson Mississippi 15240524 6211421
26 In-store St. Louis Missouri 9437500 3778630
27 Outlet Billings Montana 15311250 6023630
28 Outlet Omaha Nebraska 5778000 2358530
29 In-store Las Vegas Nevada 11558750 3657127
30 Outlet Manchester New Hampshire 10380394 3595323
31 In-store Newark New Jersey 9682000 3533704
32 Online Albuquerque New Mexico 19424023 6569822
33 In-store Albany New York 23815000 9121071
34 Outlet New York New York 35277364 12121563
35 Online Charlotte North Carolina 23956531 9756425
36 In-store Fargo North Dakota 6742500 2322475
37 Online Columbus Ohio 18484583 7528843
38 Online Oklahoma City Oklahoma 10378110 4003232
39 In-store Portland Oregon 20336250 7127816
40 Outlet Philadelphia Pennsylvania 10102069 4023655
41 In-store Providence Rhode Island 8049500 2869454
42 Online Charleston South Carolina 29285637 11324247
43 Online Sioux Falls South Dakota 8495576 2943322
44 Outlet Knoxville Tennessee 17625079 8233073
45 Outlet Dallas Texas 20384362 8558821
46 Outlet Houston Texas 24831905 9528682
47 In-store Salt Lake City Utah 9232000 3649541
48 In-store Burlington Vermont 13679250 5456531
49 In-store Richmond Virginia 20556250 7271816
50 In-store Seattle Washington 25676250 6737442
51 In-store Charleston West Virginia 10420000 4142252
52 In-store Milwaukee Wisconsin 7531750 2706338
53 In-store Cheyenne Wyoming 18105750 6320027
method_counts = best_city_state["Sales Method"].value_counts()

plt.figure(figsize=(6,6))

plt.pie(method_counts,labels=method_counts.index,
    autopct=lambda pct: f"{pct:.1f}%\n({int(round(pct/100 * method_counts.sum()))})",
    colors=["#BDD7EE", "#C9E2F4", "#D6EAF8"],
    startangle=90)

plt.title("Percentage of Cities Where Each Sales Method Leads in Performance")
plt.tight_layout()
plt.show()

unique_city_state_count = adidas.groupby(["City", "State"]).ngroups
unique_city_state_count
#sanity check
54
top_profitable = combosm.sort_values('Total Operating Profit', ascending=False).reset_index(drop=True) 
top_profitable.head() #there should be 3 new yorks
Sales Method City State Total Sales Total Operating Profit
0 Outlet New York New York 35277364 12121563
1 In-store Miami Florida 30807500 11765821
2 Online Charleston South Carolina 29285637 11324247
3 Outlet San Francisco California 33680352 9900809
4 Online Charlotte North Carolina 23956531 9756425
plottop = top_profitable.head(25)

labels = plottop['City'] + ", " + plottop['State'] + " (" + plottop['Sales Method'] + ")"
plt.figure(figsize=(15, 7))
plt.barh(labels, plottop['Total Operating Profit'], color="#BDD7EE")
plt.xlabel("Total Operating Profit")
plt.ylabel("City, State (Sales Method)")
xticks = plt.xticks()[0]
plt.xticks(xticks, [f"{int(x):,}" for x in xticks])
plt.title("Top 25 Most Profitable City/State (Sales Method)")

plt.tight_layout()
plt.show()

b) Analysis 2

city_state_profit = adidas.groupby(["City", "State"]).agg(Total_Sales=("Total Sales", "sum"),
                                                          Total_Operating_Profit=("Operating Profit", "sum")).reset_index().sort_values("Total_Operating_Profit", ascending=False)
city_state_profit = city_state_profit.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})
city_state_profit.head(10)
City State Total Sales Total Operating Profit
36 New York New York 39801235 13899981
32 Miami Florida 31600863 12168628
10 Charleston South Carolina 29285637 11324247
48 San Francisco California 34539220 10256252
23 Houston Texas 25456882 9845140
12 Charlotte North Carolina 23956531 9756425
0 Albany New York 24427804 9429864
35 New Orleans Louisiana 23750781 9417239
6 Birmingham Alabama 17633424 9147581
29 Los Angeles California 25634913 9044931
state_profit = adidas.groupby(["State"]).agg(Total_Sales=("Total Sales", "sum"),
                                             Total_Operating_Profit=("Operating Profit", "sum")).reset_index().sort_values("Total_Operating_Profit", ascending=False)
state_profit = state_profit.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})
state_profit.head(10)
State Total Sales Total Operating Profit
31 New York 64229039 23329845
8 Florida 59283714 20926219
4 California 60174133 19301183
42 Texas 46359746 18688214
39 South Carolina 29285637 11324247
32 North Carolina 23956531 9756425
17 Louisiana 23750781 9417239
0 Alabama 17633424 9147581
41 Tennessee 18067440 8493670
21 Michigan 18625433 8135902
city_state_list = [
    ('New York', 'New York'),
    ('Houston', 'Texas'),
    ('San Francisco', 'California'),
    ('Los Angeles', 'California'),
    ('Chicago', 'Illinois'),
    ('Dallas', 'Texas'),
    ('Philadelphia', 'Pennsylvania'),
    ('Las Vegas', 'Nevada'),
    ('Denver', 'Colorado'),
    ('Seattle', 'Washington'),
    ('Miami', 'Florida'),
    ('Minneapolis', 'Minnesota'),
    ('Billings', 'Montana'),
    ('Knoxville', 'Tennessee'),
    ('Omaha', 'Nebraska'),
    ('Birmingham', 'Alabama'),
    ('Portland', 'Maine'),
    ('Anchorage', 'Alaska'),
    ('Honolulu', 'Hawaii'),
    ('Orlando', 'Florida'),
    ('Albany', 'New York'),
    ('Cheyenne', 'Wyoming'),
    ('Richmond', 'Virginia'),
    ('Detroit', 'Michigan'),
    ('St. Louis', 'Missouri'),
    ('Salt Lake City', 'Utah'),
    ('Portland', 'Oregon'),
    ('New Orleans', 'Louisiana'),
    ('Boise', 'Idaho'),
    ('Phoenix', 'Arizona'),
    ('Albuquerque', 'New Mexico'),
    ('Atlanta', 'Georgia'),
    ('Charleston', 'South Carolina'),
    ('Charlotte', 'North Carolina'),
    ('Columbus', 'Ohio'),
    ('Louisville', 'Kentucky'),
    ('Jackson', 'Mississippi'),
    ('Little Rock', 'Arkansas'),
    ('Oklahoma City', 'Oklahoma'),
    ('Wichita', 'Kansas'),
    ('Sioux Falls', 'South Dakota'),
    ('Fargo', 'North Dakota'),
    ('Des Moines', 'Iowa'),
    ('Milwaukee', 'Wisconsin'),
    ('Indianapolis', 'Indiana'),
    ('Charleston', 'West Virginia'),
    ('Baltimore', 'Maryland'),
    ('Wilmington', 'Delaware'),
    ('Newark', 'New Jersey'),
    ('Hartford', 'Connecticut'),
    ('Providence', 'Rhode Island'),
    ('Boston', 'Massachusetts'),
    ('Burlington', 'Vermont'),
    ('Manchester', 'New Hampshire')]

city_df = pd.DataFrame(city_state_list, columns=["City", "State"])

state_map = {
    "New York": "NY", "Texas": "TX", "California": "CA", "Illinois": "IL",
    "Pennsylvania": "PA", "Nevada": "NV", "Colorado": "CO", "Washington": "WA",
    "Florida": "FL", "Minnesota": "MN", "Montana": "MT", "Tennessee": "TN",
    "Nebraska": "NE", "Alabama": "AL", "Maine": "ME", "Alaska": "AK",
    "Hawaii": "HI", "Wyoming": "WY", "Virginia": "VA", "Michigan": "MI",
    "Missouri": "MO", "Utah": "UT", "Oregon": "OR", "Louisiana": "LA",
    "Idaho": "ID", "Arizona": "AZ", "New Mexico": "NM", "Georgia": "GA",
    "South Carolina": "SC", "North Carolina": "NC", "Ohio": "OH",
    "Kentucky": "KY", "Mississippi": "MS", "Arkansas": "AR", "Oklahoma": "OK",
    "Kansas": "KS", "South Dakota": "SD", "North Dakota": "ND",
    "Iowa": "IA", "Wisconsin": "WI", "Indiana": "IN", "Maryland": "MD",
    "Delaware": "DE", "New Jersey": "NJ", "Connecticut": "CT",
    "Rhode Island": "RI", "Massachusetts": "MA", "Vermont": "VT",
    "New Hampshire": "NH"}

city_df["State_Abbrev"] = city_df["State"].map(state_map)
us_cities = pd.read_csv("uscities.csv")

city_coords = city_df.merge(us_cities[["city", "state_id", "lat", "lng"]],left_on=["City", "State_Abbrev"],
    right_on=["city", "state_id"],
    how="left")

city_coords = city_coords.drop(columns=["city", "state_id"])
city_coords = city_coords.rename(columns={"lat": "Latitude", "lng": "Longitude"})

#city_coords (city, state, state_abbrev, latitude, longitude)
city_profit = (adidas.groupby(["City", "State"])["Operating Profit"]
          .sum()
          .reset_index())
merged_city_map = city_profit.merge(
    city_coords,
    on=["City", "State"],
    how="left")
merged_city_map
City State Operating Profit State_Abbrev Latitude Longitude
0 Albany New York 9429864 NY 42.6664 -73.7987
1 Albuquerque New Mexico 6738070 NM 35.1054 -106.6465
2 Anchorage Alaska 4498359 AK 61.1508 -149.1091
3 Atlanta Georgia 6893299 GA 33.7628 -84.4220
4 Baltimore Maryland 2757648 MD 39.3051 -76.6144
5 Billings Montana 6232040 MT 45.7891 -108.5526
6 Birmingham Alabama 9147581 AL 33.5279 -86.7971
7 Boise Idaho 8121123 ID 43.6005 -116.2308
8 Boston Massachusetts 3353884 MA 42.3188 -71.0852
9 Burlington Vermont 5785973 VT 44.4876 -73.2316
10 Charleston South Carolina 11324247 SC 32.8168 -79.9687
11 Charleston West Virginia 4282958 NaN NaN NaN
12 Charlotte North Carolina 9756425 NC 35.2083 -80.8303
13 Cheyenne Wyoming 6544076 WY 41.1350 -104.7902
14 Chicago Illinois 3920377 IL 41.8375 -87.6866
15 Columbus Ohio 7528843 OH 39.9862 -82.9855
16 Dallas Texas 8843074 TX 32.7935 -96.7667
17 Denver Colorado 7713562 CO 39.7620 -104.8758
18 Des Moines Iowa 2655220 IA 41.5725 -93.6105
19 Detroit Michigan 8135902 MI 42.3834 -83.1024
20 Fargo North Dakota 2688518 ND 46.8651 -96.8292
21 Hartford Connecticut 4152327 CT 41.7661 -72.6834
22 Honolulu Hawaii 5849802 HI 21.3294 -157.8460
23 Houston Texas 9845140 TX 29.7860 -95.3885
24 Indianapolis Indiana 3379262 IN 39.7771 -86.1458
25 Jackson Mississippi 6369102 MS 32.3157 -90.2125
26 Knoxville Tennessee 8493670 TN 35.9692 -83.9496
27 Las Vegas Nevada 7060660 NV 36.2333 -115.2654
28 Little Rock Arkansas 4915703 AR 34.7256 -92.3577
29 Los Angeles California 9044931 CA 34.1141 -118.4068
30 Louisville Kentucky 3935833 KY 38.1663 -85.6485
31 Manchester New Hampshire 5786209 NH 42.9848 -71.4447
32 Miami Florida 12168628 FL 25.7840 -80.2101
33 Milwaukee Wisconsin 2800835 WI 43.0642 -87.9675
34 Minneapolis Minnesota 2670235 MN 44.9635 -93.2678
35 New Orleans Louisiana 9417239 LA 30.0687 -89.9288
36 New York New York 13899981 NY 40.6943 -73.9249
37 Newark New Jersey 3657247 NJ 40.7245 -74.1725
38 Oklahoma City Oklahoma 4106535 OK 35.4676 -97.5136
39 Omaha Nebraska 2439482 NE 41.2627 -96.0529
40 Orlando Florida 8757591 FL 28.4773 -81.3370
41 Philadelphia Pennsylvania 4156749 PA 40.0077 -75.1339
42 Phoenix Arizona 5344678 AZ 33.5722 -112.0892
43 Portland Maine 3187662 ME 43.6773 -70.2715
44 Portland Oregon 7573151 OR 45.5371 -122.6500
45 Providence Rhode Island 3045828 RI 41.8230 -71.4187
46 Richmond Virginia 7719439 VA 37.5295 -77.4756
47 Salt Lake City Utah 3873440 UT 40.7776 -111.9311
48 San Francisco California 10256252 CA 37.7558 -122.4449
49 Seattle Washington 6991412 WA 47.6211 -122.3244
50 Sioux Falls South Dakota 2943322 SD 43.5396 -96.7311
51 St. Louis Missouri 3907217 MO 38.6359 -90.2451
52 Wichita Kansas 3510159 KS 37.6895 -97.3443
53 Wilmington Delaware 4524358 DE 39.7415 -75.5416
import plotly.express as px
import plotly.io as pio
fig = px.scatter_geo(
    merged_city_map,
    lat="Latitude",
    lon="Longitude",
    scope="usa",
    projection="albers usa",
    color="Operating Profit",
    size="Operating Profit",
    text="City",
    hover_name="City",
    hover_data=["State", "Operating Profit"],
    color_continuous_scale="Blues")

fig.update_traces(textposition="top center")
fig.update_layout(
    title="Adidas Operating Profit by City/State",
    height=2000,
    width=1200)
fig.show()

## CHECK THIRD HTML (UNABLE TO RENDER UNITED STATES MAP INTO CURRENT HTML)!!!!
pio.write_html(fig,"adidas_operating_profit_map.html", full_html=True, include_plotlyjs="cdn")
retailer = adidas.groupby(["Retailer"]).agg(
    Total_Sales=("Total Sales", "sum"), Total_Operating_Profit=("Operating Profit", "sum")).reset_index().sort_values("Total_Operating_Profit", ascending=False)

retailers = retailer.rename(columns={"Total_Sales": "Total Sales",
    "Total_Operating_Profit": "Total Operating Profit"})
totals = retailers.set_index("Retailer")["Total Operating Profit"]
plt.figure(figsize=(8,6))
sns.barplot(
    x=totals.index, 
    y=totals.values, 
    palette="Blues_d"
)
plt.title("Total Operating Profit by Retailer")
plt.ylabel("Total Operating Profit ($)")
plt.xlabel("Retailer")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

retailers2 = adidas.groupby(["Retailer", "City", "State"]).agg(
    Total_Sales=("Total Sales", "sum"), Total_Operating_Profit=("Operating Profit", "sum")
).reset_index().sort_values("Total_Operating_Profit", ascending=False)

retailers2 = retailers2.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})

store_counts = retailers2["Retailer"].value_counts()
print(store_counts)

retailers2["Normalized_Profit"] = retailers2.apply(lambda row: row["Total Operating Profit"] / store_counts[row["Retailer"]],
    axis=1)

plt.figure(figsize=(10,6))
sns.boxplot(
    data=retailers2,
    x="Retailer",
    y="Normalized_Profit",
    palette="Blues")

plt.title("Normalized Operating Profit per Store Across Cities/States by Retailer")
plt.xlabel("Retailer")
plt.ylabel("Normalized Operating Profit ($)")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Retailer
Foot Locker      33
Sports Direct    26
West Gear        24
Kohl's           12
Amazon            9
Walmart           6
Name: count, dtype: int64

c) Analysis 3

products = adidas.groupby("Product").agg(Total_Sales=("Total Sales", "sum"), 
                                         Total_Operating_Profit=("Operating Profit", "sum")).reset_index().sort_values("Total_Operating_Profit", ascending=False)
products = products.rename(columns={"Total_Sales": "Total Sales","Total_Operating_Profit": "Total Operating Profit"})
products
Product Total Sales Total Operating Profit
2 Men's Street Footwear 208826244 82802323
3 Women's Apparel 179038860 68650996
1 Men's Athletic Footwear 153673680 51846964
5 Women's Street Footwear 128002813 45095897
0 Men's Apparel 123728632 44763099
4 Women's Athletic Footwear 106631896 38975843
products_count = adidas.groupby(["Product"]).ngroups
products_count
#sanity check
6
plt.figure(figsize=(12,15)) #change axis a little bit
plt.barh(products["Product"], products["Total Operating Profit"], color="#BDD7EE")

plt.xlabel("Total Operating Profit ($)")
plt.title("Total Operating Profit by Product Type")

for i, v in enumerate(products["Total Operating Profit"]):
    plt.text(v, i, f"${v:,.0f}", va="center")

plt.tight_layout()
plt.show()

product_city_state = (
    adidas.groupby(["Product", "City", "State"]).agg(Total_Operating_Profit=("Operating Profit", "sum")).reset_index())
plt.figure(figsize=(14,6))
sns.boxplot(data=product_city_state, x="Product", y="Total_Operating_Profit",palette="Blues")

plt.xticks(rotation=60, ha='right')
plt.ylabel("Total Operating Profit ($)")
plt.title("Distribution of Total Operating Profit Across Cities/States by Product Type")

plt.tight_layout()
plt.show()