#Housekeeping
import sys 
import os
#Import functions stored in a folder (custom) 
import sys 
#sys.path.append("directoryname") 
import time
import pandas as pd
import numpy as np
from glob import glob
import seaborn as sns
from scipy import stats
import pyreadstat 
import matplotlib.pyplot as plt

clear = lambda: os.system('cls')
clear()

#Define main path and output path to export results
os.chdir('[[PATH]]')
out_path   = "./Output/"
os.makedirs(out_path, exist_ok=True)
#Function to automatically save each file
def save_and_show(out_path,filename):
    plt.savefig(out_path + filename, dpi=300, bbox_inches='tight')
    plt.show()

#Import Sample of Datasets to see their patterns
postings_linkedin_individual  = pd.read_csv('./Data/postings_linkedin_individual/postings_linkedin_individual_0_0_0.csv') 

#Print Datasets
 
#Tells us LinkedIn vacancies for individuals
display(postings_linkedin_individual.head(10))
display(postings_linkedin_individual.columns)

Index(['job_id', 'rcid', 'company', 'rics_k50', 'rics_k200', 'rics_k400',
       'title_raw', 'title_translated', 'role_k10', 'role_k50', 'role_k150',
       'role_k1500', 'mapped_role', 'country', 'state', 'salary', 'post_date',
       'remove_date', 'ultimate_parent_rcid', 'ultimate_parent_company_name',
       'remote_type'],
      dtype='object')

#Import Postings dataset fully. First, let's define filepaths 
raw_path   = "./Data/postings_linkedin_individual/"
os.makedirs(raw_path, exist_ok=True)

csv_files = glob(os.path.join(raw_path, "*.csv"))

display(print(f"Found {len(csv_files)} raw postings files."))

#Append all data
dfs = []
for file in csv_files:
    print("Loading:", file)
    df = pd.read_csv(file)
    dfs.append(df)

# Append into a single DataFrame
postings = pd.concat(dfs, ignore_index=True)
print("Combined shape:", postings.shape)
display(postings)

Found 32 raw postings files.

None

Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_0_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_1_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_2_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_3_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_4_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_5_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_6_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_0_7_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_0_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_1_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_2_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_3_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_4_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_5_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_6_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_1_7_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_0_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_1_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_2_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_3_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_4_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_5_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_6_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_2_7_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_0_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_1_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_2_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_3_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_4_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_5_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_6_0.csv
Loading: ./Data/postings_linkedin_individual\postings_linkedin_individual_3_7_0.csv
Combined shape: (4308312, 21)

# Summarize the data (Numerical Variables)
display(postings.describe())

freq = (
    postings.groupby('mapped_role')
      .size()                # counts rows per mapped_role
      .reset_index(name='count')
)
print(freq.head(100))

freq = (
    postings.groupby('rics_k50')
      .size()                # counts rows per industry
      .reset_index(name='count')
)
print(freq.head(100))

freq = (
    postings.groupby('country')
      .size()                # counts rows per country
      .reset_index(name='count')
)
print(freq.head(100))

#Now let's parse the month dates 
postings['post_date'] = pd.to_datetime(postings['post_date'], errors='coerce')
postings['remove_date'] = pd.to_datetime(postings['remove_date'], errors='coerce')

postings['post_quarter'] = postings['post_date'].dt.to_period('Q')
postings['remove_quarter'] = postings['remove_date'].dt.to_period('Q')

#Drop positions where the post and removal months are invalid and aggregate
postings = postings.dropna(subset=['post_quarter'])
postings = postings.dropna(subset=['remove_quarter'])
postings = postings[postings['country'] != 'empty']

#add job-post duration
postings['duration_days'] = (postings['remove_date'] - postings['post_date']).dt.days
 
#Create a Variable for US Vs Abroad
postings['country_group'] = np.where(
    postings['country'] == 'United States',
    'United States',
    'Abroad'
)
#Create a mapping to normalize presence
postings['remote_type_clean']= postings['remote_type'].str.strip().str.lower()

remote_map = {
    'fully remote': 'Remote',
    'remote in territory': 'Remote',
    'partially in office': 'Hybrid',
    'fully in office': 'Office'
}
postings['remote_group'] = postings['remote_type_clean'].map(remote_map)

#Filter to the US for the US plots
postings_usa = postings[postings['country'] == 'United States']

# Quick check
print(postings.shape)
#postings

                         mapped_role  count
0               .NET DevOps Engineer    294
1                     .NET Developer     39
2          .NET Full Stack Developer     50
3                    3D CAD Designer     15
4                         3D Creator    476
..                               ...    ...
95  Accessibility Compliance Auditor      1
96          Accessibility Consultant     81
97        Accessibility Data Analyst      2
98       Accessibility Product Owner    272
99     Accessibility Project Manager    532

[100 rows x 2 columns]
                                 rics_k50    count
0                   Aerospace and Defense       60
1         Biotech and Healthcare Services    14407
2                     Commercial Aviation      181
3               Digital Commerce Services  1394808
4               Electronics Manufacturing   453235
5                    Energy and Resources      136
6   Engineering and Construction Services        6
7                      Financial Services    46070
8        Healthcare and Wellness Services      182
9                Human Resources Services       40
10                 IT Consulting Services       55
11               Industrial Manufacturing        2
12        Information Technology Services  2215751
13           Logistics and Transportation        1
14     Marketing and Advertising Services       15
15                Media and Entertainment   177149
16                          Miscellaneous        1
17      Professional Development Services      455
18            Telecommunications Services     5758
             country  count
0        Afghanistan      4
1            Albania     11
2            Algeria     18
3             Angola    168
4          Argentina   3477
..               ...    ...
95              Oman    120
96          Pakistan     34
97         Palestine     55
98            Panama     39
99  Papua New Guinea      3

[100 rows x 2 columns]
(4287929, 27)

grouping_type = ['post_quarter',  'rics_k50'   ]
grouping_count = ['post_quarter',  'country_group' ]

postings_quarterly_type = (
    postings_usa.groupby(grouping_type)
            .agg(
                postings=('job_id', 'count'),
                avg_salary=('salary', 'mean'),
                avg_duration_days=('duration_days', 'mean')  
            )
            .reset_index()
)

#express in thousands openings
postings_quarterly_type['postings'] = postings_quarterly_type['postings']/1000
postings_quarterly_type['avg_salary'] = postings_quarterly_type['avg_salary']/1000
display(postings_quarterly_type)

postings_quarterly_country = (
    postings.groupby(grouping_count)
            .agg(
                postings=('job_id', 'count'),
                avg_salary=('salary', 'mean'),
                avg_duration_days=('duration_days', 'mean') 
            )
            .reset_index()
)
postings_quarterly_country['postings'] = postings_quarterly_country['postings']/1000
postings_quarterly_country['avg_salary'] = postings_quarterly_country['avg_salary']/1000
display(postings_quarterly_country)

eci           = pd.read_csv('./Data/public_data/ECIWAG.csv') 
gdp_growth    = pd.read_csv('./Data/public_data/A191RO1Q156NBEA.csv') 
unemployment  = pd.read_csv('./Data/public_data/UNRATE.csv') 
participation = pd.read_csv('./Data/public_data/CIVPART.csv') 

# Parse dates
eci['observation_date']           = pd.to_datetime(eci['observation_date'])
gdp_growth['observation_date']    = pd.to_datetime(gdp_growth['observation_date'])
unemployment['observation_date']  = pd.to_datetime(unemployment['observation_date'])
participation['observation_date'] = pd.to_datetime(participation['observation_date'])

eci['post_quarter']          = eci['observation_date'].dt.to_period('Q')
eci['eci_yoy'] = (
    eci['ECIWAG'].pct_change(4) * 100
)

eci                           = eci[['post_quarter', 'eci_yoy']]
gdp_growth['post_quarter']    = gdp_growth['observation_date'].dt.to_period('Q')
gdp_growth                    = gdp_growth[['post_quarter', 'A191RO1Q156NBEA']]
unemployment['post_quarter']  = unemployment['observation_date'].dt.to_period('Q')
participation['post_quarter'] = participation['observation_date'].dt.to_period('Q')

unemp_quarterly = (
    unemployment
    .groupby('post_quarter', as_index=False)['UNRATE']
    .mean()    # quarterly average unemployment rate
    .rename(columns={'UNRATE': 'unrate_us'})
)


part_quarterly = (
    participation
    .groupby('post_quarter', as_index=False)['CIVPART']
    .mean()    # quarterly average unemployment rate
    .rename(columns={'CIVPART': 'part_us'})
)
part_quarterly['part_yoy'] = (
    part_quarterly['part_us'].pct_change(4) * 100
)

macro_q = (
    eci
    .merge(gdp_growth,   on='post_quarter', how='outer')
    .merge(unemp_quarterly, on='post_quarter', how='outer')
    .merge(part_quarterly, on='post_quarter', how='outer')
)
macro_q['post_quarter_ts'] = macro_q['post_quarter'].dt.to_timestamp()
macro_q = macro_q.sort_values('post_quarter_ts')

macro_q_2020 = macro_q[ macro_q['post_quarter'] >= pd.Period('2020Q2') ]

print(macro_q_2020)

fig, ax = plt.subplots(figsize=(12, 7), constrained_layout=True)

# ---- Plot lines ----
ax.plot(
    macro_q_2020['post_quarter_ts'],
    macro_q_2020['eci_yoy'],
    label='ECI',
    linewidth=2.2,
    color='green',
    linestyle='-.'
)

ax.plot(
    macro_q_2020['post_quarter_ts'],
    macro_q_2020['A191RO1Q156NBEA'],
    label='GDP',
    linewidth=2.2,
    color='black',
    linestyle='--'
)

ax.plot(
    macro_q_2020['post_quarter_ts'],
    macro_q_2020['unrate_us'],
    label='Unemployment Rate',
    linewidth=2.2,
    color='gray',
    linestyle=':',
)

# ---- Axis labels ----
ax.set_xlabel("Quarter")
ax.set_ylabel("YoY Growth (%)")

# ---- Figure-level legend centered at the top ----
fig.legend(
    frameon=False,
    fontsize=11,
    loc="upper center",
    bbox_to_anchor=(0.5, 1.02),
    ncol=3
)

# ---- Remove all borders ----
for spine in ax.spines.values():
    spine.set_visible(False)

# ---- Grid ----
ax.grid(
    True,
    which='major',
    axis='both',
    color='lightgray',
    linewidth=0.8,
    alpha=0.6
)
ax.set_axisbelow(True)

# ---- X-axis tick formatting ----
macro_q_2020_sorted = (
    macro_q_2020
        .sort_values('post_quarter_ts')
        .drop_duplicates('post_quarter')
)

ax.set_xticks(macro_q_2020_sorted['post_quarter_ts'])
ax.set_xticklabels(macro_q_2020_sorted['post_quarter'].astype(str))

ax.tick_params(axis='x', length=0)
plt.xticks(rotation=45, fontsize=11)
plt.yticks(fontsize=11)

# ---- Save & show ----
save_and_show(out_path, "macro_vars.png")

    post_quarter   eci_yoy  A191RO1Q156NBEA  unrate_us    part_us  \
289       2020Q2  2.923977             -7.4  13.000000  60.800000   
290       2020Q3  2.683104             -1.4   8.800000  61.533333   
291       2020Q4  2.807775             -0.9   6.766667  61.566667   
292       2021Q1  2.993585              1.8   6.233333  61.433333   
293       2021Q2  3.551136             12.4   5.933333  61.633333   
294       2021Q3  4.519774              5.2   5.066667  61.733333   
295       2021Q4  4.971989              5.8   4.200000  61.900000   
296       2022Q1  4.982699              4.0   3.833333  62.233333   
297       2022Q2  5.624143              2.5   3.633333  62.233333   
298       2022Q3  5.270270              2.3   3.533333  62.233333   
299       2022Q4  5.203469              1.3   3.566667  62.200000   
300       2023Q1  5.075808              2.3   3.533333  62.500000   
301       2023Q2  4.610390              2.8   3.533333  62.600000   
302       2023Q3  4.492940              3.2   3.666667  62.700000   
303       2023Q4  4.248573              3.4   3.800000  62.666667   
304       2024Q1  4.265997              2.9   3.833333  62.600000   
305       2024Q2  4.034761              3.1   4.000000  62.633333   
306       2024Q3  3.746929              2.8   4.166667  62.700000   
307       2024Q4  3.710462              2.4   4.133333  62.500000   
308       2025Q1  3.369434              2.0   4.100000  62.500000   
309       2025Q2  3.559666              2.1   4.166667  62.433333   
310       2025Q3       NaN              NaN   4.300000  62.300000   

         part_yoy post_quarter_ts  
289 -3.338633e+00      2020-04-01  
290 -2.534319e+00      2020-07-01  
291 -2.738283e+00      2020-10-01  
292 -2.589852e+00      2021-01-01  
293  1.370614e+00      2021-04-01  
294  3.250271e-01      2021-07-01  
295  5.414185e-01      2021-10-01  
296  1.302225e+00      2022-01-01  
297  9.734992e-01      2022-04-01  
298  8.099352e-01      2022-07-01  
299  4.846527e-01      2022-10-01  
300  4.284949e-01      2023-01-01  
301  5.891805e-01      2023-04-01  
302  7.498661e-01      2023-07-01  
303  7.502680e-01      2023-10-01  
304  1.600000e-01      2024-01-01  
305  5.324814e-02      2024-04-01  
306  2.220446e-14      2024-07-01  
307 -2.659574e-01      2024-10-01  
308 -1.597444e-01      2025-01-01  
309 -3.193188e-01      2025-04-01  
310 -6.379585e-01      2025-07-01

#Create a full quarterly date range
all_quarters = pd.period_range(
    start=postings_quarterly_type['post_quarter'].min(),
    end=postings_quarterly_type['post_quarter'].max(),
    freq='Q'
)

#Reindex each rics_k50 to include all quarters
def reindex_group(df):
    df = df.set_index('post_quarter').reindex(all_quarters)
    return df

postings_filled = (
    postings_quarterly_type
        .set_index(['rics_k50', 'post_quarter'])
        .groupby(level=0)
        .apply(lambda g: g.droplevel(0).reindex(all_quarters))
)

postings_filled.index = postings_filled.index.set_names(['rics_k50', 'post_quarter'])
postings_filled = postings_filled.reset_index()


postings_filled['postings'] = postings_filled['postings'].fillna(0)
# Compute total postings per rics_k50  (most aggregated) 
postings_quarterly_type['post_quarter_ts'] = (
    postings_quarterly_type['post_quarter'].dt.to_timestamp()
)

# Sort for moving average
postings_filled = postings_filled.sort_values(['rics_k50', 'post_quarter'])
# Add a 4-quarter moving average (centered or trailing)
postings_filled['postings_ma4'] = (
    postings_filled
        .groupby('rics_k50')['postings']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)

postings_filled['avg_salary_ma4'] = (
    postings_filled
        .groupby('rics_k50')['avg_salary']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)

postings_filled['avg_duration_days_ma4'] = (
    postings_filled
        .groupby('rics_k50')['avg_duration_days']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)

postings_filled['quarter_ts'] = postings_filled['post_quarter'].dt.to_timestamp()

# 1. Compute variation on smoothed value per rics_k50
variation = (
    postings_filled
      .groupby('rics_k50')['postings_ma4']
      .std()                      # or .var() for variance, .max()- .min() for range
      .sort_values(ascending=False)
)

# 2. Take the top N most variable categories
top_var_rics = variation.head(3).index.tolist()
#print(variation)
print("Most variable RICS_K50:", top_var_rics)

# 4. Filter postings_filled to just those categories
plot_data = postings_filled[postings_filled['rics_k50'].isin(top_var_rics)]


plot_data =(plot_data
    .merge(macro_q_2020[['post_quarter', 'eci_yoy', 'A191RO1Q156NBEA','unrate_us','part_yoy']],   on=['post_quarter'], how='left') 
                         )

macro_for_plot = (
    plot_data
    .sort_values('quarter_ts')
    .drop_duplicates(subset=['quarter_ts'])
    [['quarter_ts', 'eci_yoy', 'A191RO1Q156NBEA', 'unrate_us','part_yoy']]
)

Most variable RICS_K50: ['Information Technology Services', 'Digital Commerce Services', 'Electronics Manufacturing']

pivot_postings = postings_filled.pivot_table(
    index='post_quarter',        # rows = quarters
    columns='rics_k50',          # columns = industries/roles
    values='postings_ma4',       # heat value (smoothed postings) 
)
#pivot_dev = (pivot_postings - pivot_postings.mean()) / pivot_postings.std()
pivot_dev = pivot_postings / pivot_postings.mean()
#print(pivot_postings.mean() )
# Sort time for cleaner heatmap
pivot_postings = pivot_postings.sort_index()
#print(pivot_dev)
fig, ax = plt.subplots(figsize=(14, 8))

sns.heatmap(
    pivot_dev,
    cmap="YlGnBu",                      # no centering
    ax=ax,
    cbar_kws={"label": "Deviation from Historical Mean (Postings)"}
)

#ax.set_title("Job Postings by Industry (Deviation from Historical Mean, 4Q MA)", fontsize=14)
ax.set_xlabel("Industry")
ax.set_ylabel("Quarter")

# Rotate x-axis labels for readability
ax.set_xticklabels(
    pivot_postings.columns,
    rotation=45,
    ha="right"
)

plt.tight_layout()

save_and_show(out_path,"openings_heatmap.png")

plt.figure(figsize=(12, 7))

ax1 = plt.gca()

# ---- Remove ALL spines on primary axis ----
for spine in ax1.spines.values():
    spine.set_visible(False)

# ---- Primary axis: postings by rics_k50 ----
for rics in plot_data['rics_k50'].unique():
    subset = plot_data[plot_data['rics_k50'] == rics]
    ax1.plot(
        subset['quarter_ts'],
        subset['postings_ma4'],
        label=rics,
        alpha=0.8
    )

ax1.set_xlabel("Quarter")
ax1.set_ylabel("Postings (thousands)")
ax1.tick_params(axis='y', labelsize=11)
ax1.tick_params(axis='x', labelsize=11, rotation=45)

# ---- Use quarter strings as x-axis labels ----
# Sort by quarter_ts so ticks/labels are in order
plot_data_sorted = plot_data.sort_values('quarter_ts').drop_duplicates('quarter_ts')

ax1.set_xticks(plot_data_sorted['quarter_ts'])
ax1.set_xticklabels(plot_data_sorted['post_quarter'])

# ---- Light grid ----
ax1.grid(
    True,
    which='major',
    axis='both',
    color='lightgray',
    linewidth=0.8,
    alpha=0.6
)
ax1.set_axisbelow(True)
ax1.tick_params(axis='x', length=0)

# ---- Secondary axis: GDP YoY ----
ax2 = ax1.twinx()

ax2.plot(
    macro_for_plot['quarter_ts'],
    macro_for_plot['A191RO1Q156NBEA'],
    linestyle='--',
    linewidth=2.5,
    label='GDP Growth',
    color='black' 
)

ax2.set_ylabel("US GDP Growth (%)", fontsize=12)
ax2.tick_params(axis='y', labelsize=11)

# ---- Remove ALL spines on secondary axis ----
for spine in ax2.spines.values():
    spine.set_visible(False)

# ---- Combined legend outside ----
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

ax1.legend(
    lines1 + lines2,
    labels1 + labels2,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=len(labels1 + labels2),
    frameon=False,
    fontsize=10
)

plt.tight_layout()
save_and_show(out_path, "openings_top3.png")

# Plot average wages
plt.figure(figsize=(12, 7))

ax1 = plt.gca()

# ---- Primary axis: avg salary by rics_k50 ----
for rics in plot_data['rics_k50'].unique():
    subset = plot_data[plot_data['rics_k50'] == rics]
    ax1.plot(
        subset['quarter_ts'],
        subset['avg_salary_ma4'],
        label=rics,
        alpha=0.85,
        linewidth=2
    )

ax1.set_xlabel("Quarter", fontsize=12)
ax1.set_ylabel("Average Salary (thousands $)", fontsize=12)
ax1.tick_params(axis='y', labelsize=11)
ax1.tick_params(axis='x', rotation=45, labelsize=11)

# ---- Remove ALL spines on primary axis ----
for spine in ax1.spines.values():
    spine.set_visible(False)

# ---- Light gray grid ----
ax1.grid(
    True, which='major', axis='both',
    color='lightgray', linewidth=0.8, alpha=0.6
)
ax1.set_axisbelow(True)
ax1.tick_params(axis='x', length=0)
# ---- Secondary axis: ECI YoY ----
ax2 = ax1.twinx()
ax2.plot(
    macro_for_plot['quarter_ts'],
    macro_for_plot['eci_yoy'],
    linestyle='-.',
    linewidth=2.5,
    color='green',
    label='ECI YoY (%)'
)

ax2.set_ylabel("Change in ECI (%)", fontsize=12)
ax2.tick_params(axis='y', labelsize=11)

# ---- Remove ALL spines on secondary axis ----
for spine in ax2.spines.values():
    spine.set_visible(False)

# ---- Quarter labels on x-axis ----
plot_data_sorted = (
    plot_data
    .sort_values('quarter_ts')
    .drop_duplicates('quarter_ts')
)

ax1.set_xticks(plot_data_sorted['quarter_ts'])
ax1.set_xticklabels(plot_data_sorted['post_quarter'].astype(str))

# ---- Combined legend ABOVE the plot ----
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

ax1.legend(
    lines1 + lines2,
    labels1 + labels2,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=len(labels1 + labels2),
    frameon=False,
    fontsize=10
)

plt.tight_layout()
save_and_show(out_path, "wages_openings_top3.png")

# Plot average Job opening duration
plt.figure(figsize=(12, 7))

ax1 = plt.gca()

# ---- Primary axis: avg duration by rics_k50 ----
for rics in plot_data['rics_k50'].unique():
    subset = plot_data[plot_data['rics_k50'] == rics]
    ax1.plot(
        subset['quarter_ts'],
        subset['avg_duration_days_ma4'],
        label=rics,
        alpha=0.85,
        linewidth=2
    )

ax1.set_xlabel("Quarter", fontsize=12)
ax1.set_ylabel("Vacancy Duration (days)", fontsize=12)
ax1.tick_params(axis='y', labelsize=11)
ax1.tick_params(axis='x', rotation=45, labelsize=11)
ax1.tick_params(axis='x', length=0)
# ---- Remove ALL plot borders ----
for spine in ax1.spines.values():
    spine.set_visible(False)

# ---- Light gray grid ----
ax1.grid(
    True, which='major', axis='both',
    color='lightgray', linewidth=0.8, alpha=0.6
)
ax1.set_axisbelow(True)

# ---- Secondary axis: unemployment rate ----
ax2 = ax1.twinx()
ax2.plot(
    macro_for_plot['quarter_ts'],
    macro_for_plot['part_yoy'],
    linestyle='-.',
    linewidth=2.5,
    color='dimgray',
    label='Change in Participation Rate'
)

ax2.set_ylabel("Change in Participation Rate (%)", fontsize=12)
ax2.tick_params(axis='y', labelsize=11)

# ---- Remove ALL spines on secondary axis ----
for spine in ax2.spines.values():
    spine.set_visible(False)

# ---- Quarter labels on x-axis ----
plot_data_sorted = (
    plot_data
    .sort_values('quarter_ts')
    .drop_duplicates('quarter_ts')
)

ax1.set_xticks(plot_data_sorted['quarter_ts'])
ax1.set_xticklabels(plot_data_sorted['post_quarter'].astype(str))

# ---- Combined legend ABOVE the plot ----
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

ax1.legend(
    lines1 + lines2,
    labels1 + labels2,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=len(labels1 + labels2),
    frameon=False,
    fontsize=10
)

plt.tight_layout()
save_and_show(out_path, "duration_openings_top3.png")

# Convert quarterly Period for plotting
postings_quarterly_country['post_quarter_ts'] = (
    postings_quarterly_country['post_quarter'].dt.to_timestamp()
)
# Sort  teh data
postings_quarterly_country = postings_quarterly_country.sort_values(['country_group', 'post_quarter_ts'])
# 4-quarter moving averages by country_group
postings_quarterly_country['postings_ma4'] = (
    postings_quarterly_country
        .groupby('country_group')['postings']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)

postings_quarterly_country['avg_salary_ma4'] = (
    postings_quarterly_country
        .groupby('country_group')['avg_salary']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)

postings_quarterly_country['avg_duration_days_ma4'] = (
    postings_quarterly_country
        .groupby('country_group')['avg_duration_days']
        .transform(lambda s: s.rolling(window=4, min_periods=1).mean())
)
 
#display(postings_quarterly_country)

plt.figure(figsize=(12, 7))
ax1 = plt.gca()

# ---- Primary axis: US vs Abroad postings ----
for region in ['United States', 'Abroad']:
    subset = postings_quarterly_country[
        postings_quarterly_country['country_group'] == region
    ]
    ax1.plot(
        subset['post_quarter_ts'],
        subset['postings_ma4'],
        label=region,
        linewidth=2,
        alpha=0.85
    )

ax1.set_xlabel("Quarter", fontsize=12)
ax1.set_ylabel("Job Postings (thousands)", fontsize=12)
ax1.tick_params(axis='y', labelsize=11)
ax1.tick_params(axis='x', rotation=45, labelsize=11)
ax1.tick_params(axis='x', length=0)
# ---- Remove ALL spines on primary axis ----
for spine in ax1.spines.values():
    spine.set_visible(False)

# ---- Light gray grid ----
ax1.grid(
    True, which='major', axis='both',
    color='lightgray', linewidth=0.8, alpha=0.6
)
ax1.set_axisbelow(True)
ax1.tick_params(axis='x', length=0)
# ---- Secondary axis: Unemployment rate ----
ax2 = ax1.twinx()
ax2.plot(
    macro_for_plot['quarter_ts'],
    macro_for_plot['unrate_us'],
    linestyle=':',
    linewidth=2.5,
    color='gray',
    label="US Unemployment Rate"
)

ax2.set_ylabel("Unemployment Rate (%)", fontsize=12)
ax2.tick_params(axis='y', labelsize=11)

# ---- Remove ALL spines on secondary axis ----
for spine in ax2.spines.values():
    spine.set_visible(False)

# ---- Quarter labels on x-axis ----
postings_sorted = (
    postings_quarterly_country
    .sort_values('post_quarter_ts')
    .drop_duplicates('post_quarter_ts')
)

ax1.set_xticks(postings_sorted['post_quarter_ts'])
ax1.set_xticklabels(postings_sorted['post_quarter'].astype(str))

# ---- Combined legend ABOVE plot ----
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

ax1.legend(
    lines1 + lines2,
    labels1 + labels2,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=len(labels1 + labels2),
    frameon=False,
    fontsize=10
)

plt.tight_layout()
save_and_show(out_path, "openings_us_abroad.png")

plt.figure(figsize=(12, 7))
ax1 = plt.gca()

# ---- Primary axis: duration by region ----
for region in ['United States', 'Abroad']:
    subset = postings_quarterly_country[
        postings_quarterly_country['country_group'] == region
    ]
    ax1.plot(
        subset['post_quarter_ts'],
        subset['avg_duration_days_ma4'],
        label=region,
        linewidth=2,
        alpha=0.85
    )

ax1.set_xlabel("Quarter", fontsize=12)
ax1.set_ylabel("Vacancy Duration (days)", fontsize=12)
ax1.tick_params(axis='y', labelsize=11)
ax1.tick_params(axis='x', rotation=45, labelsize=11)

# ---- Remove ALL spines on primary axis ----
for spine in ax1.spines.values():
    spine.set_visible(False)

# ---- Grid behind data ----
ax1.grid(
    True, which='major', axis='both',
    color='lightgray', linewidth=0.8, alpha=0.6
)
ax1.set_axisbelow(True)
ax1.tick_params(axis='x', length=0)
# ---- Secondary axis: Unemployment rate ----
ax2 = ax1.twinx()
ax2.plot(
    macro_for_plot['quarter_ts'],
    macro_for_plot['unrate_us'],
    linestyle=':',
    linewidth=2.5,
    color='gray',
    label="Unemployment Rate"
)

ax2.set_ylabel("Unemployment Rate (%)", fontsize=12)
ax2.tick_params(axis='y', labelsize=11)

# ---- Remove ALL spines on secondary axis ----
for spine in ax2.spines.values():
    spine.set_visible(False)

# ---- Quarter labels on x-axis ----
postings_sorted = (
    postings_quarterly_country
    .sort_values('post_quarter_ts')
    .drop_duplicates('post_quarter_ts')
)

ax1.set_xticks(postings_sorted['post_quarter_ts'])
ax1.set_xticklabels(postings_sorted['post_quarter'].astype(str))

# ---- Combined legend ABOVE the plot ----
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()

ax1.legend(
    lines1 + lines2,
    labels1 + labels2,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=len(labels1 + labels2),
    frameon=False,
    fontsize=10
)

plt.tight_layout()
save_and_show(out_path, "duration_us_abroad.png")

	job_id	rcid	company	rics_k50	rics_k200	rics_k400	title_raw	title_translated	role_k10	role_k50	...	role_k1500	mapped_role	country	state	salary	post_date	remove_date	ultimate_parent_rcid	ultimate_parent_company_name	remote_type
0	323493558100000000005	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Product Marketing Manager (Multiple Roles, inc...	product marketing manager multiple roles inclu...	Sales and Marketing	Product Marketing	...	Product Manager	Product Marketing	United States	Mississippi	126104.087265	2022-10-02	2022-10-24	350953	Microsoft Corp.	Fully in Office
1	3958486090	1475220	Google Cloud	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Workday Solutions Consultant	workday solutions consultant	Project and IT Specialist	Systems Analyst	...	HCM Systems Analyst	Workday Consultant	Ireland	Leinster	72701.937145	2024-06-25	2024-07-31	766823	Alphabet, Inc.	Fully in Office
2	322561550000000000003	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	U.S. DPSS SR CATEGORY MANAGER	us dpss sr category manager	Sales and Marketing	E-commerce Brand Manager	...	Retail Management	Global Retail Director	United States	Alabama	114703.959755	2022-09-03	2022-09-19	350953	Microsoft Corp.	Fully in Office
3	2569306356	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Services Executive	services executive	Sales and Marketing	Sales Representative	...	Sales Account Executive	Regional Account Executive	Australia	New South Wales	99840.725230	2021-04-16	2021-05-16	350953	Microsoft Corp.	Fully in Office
4	2155368791	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Research Program Manager, FRL	research program manager frl	Project and IT Specialist	Project Consultant	...	Program Manager	Sector Program Manager	United States	Washington	124044.836609	2020-06-16	2020-08-12	1233178	Meta Platforms, Inc.	Fully in Office
5	2367598223	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Production Engineer	production engineer	Engineer	Engineer	...	Manufacturing Engineer	Production Engineer	United States	Washington	78150.000000	2020-12-21	2021-02-16	1233178	Meta Platforms, Inc.	Fully in Office
6	2688417147	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Product Marketing Manager, Teams Usage - US M&O	product marketing manager teams usage us m a...	Sales and Marketing	Marketing Coordinator	...	Content Marketing	Marketing Director	United States	Washington	143389.448057	2021-08-23	2021-09-14	350953	Microsoft Corp.	Fully in Office
7	266713218300000000008	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Service Engineer Azure Data Lake Storage - CTJ	service engineer azure data lake storage ctj	Software Engineer	Software Developer	...	Infrastructure Engineer	Storage Engineer	United States	Washington	130000.000000	2021-11-11	2021-11-18	350953	Microsoft Corp.	Fully in Office
8	368576882600000000009	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Software Engineer, Infrastructure - Monetization	software engineer, infrastructure monetization	Software Engineer	Software Developer	...	Software Engineer	Software Engineering	United States	California	155553.561120	2023-09-28	2023-10-01	1233178	Meta Platforms, Inc.	Fully in Office
9	3985339558	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Principal Corporate Counsel, Land Acquisition,...	principal corporate counsel, land acquisition,...	Finance	Corporate Attorney	...	Corporate Counsel	Mergers and Acquisitions Attorney	United States	Arkansas	186100.000000	2024-07-27	2024-08-22	350953	Microsoft Corp.	Fully in Office

	job_id	rcid	company	rics_k50	rics_k200	rics_k400	title_raw	title_translated	role_k10	role_k50	...	role_k1500	mapped_role	country	state	salary	post_date	remove_date	ultimate_parent_rcid	ultimate_parent_company_name	remote_type
0	323493558100000000005	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Product Marketing Manager (Multiple Roles, inc...	product marketing manager multiple roles inclu...	Sales and Marketing	Product Marketing	...	Product Manager	Product Marketing	United States	Mississippi	126104.087265	2022-10-02	2022-10-24	350953	Microsoft Corp.	Fully in Office
1	3958486090	1475220	Google Cloud	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Workday Solutions Consultant	workday solutions consultant	Project and IT Specialist	Systems Analyst	...	HCM Systems Analyst	Workday Consultant	Ireland	Leinster	72701.937145	2024-06-25	2024-07-31	766823	Alphabet, Inc.	Fully in Office
2	322561550000000000003	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	U.S. DPSS SR CATEGORY MANAGER	us dpss sr category manager	Sales and Marketing	E-commerce Brand Manager	...	Retail Management	Global Retail Director	United States	Alabama	114703.959755	2022-09-03	2022-09-19	350953	Microsoft Corp.	Fully in Office
3	2569306356	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Services Executive	services executive	Sales and Marketing	Sales Representative	...	Sales Account Executive	Regional Account Executive	Australia	New South Wales	99840.725230	2021-04-16	2021-05-16	350953	Microsoft Corp.	Fully in Office
4	2155368791	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Research Program Manager, FRL	research program manager frl	Project and IT Specialist	Project Consultant	...	Program Manager	Sector Program Manager	United States	Washington	124044.836609	2020-06-16	2020-08-12	1233178	Meta Platforms, Inc.	Fully in Office
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4308307	376866964500000000002	233459	Google LLC	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Global Partner Go-to-Market Leader, Workspace,...	global partner go to market leader, workspace,...	Project and IT Specialist	Project Consultant	...	Cloud Advisor	Cloud Business Manager	United States	Washington	255334.677535	2023-12-05	2023-12-11	766823	Alphabet, Inc.	Fully in Office
4308308	2944948490	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Director, Compensation Business Partner, Reali...	director compensation business partner reality...	Finance	Claims Adjuster	...	Compensation and Benefits	Executive Compensation	United States	California	120065.543326	2022-02-28	2022-03-04	1233178	Meta Platforms, Inc.	Fully in Office
4308309	2759295280	350953	Microsoft Corp.	Information Technology Services	Enterprise Software and IT Services	Enterprise Software and Cloud Services	Principal Software Engineer	principal software engineer	Software Engineer	Software Developer	...	Software Engineer	Software Engineering	United States	North Carolina	161000.000000	2021-09-23	2021-10-23	350953	Microsoft Corp.	Fully in Office
4308310	2599905190	1233178	Meta Platforms, Inc.	Digital Commerce Services	Digital Commerce Services	Tech and E-Commerce Platforms	Computer Vision Silicon Architect	computer vision silicon architect	Software Engineer	Software Developer	...	Computer Vision Engineer	Vision Engineer	United States	Texas	191951.120577	2021-07-07	2021-08-20	1233178	Meta Platforms, Inc.	Fully in Office
4308311	314681742100000000004	1223555	NVIDIA Corp.	Electronics Manufacturing	Electronics and Components Manufacturing	Semiconductor and Electronics Manufacturing	Senior Platform Architect – Autonomous Vehicles	senior platform architect autonomous vehicles	Software Engineer	Software Developer	...	Robotics Engineer	Autonomous Software Engineer	United Kingdom	empty	102236.850354	2022-08-19	2022-08-23	1223555	NVIDIA Corp.	Fully in Office

	rcid	salary	ultimate_parent_rcid
count	4.308312e+06	4.308009e+06	4.308312e+06
mean	8.057071e+05	1.240541e+05	7.682370e+05
std	1.943668e+06	5.663781e+04	1.250954e+06
min	7.000000e+00	1.000000e+00	3.509530e+05
25%	3.509530e+05	8.384404e+04	3.509530e+05
50%	3.509530e+05	1.263115e+05	3.509530e+05
75%	1.233178e+06	1.647776e+05	1.233178e+06
max	1.018297e+08	8.771640e+05	2.214407e+07

	post_quarter	country_group	postings	avg_salary	avg_duration_days
0	2020Q2	Abroad	8.000	56.959048	35.682750
1	2020Q2	United States	24.426	125.530968	47.620077
2	2020Q3	Abroad	0.784	58.710923	34.204082
3	2020Q3	United States	1.601	129.414142	38.751405
4	2020Q4	Abroad	6.888	59.095396	37.583333
5	2020Q4	United States	19.325	136.683236	48.767400
6	2021Q1	Abroad	49.906	67.498552	33.861019
7	2021Q1	United States	77.221	143.825382	39.981767
8	2021Q2	Abroad	32.856	63.868718	33.847029
9	2021Q2	United States	58.293	141.808911	37.108624
10	2021Q3	Abroad	83.843	65.253131	21.535000
11	2021Q3	United States	351.688	150.260336	19.934291
12	2021Q4	Abroad	165.822	65.740650	19.092274
13	2021Q4	United States	850.501	145.915810	13.434701
14	2022Q1	Abroad	148.523	63.747705	15.263670
15	2022Q1	United States	472.499	138.649660	13.499783
16	2022Q2	Abroad	135.457	67.470264	12.020028
17	2022Q2	United States	284.757	138.361645	11.040491
18	2022Q3	Abroad	83.454	64.311301	9.225442
19	2022Q3	United States	251.193	140.456108	8.796248
20	2022Q4	Abroad	68.018	70.260510	8.518789
21	2022Q4	United States	138.074	149.155154	8.084237
22	2023Q1	Abroad	27.301	64.991395	12.911212
23	2023Q1	United States	51.122	147.164577	12.308576
24	2023Q2	Abroad	18.187	66.939652	20.200748
25	2023Q2	United States	37.977	149.595896	18.684599
26	2023Q3	Abroad	32.487	69.304807	11.913966
27	2023Q3	United States	48.428	152.146161	13.864004
28	2023Q4	Abroad	31.847	66.509893	13.241216
29	2023Q4	United States	45.094	155.381812	16.345988
30	2024Q1	Abroad	38.248	63.244093	18.717292
31	2024Q1	United States	40.332	161.304956	19.551622
32	2024Q2	Abroad	64.438	64.681478	19.841817
33	2024Q2	United States	93.381	171.854325	21.661473
34	2024Q3	Abroad	66.421	66.231270	21.080170
35	2024Q3	United States	91.256	169.679035	23.690574
36	2024Q4	Abroad	41.971	57.432720	24.894546
37	2024Q4	United States	54.962	169.070233	25.569430
38	2025Q1	Abroad	37.050	56.013724	20.784831
39	2025Q1	United States	37.602	164.700339	22.715973
40	2025Q2	Abroad	31.300	53.371588	22.865591
41	2025Q2	United States	34.678	173.881680	25.686977
42	2025Q3	Abroad	28.234	58.294565	16.013530
43	2025Q3	United States	22.484	188.173701	19.351672

Job Postings Collapse in Key Sectors — Are We Entering a Labor Market Cooldown?¶

0. Preparation¶

1. Importing the Postings Dataset¶

2. Importing Publicly Available Data¶

3. Analyzing U.S. Labor Market Trends (openings, salaries, and job vacancy duration)¶

3.1 Constructing the dataset for the Plots.¶

3.2 Documenting Movement in Labor Market Trends.¶

4. Documenting Movement in Labor Market Trends between the U.S and Other Labor Markets.¶

	post_quarter	rics_k50	postings	avg_salary	avg_duration_days
0	2020Q2	Aerospace and Defense	0.005	127.522810	57.000000
1	2020Q2	Biotech and Healthcare Services	0.024	92.333761	45.500000
2	2020Q2	Digital Commerce Services	17.347	122.676433	52.809419
3	2020Q2	Electronics Manufacturing	1.355	128.493850	39.053137
4	2020Q2	Financial Services	0.340	135.161774	40.050000
...	...	...	...	...	...
198	2025Q3	Electronics Manufacturing	2.325	242.462399	21.620645
199	2025Q3	Financial Services	0.282	177.118775	22.145390
200	2025Q3	Information Technology Services	13.386	179.629806	16.415434
201	2025Q3	Media and Entertainment	0.285	131.651700	24.750877
202	2025Q3	Telecommunications Services	0.061	117.115892	26.147541