Production Function Estimation and Extension¶
The estimation of firms' cost and production functions plays an important role in any empirical study of industry competition. Production function estimation allows businesses to quantify the relationship between economic output and inputs like labor and capital. Production functions also allow for predictions of productivity, costs, and the profit rate (markup) for a product or company.
Cobb-Douglas Production Function¶
The most common production function framework is the Cobb-Douglas, which takes the following functional form: $$ Y = V^{\theta_v} K^{\theta_k} E$$
Where $Y$ is total output, $V$ is a production input that is adjustable in the short term, and $K$ is a production input facing short-term adjustment restrictions (such as capital). In this context, I implement the method from De Loecker, J., Eeckhout, J., & Unger, G. (2020). The rise of market power and the macroeconomic implications. The Quarterly Journal of Economics, 135(2), 561–644.
To implement it, I rely on an annual panel of financial data for more than 3,000 publicly traded companies from Compustat to estimate the parameters of interest $\theta_v$ and $\theta_k$ for different industries $j$. Defining $z_{it}^{(j)} \equiv log\left(Z_{it}^{(j)} \right)$ for any variable $Z_{it}^{(j)}$ of company $i$, year $t$, and industry $j$, the production function can be written as
$$ y_{it}^{(j)} = \theta^{(j)}_v v_{it}^{(j)} + \theta^{(j)}_k k_{it}^{(j)} + \epsilon_{it}^{(j)} $$
In the data, I use $y_{it}^{(j)} = log(sale\_D)$, $v_{it}^{(j)} = log(cogs\_D)$, and $k_{it}^{(j)} = log(ppet\_D)$ as measures of production (sales), a short-term adjustable input (COGS), and a long-term adjustable input (Property, Plant, and Equipment), all deflated using the US GDP deflator (hence the "_D" suffix), to estimate the cost of goods sold and capital (PP&E) elasticities of output $( \theta^{(j)}_v, \theta^{(j)}_k)$ for each year and industry in the sample.
To this end, I will estimate the annual elasticities sequentially between 1969 and 2021.
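The deflated "_D" series come directly from the De Loecker et al. (2020) cleaning code, but the transformation itself is straightforward. Below is a minimal sketch of what it amounts to, assuming a hypothetical raw frame with placeholder columns `sale` (nominal sales) and `usgdpdef` (US GDP deflator); neither is an exact variable name from the replication files.
#A minimal sketch of the deflation step (placeholder column names, not the replication files' variables)
import numpy as np
import pandas as pd

def deflate_and_log(raw: pd.DataFrame) -> pd.DataFrame:
    out = raw.copy()
    out['sale_D'] = out['sale'] / out['usgdpdef']   #deflate nominal sales with the US GDP deflator
    out['lsale_D'] = np.log(out['sale_D'])          #log of the deflated series
    return out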
Extension¶
The framework above is flexible and allows for the inclusion of additional short-term and long-term adjustable inputs. In the Python implementation, I also consider $$ y_{it}^{(j)} = \theta^{(j)}_v v_{it}^{(j)} + \theta^{(j)}_x x_{it}^{(j)} + \theta^{(j)}_k k_{it}^{(j)} + \epsilon_{it}^{(j)} $$
where $x_{it}^{(j)} = log(xsga\_D)$ is another short-term adjustable input corresponding to Selling, General and Administrative (SG&A) costs, and $\theta^{(j)}_x$ is its elasticity of output.
Method Implementation in Python¶
0. Preparation¶
Download the code from De Loecker et al. (2020) and follow their instructions to download the data. Clean the data using their code, then save a copy of the "data_main_upd_trim_1.dta" and "theta_W_s_window.dta" files. Given these copies, I start implementing the estimation method in Python.
#Housekeeping
import os
import sys

clear = lambda: os.system('cls')
clear()
os.chdir('[[ADD PATH]]/PF_estimation')

#Import functions stored in a folder (custom)
#sys.path.append("directoryname")

import time
import numpy as np
import pandas as pd
import pyreadstat
import seaborn as sns
import statsmodels.api as smf
import statsmodels.formula.api as smfform
from matplotlib import pyplot
from numpy.random import seed, randn
from optimparallel import minimize_parallel
from scipy import stats
from scipy.optimize import minimize, fsolve
from statsmodels.nonparametric.kernel_regression import KernelReg
#Turn off pandas warnings
pd.options.mode.chained_assignment = None # default='warn'
Now, let's import the datasets and do some initial data cleaning to extend the panel to include 2017, 2018, 2019, 2020, and 2021:
#Import de loecker elasticities
Delocker_elast = pyreadstat.read_dta('./deloecker/theta_W_s_window.dta')[0]
#Extend panel to Obtain the updated series
extendyr = [2017,2018,2019,2020,2021]
industries = Delocker_elast.ind2d.unique()
#add gaps to the new extended period
for ind in industries:
samplepanel = pd.DataFrame({'year':extendyr , 'ind2d':ind , 'theta_WI1_ct':np.nan, 'theta_WI2_ct':np.nan,
'theta_WI2_xt':np.nan, 'theta_WI1_kt':np.nan, 'theta_WI2_kt':np.nan})
Delocker_elast = pd.concat([Delocker_elast,samplepanel] )
Delocker_elast= Delocker_elast.sort_values(['ind2d', 'year'], ascending=[True, True])
Delocker_elast['theta_WI1_ct_update'] = np.nan
Delocker_elast['theta_WI2_ct_update'] = np.nan
#Keep only the variables of interest
Delocker_elast = Delocker_elast[['ind2d','year','theta_WI1_ct','theta_WI1_ct_update','theta_WI2_ct','theta_WI2_ct_update']]
#import Compustat
Data = pyreadstat.read_dta('./deloecker/data_main_upd_trim_1.dta')[0]
#We will need: sale_D, cogs_D, capital_D, xsga_D, and identifiers
#Data.columns.values.tolist() shows all names
names = ['sale_D','cogs_D','capital_D','xsga_D']
Data = Data[['gvkey','year','ind2d','sale_D','cogs_D','capital_D','xsga_D']]
Productivities = pd.DataFrame(columns=['gvkey','year','ind2d','omega'])
Productivities_ext = pd.DataFrame(columns=['gvkey','year','ind2d','omega_ext'])
#compute logs and Lag the variables
for varname in names:
Data['l'+varname] = np.log(Data[varname])
Data['lag'+'l'+varname] = Data.groupby('gvkey')['l'+varname].shift()
C:\ProgramData\anaconda3\Lib\site-packages\pandas\core\arraylike.py:399: RuntimeWarning: divide by zero encountered in log
  result = getattr(ufunc, method)(*inputs, **kwargs)
Step 1: Kernel Regression¶
Run the following kernel regression (using the observations of year $t$ and industry $j$ only): \begin{align*} y_{it}^{(j)} = \phi\left(v_{it}^{(j)}, k_{it}^{(j)}\right) + \epsilon_{it}^{(j)} \end{align*} and compute the predicted values $\widehat{y}_{it}^{(j)} \equiv \widehat{\phi}_{it}^{(j)}$; using these same estimates, also compute $\widehat{\phi}_{it-1}^{(j)}$. The main reason De Loecker et al. (2020) use a non-parametric regression is that the sample may be small for some industries. However, I suppose OLS should not be too different as long as there is an approximately linear relationship between the (log) factors (features) and the outcome variable (log sales).
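To make the first stage concrete, here is a minimal sketch of both fits on a single industry-year slice. The helper and its `df` argument are illustrative, not part of the replication code; `df` is assumed to hold the log variables defined above with no missing values.
#A minimal first-stage sketch (illustrative, not part of the replication code)
import numpy as np
import statsmodels.api as smf
from statsmodels.nonparametric.kernel_regression import KernelReg

def first_stage(df):
    #df: hypothetical industry-year subsample with columns lsale_D, lcogs_D, lcapital_D and no missing values
    y = np.array(df['lsale_D'])
    X = np.array(df[['lcogs_D','lcapital_D']])
    #Non-parametric first stage: phi_hat are the kernel-regression fitted values
    phi_hat = KernelReg(endog=y, exog=X, var_type='cc').fit(X)[0]
    #Linear benchmark: if the relationship is roughly log-linear, both fits should be close
    phi_ols = smf.OLS(y, smf.add_constant(X)).fit().fittedvalues
    return phi_hat, phi_ols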
Step 2: Make a Guess¶
Make a guess for $\Theta^{(j)}_n = \left[\alpha_{n}^{(j)}, \ \theta_{vn}^{(j)}, \ \theta_{kn}^{(j)}\right]$, where $n$ is the iteration and $\alpha_{n}^{(j)}$ is the guessed total factor productivity intercept. I use the estimated parameters from an OLS regression as my initial guess $\Theta^{(j)}_0$. Given $ \Theta^{(j)}_n $, compute productivities \begin{align*} \omega_{it}^{(j)} = & \ \widehat{\phi}_{it}^{(j)} - \Theta^{(j)}_n\ \begin{bmatrix} 1 \\ v_{it}^{(j)}\\ k_{it}^{(j)}\end{bmatrix} \end{align*}
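Given the first-stage fit and a guess for $\Theta^{(j)}_n$, the implied productivities are just the residuals of the linear index. A minimal sketch (illustrative helper, not part of the replication code):
#A minimal sketch of the productivity step (illustrative helper)
import numpy as np

def implied_productivity(phi_hat, v, k, theta_guess):
    #theta_guess = [alpha_n, theta_vn, theta_kn]; omega_it = phi_hat_it - [1, v_it, k_it] @ theta_guess
    regressors = np.column_stack([np.ones_like(v), v, k])
    return phi_hat - regressors @ theta_guess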
Step 3: Make an Assumption for Total Factor Productivity and Estimate the Relationship¶
Assuming $\omega_{it}^{(j)}$ follows an AR(1) process (this assumption can be changed), estimate \begin{align*} \omega_{it}^{(j)} = & \ \rho \omega_{it-1}^{(j)} + u_{it}^{(j)} \end{align*} by OLS and compute the residuals $\widehat{u}_{it}^{(j)}$; this is why two consecutive years of data are needed. Annual data rarely display more than a couple of years of persistence, so a single lag usually captures the dynamics well.
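A minimal sketch of this step (illustrative helper; a constant is included in the OLS, as in the implementation further below):
#A minimal sketch of the AR(1) step (illustrative helper)
import numpy as np
import statsmodels.api as smf

def ar1_innovations(omega, omega_lag):
    #Fit omega_it = const + rho*omega_it-1 + u_it by OLS; return rho and the residuals u_hat
    fit = smf.OLS(omega, smf.add_constant(omega_lag)).fit()
    return fit.params[1], fit.resid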
Step 4: Calculate the Moment Condition¶
Finally, compute the moment condition \begin{align*} E\left\{ \begin{bmatrix} u_{it}^{(j)} 1_{it}^{(j)}\\ u_{it}^{(j)} v_{it-1}^{(j)}\\ u_{it}^{(j)}k_{it}^{(j)} \end{bmatrix}\right\} = & \ 0_{3\times1} \end{align*} which is collapsed into the GMM objective function described in Hansen, L. P. (1982). Large sample properties of generalized method of moments estimators. Econometrica, 50(4), 1029–1054: \begin{align*} G(\Theta) =& \ \left(Z^\prime U\right)^\prime\left(Z^\prime U\right), \end{align*} where $Z = \left[1 \quad v_{it-1}^{(j)} \quad k_{it}^{(j)}\right]_{n_t^{(j)}\times 3}$ is the stacked matrix of instruments, $U = \left[u_{it}^{(j)}\right]_{n_t^{(j)} \times 1}$ is the stacked vector of innovations, and $n_t^{(j)}$ is the number of firms in year $t$ and industry $j$.
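A minimal sketch of this objective for the baseline instrument set (illustrative helper; the full implementation below also handles the SG&A extension):
#A minimal sketch of the GMM objective (illustrative helper)
import numpy as np

def gmm_objective(u_hat, v_lag, k):
    #Instruments Z = [1, v_it-1, k_it]; objective G = (Z'U)'(Z'U)
    Z = np.column_stack([np.ones_like(v_lag), v_lag, k])
    moments = Z.T @ u_hat          #one sample moment per instrument
    return float(moments @ moments)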
Extension¶
If considering the extension with overhead costs (SG&A), then $Z = \left[1 \quad v_{it-1}^{(j)} \quad x_{it-1}^{(j)} \quad k_{it}^{(j)}\right]_{n_t^{(j)}\times 4}$ and $U$ comes from the extended kernel regression and AR(1) estimations that result from including this additional regressor.
Step 5: Define the Convergence Criteria and Iterate¶
If the objective function has not been minimized up to a certain tolerance, if the Euclidean norm of the moment condition is still above tolerance, or if the parameter change across iterations exceeds some tolerance (convergence requires $||\Theta^{(j)}_{n}-\Theta^{(j)}_{n-1}||<\eta$ for some $\eta$ close to $0$), update the guess of Step 2 to $\Theta^{(j)}_{n+1}=\Theta^{(j)}_{n}$ and repeat until convergence.
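As a rough illustration of this stopping rule, here is a minimal sketch of the outer loop, where `update_step` is a hypothetical callable that runs Steps 2–4 for a candidate $\Theta$ and returns the updated estimate (the implementation below delegates this iteration to scipy.optimize.minimize instead):
#A minimal sketch of the stopping rule (update_step is a hypothetical callable running Steps 2-4 for a guess)
import numpy as np

def iterate_to_convergence(theta0, update_step, eta=1e-8, max_iter=500):
    theta = np.asarray(theta0, dtype=float)
    for _ in range(max_iter):
        theta_new = np.asarray(update_step(theta), dtype=float)
        if np.linalg.norm(theta_new - theta) < eta:   #||Theta_n+1 - Theta_n|| < eta
            return theta_new
        theta = theta_new
    return theta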
This procedure can then be implemented for all industries in the sample.
Step 3 requires knowledge of $\Theta^{(j)}$ from the previous period. Since I don't know its initial value, I assume it equals the current guess in the first sample year, and thereafter carry forward the estimated value of $\Theta^{(j)}$ from the previous year. In practice, this produces a poor estimate of $\Theta^{(j)}$ for the first few periods, but it improves over time. I start the estimation in 1969.
Implementation in Python¶
Before implementing the iterative process across all industries, it is worth defining the objective function for both the first sample year and for subsequent years. In Python, this can be achieved as
def GMM_Delocker_first(theta, Estimset, X, Xlag, extend=False, optim=True):
    #Objective function for the first sample year (the same theta is used for t and t-1):
    #  omega_it   = phi_it   - X_it   theta
    #  omega_it-1 = phi_it-1 - X_it-1 theta
    #  u_it       = AR(1) residual of omega_it on a constant and omega_it-1
    #  G(theta)   = (Z'U)'(Z'U), with Z = [1, lag cogs, capital] (plus xsga if extend=True)
    #Parameters:
    #  theta:    guess for the PF elasticities
    #  Estimset: estimation set (after removing NA)
    #  X:        design matrix
    #  Xlag:     lag of X without gaps
    #  extend:   False by default; whether to include xsga as a fixed factor
    #  optim:    True by default; return only the objective function (for optimization) or all the results
    if extend:
        Z = np.array(Estimset[['laglcogs_D','lcapital_D','lxsga_D']])
    else:
        Z = np.array(Estimset[['laglcogs_D','lcapital_D']])
    Z = smf.add_constant(Z)
    omega = np.array(Estimset['phi']) - np.matmul(X,theta)
    lagomega = np.array(Estimset['lagphi']) - np.matmul(Xlag,theta)
    Xom = smf.add_constant(lagomega)
    ar1 = smf.OLS(omega,Xom).fit()            #fit the AR(1) once and reuse it
    U = ar1.resid
    rho = ar1.params
    Mom = np.matmul(np.transpose(Z),U)        #stacked sample moments Z'U
    objfun = np.matmul(np.transpose(Mom),Mom) #GMM objective (Z'U)'(Z'U)
    #Save results
    if optim == False:
        Results = {'Z': Z, 'omega':omega , 'objfun':objfun , 'rho': rho}
        return Results
    else:
        return objfun
def GMM_Delocker(theta, thetalag, Estimset, X, Xlag, extend=False, optim=True):
    #Objective function for subsequent years (theta for t, thetalag for t-1):
    #  omega_it   = phi_it   - X_it   theta
    #  omega_it-1 = phi_it-1 - X_it-1 thetalag
    #  u_it       = AR(1) residual of omega_it on a constant and omega_it-1
    #  G(theta)   = (Z'U)'(Z'U), with Z = [1, lag cogs, capital] (plus xsga if extend=True)
    #Parameters:
    #  theta:    guess for the PF elasticities at t
    #  thetalag: elasticities estimated for t-1
    #  Estimset: estimation set (after removing NA)
    #  X:        design matrix
    #  Xlag:     lag of X without gaps
    #  extend:   False by default; whether to include xsga as a fixed factor
    #  optim:    True by default; return only the objective function (for optimization) or all the results
    if extend:
        Z = np.array(Estimset[['laglcogs_D','lcapital_D','lxsga_D']])
    else:
        Z = np.array(Estimset[['laglcogs_D','lcapital_D']])
    Z = smf.add_constant(Z)
    omega = np.array(Estimset['phi']) - np.matmul(X,theta)
    lagomega = np.array(Estimset['lagphi']) - np.matmul(Xlag,thetalag)
    Xom = smf.add_constant(lagomega)
    ar1 = smf.OLS(omega,Xom).fit()            #fit the AR(1) once and reuse it
    U = ar1.resid
    rho = ar1.params
    Mom = np.matmul(np.transpose(Z),U)        #stacked sample moments Z'U
    objfun = np.matmul(np.transpose(Mom),Mom) #GMM objective (Z'U)'(Z'U)
    #Save results
    if optim == False:
        Results = {'Z': Z, 'omega':omega , 'objfun':objfun , 'rho': rho}
        return Results
    else:
        return objfun
Now, let's apply this iterative process to all industries and years:
all_years = range(1969,2022)
for ind in industries:
print("Industry:",ind)
Industrydat_all = Data.loc[Data['ind2d'] == ind ]
Industrydat_all = Industrydat_all.loc[(Industrydat_all['year']>=np.min(all_years) )& (Industrydat_all['year']<=np.max(all_years) )]
#Construct industrydat with only cogs and capital
Industrydat = Industrydat_all[['gvkey', 'year', 'ind2d', 'lsale_D', 'laglsale_D', 'lcogs_D', 'laglcogs_D', 'lcapital_D', 'laglcapital_D' ]]
#Construct industrydat with, cogs, xsga and capital
Industrydat_ext = Industrydat_all[['gvkey', 'year', 'ind2d', 'lsale_D', 'laglsale_D', 'lcogs_D', 'laglcogs_D', 'lcapital_D', 'laglcapital_D'
, 'lxsga_D', 'laglxsga_D' ]]
#Also drop inf and - inf and na
Industrydat = Industrydat.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
Industrydat_ext = Industrydat_ext.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
for year in all_years:
        #Now, select the estimation year (lagged values are stored as columns; estimation starts in 1969)
Estimset = Industrydat[Industrydat['year']==year ]
Estimset_ext = Industrydat_ext[Industrydat_ext['year']==year ]
print(" year:",year, "Obs:",len(Estimset))
#Determine theta0
Initial_all = Delocker_elast[Delocker_elast['ind2d'] == ind ]
Initial_all = Initial_all[(Initial_all['year']>=((year-10))) & (Initial_all['year']<year)][['theta_WI1_ct','theta_WI2_ct']]
Initial = np.mean(Initial_all['theta_WI1_ct'])
Initial_ext = np.mean(Initial_all['theta_WI2_ct'])
#Step 1: Run ols regression for the year in question
#For standard model
X = np.array(Estimset[['lcogs_D','lcapital_D']])
Xlag = np.array(Estimset[['laglcogs_D','laglcapital_D']])
Y = np.array(Estimset['lsale_D'])
Ylag = np.array(Estimset['laglsale_D'])
Xc = smf.add_constant(X)
Xlagc = smf.add_constant(Xlag)
#For Extended Model
X_ext = np.array(Estimset_ext[['lcogs_D','lcapital_D','lxsga_D']])
Xlag_ext = np.array(Estimset_ext[['laglcogs_D','laglcapital_D','laglxsga_D']])
Y_ext = np.array(Estimset_ext['lsale_D'])
Ylag_ext = np.array(Estimset_ext['laglsale_D'])
Xc_ext = smf.add_constant(X_ext)
Xlagc_ext = smf.add_constant(Xlag_ext)
OLS = smf.OLS(Y,Xc).fit()
OLS_ext = smf.OLS(Y_ext,Xc_ext).fit()
Model2 = KernelReg(endog=Y, exog=X,var_type='cc' )
Model2lag = KernelReg(endog=Ylag, exog=Xlag,var_type='cc' )
Model2_ext = KernelReg(endog=Y_ext, exog=X_ext,var_type='ccc' )
Model2lag_ext = KernelReg(endog=Ylag_ext, exog=Xlag_ext,var_type='ccc' )
thetaols = OLS.params
thetaols_ext = OLS_ext.params
Estimset['phi'] = Model2.fit(X)[0]
Estimset['lagphi'] = Model2.fit(Xlag)[0]
skipxsga = False
if (ind==49) & ( year == 2021):
skipxsga = True
if(skipxsga == False):
Estimset_ext['phi'] = Model2_ext.fit(X_ext)[0]
Estimset_ext['lagphi'] = Model2_ext.fit(Xlag_ext)[0]
        #If OLS gives a negative COGS elasticity, use the average of the De Loecker et al. elasticities from the previous ten years as the initial guess instead
if (thetaols[1]<0):
print("Negative OLS elasticity estimate, using 10 year average instead")
theta0 = np.array([Initial,Initial,Initial])
else:
theta0 = np.array(thetaols)
if (thetaols_ext[1]<0):
print("Negative OLS elasticity estimate, using 10 year average instead")
theta0_ext = np.array([Initial_ext,Initial_ext,Initial_ext,Initial_ext])
else:
theta0_ext = np.array(thetaols_ext)
#Step 2: Run ols GMM for the year in question
if (year == 1969):
#First do the baseline Cobb-Douglas
o2 = minimize(fun=GMM_Delocker_first, x0=theta0,args=(Estimset,Xc,Xlagc) ,tol=1e-10)
theta = o2.x
#Retrieve Results
Res = GMM_Delocker_first(theta, Estimset,Xc,Xlagc,optim=False)
thetalag = theta
#Now do the Xsga Extended Cobb-Douglas
extend = True
if(skipxsga == False):
o2_ext = minimize(fun=GMM_Delocker_first, x0=theta0_ext,args=(Estimset_ext,Xc_ext,Xlagc_ext,extend ),tol=1e-10)
theta_ext = o2_ext.x
#Retrieve Results
Res_ext = GMM_Delocker_first(theta_ext, Estimset_ext,Xc_ext,Xlagc_ext,optim=False,extend=extend)
thetalag_ext = theta_ext
else:
#First do the baseline Cobb-Douglas
o2 = minimize(fun=GMM_Delocker, x0=theta0,args=(thetalag, Estimset,Xc,Xlagc) ,tol=1e-10)
theta = o2.x
#Retrieve Results
Res = GMM_Delocker(theta, thetalag, Estimset,Xc,Xlagc,optim=False)
thetalag = theta
#Now do the Xsga Extended Cobb-Douglas
extend = True
if(skipxsga == False):
o2_ext = minimize(fun=GMM_Delocker, x0=theta0_ext,args=(thetalag_ext, Estimset_ext,Xc_ext,Xlagc_ext,extend) ,tol=1e-10)
theta_ext = o2_ext.x
#Retrieve Results
Res_ext = GMM_Delocker(theta_ext, thetalag_ext, Estimset_ext,Xc_ext,Xlagc_ext,extend=extend,optim=False)
thetalag_ext = theta_ext
#Now save and attach to the main dataset
position = (Delocker_elast['ind2d'] == ind) & (Delocker_elast['year'] ==year)
Delocker_elast.loc[position,"theta_WI1_ct_update"] = theta[1]
if(skipxsga == False):
Delocker_elast.loc[position,"theta_WI2_ct_update"] = theta_ext[1]
Estimset_ext['omega_ext'] = Res_ext['omega']
Productivities_ext = pd.concat([Productivities_ext, Estimset_ext[['gvkey','year','ind2d','omega_ext']]], sort=False)
else:
Delocker_elast.loc[position,"theta_WI2_ct_update"] = np.nan
Estimset['omega'] = Res['omega']
        Productivities = pd.concat([Productivities, Estimset[['gvkey','year','ind2d','omega']]], sort=False)
Industry: 11
year: 1969 Obs: 8
[...]
year: 2021 Obs: 12
Industry: 21
year: 1969 Obs: 102
[...]
year: 2021 Obs: 354
Industry: 22
year: 1969 Obs: 224
[...]
year: 2021 Obs: 233
Industry: 23
year: 1969 Obs: 34
[...]
year: 2021 Obs: 70
Industry: 31
year: 1969 Obs: 165
[...]
year: 2021 Obs: 148
Industry: 32
year: 1969 Obs: 296
[...]
year: 2021 Obs: 758
Industry: 33
year: 1969 Obs: 581
[...]
year: 2021 Obs: 958
Industry: 42
year: 1969 Obs: 91
[...]
year: 2021 Obs: 128
Industry: 44
year: 1969 Obs: 79
[...]
year: 2021 Obs: 56
Industry: 45
year: 1969 Obs: 41
[...]
year: 2021 Obs: 77
Industry: 48
year: 1969 Obs: 68
[...]
year: 2021 Obs: 184
Industry: 49
year: 1969 Obs: 6
[...]
year: 2021 Obs: 3
Industry: 51
year: 1969 Obs: 90
[...]
year: 2021 Obs: 606
Industry: 52
year: 1969 Obs: 48
[...]
year: 2021 Obs: 303
Industry: 53
year: 1969 Obs: 30
[...]
year: 2021 Obs: 126
Industry: 54
year: 1969 Obs: 34
[...]
year: 2021 Obs: 169
Industry: 56
year: 1969 Obs: 26
[...]
year: 2021 Obs: 91
Industry: 61
year: 1969 Obs: 3
[...]
year: 2021 Obs: 44
Industry: 62
year: 1969 Obs: 10
[...]
year: 2021 Obs: 87
Industry: 71
year: 1969 Obs: 9
[...]
year: 2021 Obs: 41
Industry: 72
year: 1969 Obs: 34
[...]
year: 2021 Obs: 83
Industry: 81
year: 1969 Obs: 11
[...]
year: 2021 Obs: 16
Industry: 99
year: 1969 Obs: 18
[...]
year: 2021 Obs: 11
Analysis of Estimated Elasticities¶
After performing the estimation procedure for all industries and years, save the output. Let's analyze the results. First, let's calculate the industry-specific profit rates $\pi_{t}^{(j)} = \left(SALES_{t}^{(j)} - COGS_{t}^{(j)} - XSG\&A_{t}^{(j)}\right)/SALES_{t}^{(j)}$, keep the years 1970 to 2021, average the elasticities by industry, and add a variable containing each of the NAICS industry names:
# Analysis of Estimated Elasticities
profit_rate = (
Data
.groupby(["ind2d","year"])[["sale_D", "cogs_D","xsga_D"]]
.sum()
)
profit_rate['profit_rate'] = (profit_rate['sale_D'] -profit_rate['cogs_D'] - profit_rate['xsga_D']
)/ profit_rate['sale_D']
Delocker_elast_plots = pd.merge(Delocker_elast,profit_rate,
how='left',
on=['year','ind2d'])
Delocker_elast_plots = Delocker_elast_plots.loc[
(Delocker_elast_plots["year"] >= 1970) & (Delocker_elast_plots["year"] <= 2021)].copy()
#add industry names
naics2_map = {
11: "Agriculture",
21: "Mining & Oil",
22: "Utilities",
23: "Construction",
31: "Mfg: Food/Textile",
32: "Mfg: Chemicals/Plastics",
33: "Mfg: Metals/Machinery",
42: "Wholesale Trade",
44: "Retail: Stores",
45: "Retail: Nonstore",
48: "Transport",
49: "Warehousing",
51: "Information",
52: "Finance",
53: "Real Estate",
54: "Prof. Services",
55: "Management",
56: "Admin & Support",
61: "Education",
62: "Healthcare",
71: "Arts & Entertainment",
72: "Accommodation & Food",
81: "Other Services",
92: "Public Admin",
99: "Other"
}
Delocker_elast_plots["industry_name"] = Delocker_elast_plots["ind2d"].map(naics2_map)
avg = (
Delocker_elast_plots
.groupby("industry_name")[["theta_WI1_ct_update", "theta_WI2_ct_update"]]
.mean()
)
Scatterplot of Average Elasticities¶
In the figure below, I plot the average estimated COGS production elasticities under the two-input model (COGS, capital) on the x axis and the three-input model (COGS, XSGA, capital) on the y axis. If both models delivered the same results, the elasticities would fall close to the 45-degree line, which I also include in the plot. As shown below, the plot suggests that XSGA is a confounding factor that biases the COGS elasticity upwards, as all the industry averages lie to the right of (below) the 45-degree line.
This plot also shows that, regardless of the model selected, an average increase in the use of materials that raises the Cost of Goods Sold by 1% (i.e., the company allocates 1% more budget to COGS) is associated with an average increase in sales of between roughly 0.55% and 0.82% (under the extended model with SG&A).
pyplot.figure(figsize=(6,6))
pyplot.scatter(avg['theta_WI1_ct_update'], avg['theta_WI2_ct_update'])
pyplot.xlabel("Elasticity of COGS (θ_WI1)")
pyplot.ylabel("Elasticity of COGS in extended model with SG&A (θ_WI2)")
pyplot.title("Average Production Elasticities")
# get axis limits (cover both x and y so they’re equal)
lims = [
min(avg['theta_WI1_ct_update'].min(), avg['theta_WI2_ct_update'].min()),
max(avg['theta_WI1_ct_update'].max(), avg['theta_WI2_ct_update'].max())
]
# draw 45-degree line
pyplot.plot(lims, lims, 'k--', alpha=0.8)
# set equal limits for x and y
pyplot.xlim(lims)
pyplot.ylim(lims)
pyplot.gca().set_aspect('equal', adjustable='box')
pyplot.show()
print(avg)
                         theta_WI1_ct_update  theta_WI2_ct_update
industry_name
Accommodation & Food                0.841001             0.730175
Admin & Support                     0.926873             0.701194
Agriculture                         0.856625             0.827836
Arts & Entertainment                0.983819             0.715711
Construction                        0.907885             0.767549
Education                           0.818034             0.619348
Finance                             0.857796             0.713387
Healthcare                          0.906066             0.755744
Information                         0.779578             0.541651
Mfg: Chemicals/Plastics             0.808359             0.747540
Mfg: Food/Textile                   0.924312             0.789714
Mfg: Metals/Machinery               0.852678             0.707716
Mining & Oil                        0.731751             0.635227
Other                               0.855267             0.748480
Other Services                      0.838727             0.734249
Prof. Services                      0.785833             0.708301
Real Estate                         0.832144             0.635094
Retail: Nonstore                    0.899675             0.732769
Retail: Stores                      0.813386             0.716557
Transport                           0.835681             0.733017
Utilities                           0.773288             0.696491
Warehousing                         0.875534             0.766604
Wholesale Trade                     0.888247             0.805094
Heatmap by Industry¶
A heatmap represents data values as colors on a grid. In this context, the heatmaps show how each estimated elasticity changes across years within each industry and how it differs between industries. I plot two heatmaps, one for the two-input model and one for the three-input model. They show the following takeaways:
- SG&A is a confounding factor creating upward bias in the COGS elasticity: the heatmaps confirm the pattern shown in the scatterplot above. When overhead costs (SG&A) are included, the estimated COGS elasticity is lower.
- Some industries rely more on COGS than others: Information appears to be the industry with the lowest COGS intensity, while Wholesale Trade appears to be the most COGS-intensive industry, as shown by the heatmap of the extended model. This makes sense, as firms in the information sector rely on the contributions of their engineers and computing infrastructure, whereas wholesalers focus on selling and delivering goods.
- Industry reliance on COGS is relatively stable: the heatmaps do not show much within-industry variation across time, which suggests that industries tend to maintain a similar reliance on COGS over time. There are some exceptions, such as Information, Admin & Support, and Real Estate, which have all experienced a fall in their reliance on COGS, particularly since the early 2000s.
#Heatmap by industry
pivot_elast_1 = (
Delocker_elast_plots
.pivot(index="year", columns="industry_name", values="theta_WI1_ct_update")
)
pivot_elast_2 = (
Delocker_elast_plots
.pivot(index="year", columns="industry_name", values="theta_WI2_ct_update")
)
#Make the year index integer
pivot_elast_1.index = pivot_elast_1.index.astype(int)
pivot_elast_2.index = pivot_elast_2.index.astype(int)
norm = pyplot.Normalize(vmin=0, vmax=1)
fig, axes = pyplot.subplots(1, 2, figsize=(20, 8), sharey=True)
sns.heatmap(
pivot_elast_1,
cmap="YlGnBu",
norm=norm,
ax=axes[0],
cbar_kws={"label": "Elasticity"}
)
axes[0].set_title("Elasticity of COGS (θ_WI1)", fontsize=14)
axes[0].set_xlabel("Industry")
axes[0].set_ylabel("Year")
axes[0].set_xticklabels(pivot_elast_1.columns, rotation=45, ha="right") # ensure integer labels
sns.heatmap(
pivot_elast_2,
cmap="YlGnBu",
norm=norm,
ax=axes[1],
cbar_kws={"label": "Elasticity"}
)
axes[1].set_title("Elasticity of COGS in extended model with SG&A (θ_WI2)", fontsize=14)
axes[1].set_xlabel("Industry")
axes[1].set_ylabel("") # avoid repeating "Year"
axes[1].set_xticklabels(pivot_elast_2.columns, rotation=45, ha="right")
pyplot.tight_layout()
pyplot.show()
Relationship Between Profit Rates and Elasticities¶
In this final step, I study the relationship between profit rates and the model-implied elasticities (I use the three-input model for this exercise) to determine whether industries with a higher reliance on COGS are more or less profitable. I will plot a scatterplot of the elasticities $\theta_{t,extended \ model}^{(j)}$ (x axis) against the profit rates $\pi_t^{(j)}$ (y axis). Then, I formally test this relationship by running the following panel regression $$\pi_t^{(j)} = \alpha_j + \theta_{t,extended \ model}^{(j)} \beta + \epsilon_{t}^{(j)} $$
Here, $\alpha_j$ is an industry fixed effect and $\beta$ measures the following relationship:
- $\beta<0$ implies that industries that are more COGS intensive are less profitable.
- $\beta=0$ implies no relationship between industries' COGS intensity and profitability.
- $\beta>0$ implies that industries that are more COGS intensive are more profitable.
The scatterplot does not show a clear pattern. However, it suggests that the relationship between the COGS production elasticity and profit rates may differ across industries. This is confirmed in the main regression, which shows that, on average, a 1 percentage point increase in the production elasticity is associated with only a 0.0003 percentage point increase in profit rates, which is not statistically different from zero at the 5% level. However, when I run a version of the panel regression above that allows a separate slope coefficient for each industry, a more striking pattern emerges: the response is heterogeneous across industries. For example, a 1 percentage point increase in the COGS production elasticity for Retail: Stores is associated with roughly a 0.13 percentage point increase in profit rates, while the same increase in Food/Textile manufacturing is associated with roughly a 0.40 percentage point decrease in profit rates.
These results suggest that, depending on the industry, it may or may not pay off for a company to invest in a more efficient use of the inputs going into the Cost of Goods Sold. For example, the positive association between the COGS elasticity and the profit rate in some industries could be linked to better use of the goods sold and waste minimization. Conversely, the negative association in Food/Textile manufacturing may suggest that companies there gain more from optimizing other inputs, such as capital (Food/Textile manufacturing has high capital intensity requirements).
pyplot.figure(figsize=(8, 6))
sns.scatterplot(
data=Delocker_elast_plots,
x="theta_WI2_ct_update",
y="profit_rate",
hue="industry_name",
legend="full" # show all industries
)
pyplot.title("Profitability vs. COGS Elasticity in Extended SG&A Model")
pyplot.xlabel("COGS Production Elasticity in Extended SG&A model (θ₂)")
pyplot.ylabel("Profit Rate")
pyplot.xlim(left=0, right=2)
pyplot.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
pyplot.show()
#Save Regression Data and demean to remove fixed effects
reg_data = Delocker_elast_plots[["profit_rate", "theta_WI1_ct_update", "theta_WI2_ct_update", "industry_name"]].dropna().copy()
reg_data["profit_rate"] = reg_data["profit_rate"] * 100
reg_data["theta_WI1_ct_update"] = reg_data["theta_WI1_ct_update"] * 100
reg_data["theta_WI2_ct_update"] = reg_data["theta_WI2_ct_update"] * 100
reg_data_demean = reg_data.copy()
reg_data_demean["profit_rate_demeaned"] = reg_data_demean.groupby("industry_name")["profit_rate"].transform(lambda x: x - x.mean())
reg_data_demean["theta_WI2_demeaned"] = reg_data_demean.groupby("industry_name")["theta_WI2_ct_update"].transform(lambda x: x - x.mean())
reg = smfform.ols("profit_rate_demeaned ~ theta_WI2_demeaned", data=reg_data_demean).fit(cov_type="cluster",
cov_kwds={"groups": reg_data_demean["industry_name"]})
full_reg = smfform.ols("profit_rate_demeaned ~ theta_WI2_demeaned:C(industry_name)", data=reg_data_demean).fit(cov_type="cluster",
cov_kwds={"groups": reg_data_demean["industry_name"]})
print(reg.summary())
print(full_reg.summary())
OLS Regression Results
================================================================================
Dep. Variable: profit_rate_demeaned R-squared: 0.000
Model: OLS Adj. R-squared: -0.001
Method: Least Squares F-statistic: 0.001380
Date: Mon, 22 Sep 2025 Prob (F-statistic): 0.971
Time: 19:48:09 Log-Likelihood: -3220.9
No. Observations: 1195 AIC: 6446.
Df Residuals: 1193 BIC: 6456.
Df Model: 1
Covariance Type: cluster
======================================================================================
coef std err z P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------
Intercept 4.094e-16 3.28e-16 1.250 0.211 -2.33e-16 1.05e-15
theta_WI2_demeaned 0.0003 0.008 0.037 0.970 -0.016 0.017
==============================================================================
Omnibus: 123.063 Durbin-Watson: 0.511
Prob(Omnibus): 0.000 Jarque-Bera (JB): 816.253
Skew: -0.180 Prob(JB): 5.66e-178
Kurtosis: 7.033 Cond. No. 14.7
==============================================================================
Notes:
[1] Standard Errors are robust to cluster correlation (cluster)
OLS Regression Results
================================================================================
Dep. Variable: profit_rate_demeaned R-squared: 0.081
Model: OLS Adj. R-squared: 0.063
Method: Least Squares F-statistic: 4.135e+30
Date: Mon, 22 Sep 2025 Prob (F-statistic): 0.00
Time: 19:48:09 Log-Likelihood: -3170.4
No. Observations: 1195 AIC: 6389.
Df Residuals: 1171 BIC: 6511.
Df Model: 23
Covariance Type: cluster
================================================================================================================================
coef std err z P>|z| [0.025 0.975]
--------------------------------------------------------------------------------------------------------------------------------
Intercept 4.094e-16 6.35e-16 0.645 0.519 -8.34e-16 1.65e-15
theta_WI2_demeaned:C(industry_name)[Accommodation & Food] -0.1858 3.8e-16 -4.89e+14 0.000 -0.186 -0.186
theta_WI2_demeaned:C(industry_name)[Admin & Support] -0.1726 6.83e-17 -2.53e+15 0.000 -0.173 -0.173
theta_WI2_demeaned:C(industry_name)[Agriculture] 0.0734 2.33e-16 3.16e+14 0.000 0.073 0.073
theta_WI2_demeaned:C(industry_name)[Arts & Entertainment] 0.0069 5.26e-17 1.3e+14 0.000 0.007 0.007
theta_WI2_demeaned:C(industry_name)[Construction] -0.0634 1.37e-17 -4.64e+15 0.000 -0.063 -0.063
theta_WI2_demeaned:C(industry_name)[Education] 0.0610 2.65e-16 2.3e+14 0.000 0.061 0.061
theta_WI2_demeaned:C(industry_name)[Finance] -0.2734 4.79e-17 -5.7e+15 0.000 -0.273 -0.273
theta_WI2_demeaned:C(industry_name)[Healthcare] 0.2637 1.33e-16 1.98e+15 0.000 0.264 0.264
theta_WI2_demeaned:C(industry_name)[Information] 0.0014 1.63e-16 8.5e+12 0.000 0.001 0.001
theta_WI2_demeaned:C(industry_name)[Mfg: Chemicals/Plastics] -0.1312 1.21e-16 -1.08e+15 0.000 -0.131 -0.131
theta_WI2_demeaned:C(industry_name)[Mfg: Food/Textile] -0.4016 2.18e-16 -1.84e+15 0.000 -0.402 -0.402
theta_WI2_demeaned:C(industry_name)[Mfg: Metals/Machinery] 0.1744 7.3e-16 2.39e+14 0.000 0.174 0.174
theta_WI2_demeaned:C(industry_name)[Mining & Oil] 0.4498 1.37e-16 3.29e+15 0.000 0.450 0.450
theta_WI2_demeaned:C(industry_name)[Other] 0.0064 1.7e-17 3.79e+14 0.000 0.006 0.006
theta_WI2_demeaned:C(industry_name)[Other Services] -0.0043 2.95e-17 -1.44e+14 0.000 -0.004 -0.004
theta_WI2_demeaned:C(industry_name)[Prof. Services] -0.0315 5.32e-17 -5.92e+14 0.000 -0.031 -0.031
theta_WI2_demeaned:C(industry_name)[Real Estate] -0.2031 1.7e-16 -1.19e+15 0.000 -0.203 -0.203
theta_WI2_demeaned:C(industry_name)[Retail: Nonstore] -0.0322 1.31e-17 -2.46e+15 0.000 -0.032 -0.032
theta_WI2_demeaned:C(industry_name)[Retail: Stores] 0.1287 5.38e-17 2.39e+15 0.000 0.129 0.129
theta_WI2_demeaned:C(industry_name)[Transport] -0.0735 7.82e-17 -9.4e+14 0.000 -0.074 -0.074
theta_WI2_demeaned:C(industry_name)[Utilities] 0.0230 4.78e-17 4.81e+14 0.000 0.023 0.023
theta_WI2_demeaned:C(industry_name)[Warehousing] 0.0033 6.63e-17 5.02e+13 0.000 0.003 0.003
theta_WI2_demeaned:C(industry_name)[Wholesale Trade] -0.0429 1.85e-16 -2.32e+14 0.000 -0.043 -0.043
==============================================================================
Omnibus: 160.527 Durbin-Watson: 0.599
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1460.818
Skew: -0.264 Prob(JB): 0.00
Kurtosis: 8.391 Cond. No. 24.6
==============================================================================
Notes:
[1] Standard Errors are robust to cluster correlation (cluster)
Final Steps¶
Finally, I standardize the log-productivity estimates $ \widehat{\omega}_{it}^{(j)}$ by their firm average, so all values are interpreted as percentage (log) deviations from the firm's steady state \begin{align*} \tilde{\omega}_{it}^{(j)} = & \ \widehat{\omega}_{it}^{(j)} - \widehat{\overline{\omega}}_{i}^{(j)} \end{align*} where $\widehat{\overline{\omega}}_{i}^{(j)} = \frac{1}{T}\sum_{t=1}^T \widehat{\omega}_{it}^{(j)} $ and $T$ is the number of observed periods for firm $i$. I then save the output.
Data = pd.merge(Data,Productivities, how='left', on=['gvkey','year','ind2d'])
Data = pd.merge(Data,Productivities_ext, how='left', on=['gvkey','year','ind2d'])
#Normalize firm productivity to the historical average
Data['omega_bar']= Data['omega'].groupby(Data['gvkey']).transform('mean')
Data['omega_tilde']= Data['omega'] -Data['omega_bar']
Data['omega_bar_ext']= Data['omega_ext'].groupby(Data['gvkey']).transform('mean')
Data['omega_tilde_ext']= Data['omega_ext'] -Data['omega_bar_ext']
#Finally save the output into .dta files
pyreadstat.write_dta(Delocker_elast,dst_path ='./output/theta_W_s_window_update.dta')
pyreadstat.write_dta(df=Data,dst_path ='./output/data_main_upd_trim_1_prod.dta')