* (Extraction residue; repeats the filter steps of section 2 below: optional year filters, then drop cities with only one county via egen count_county = count(county_id), by(city_id year) and drop if count_county == 1.)
cls
**==========================================
** Program Name: Construct GINI index
** Author: Yilin Chen
** Date: 2023-10-01
**------------------------------------------
** Inputs/Outputs:
* Data files used:
* <https://raw.githubusercontent.com/eileenCHEN-9/PhD_2022_2025/main/data/county_predicted.dta>
* Data files created as intermediate product:
*   City_Inequality_Data.dta (city-by-year inequality measures, saved later in this script)
*===========================================

* Work from the project data folder so the relative file name used by -use- resolves.
cd "/Users/yilinchen/Documents/PhD/thesis/PhD_2022_2025/data"

* Requires the user-written ineqdeco command; uncomment to install it from SSC.
* ssc install ineqdeco, replace

** 1. Setup
* Start from an empty session before changing the variable limit.
clear all
set maxvar 32567
set more off
capture log close
macro drop _all
* Commands from here on are interpreted under Stata 15 rules.
version 15
** 2. Import county level dataset
use county_predicted.dta, clear

* Remove Hongkong, Macau and Taiwan (by province code) and Sansha city in the
* South China Sea (no population).
drop if inlist(province_id, 810000, 820000, 710000)
drop if city_id == 460300

* Optional year filters, currently disabled:
*keep if year>2014
*keep if year<2016

* Cities with a single county cannot show within-city dispersion, so drop them.
* count_county = number of non-missing county_id values in each city-year.
egen count_county = count(county_id), by(city_id year)
label variable count_county "Number of counties per city"
drop if count_county == 1

*drop if missing(pred_gdppc_county)

* Per capita series used by the inequality sections below:
* predicted GDP per capita is the exponential of the predicted log value,
* and the light series is a straight copy of total_meanlight.
generate pred_gdppc_county = exp(lg_gdppc_predicted)
label variable pred_gdppc_county "Predicted GDP per capita using NTL(county)"
generate totallightpc_county = total_meanlight
label variable totallightpc_county "Mean of lights (county)"

summarize
describe
** 3. Calculations of regional inequality measures using predicted GDP
* For each city-year, ineqdeco is run ONCE with population weights and every
* index it returns is stored from that single call (previously the identical
* command was re-run in five separate loops, one per index).
*
* The result variables are created in this exact order because the collapse
* steps later in the script select them with the varlist range
* GINIW_pred_GDP_pc - COVW_GDP_pc.
gen GINIW_pred_GDP_pc = .
gen GE_m1W_pred_GDP_pc = .
gen GE_0W_pred_GDP_pc = .
gen GE_1W_pred_GDP_pc = .
* GE(2) is only an intermediate: it is converted to COVW and dropped below.
gen GE_2w_pred_GDP_pc = .

egen group = group(city_id year)
su group, meanonly
qui forval g = 1/`r(max)' {
    * Skip city-years that have no county with both a value and a usable
    * population weight; ineqdeco would have no observations to work with
    * (assumes it drops missing/zero-weight rows, as weighted commands do).
    count if group == `g' & !missing(pred_gdppc_county) ///
        & !missing(total_population) & total_population > 0
    if r(N) > 0 {
        ineqdeco pred_gdppc_county [aw=total_population] if group == `g'
        * replace does not disturb r(), so all results come from the one call.
        replace GINIW_pred_GDP_pc  = r(gini) if group == `g'
        replace GE_m1W_pred_GDP_pc = r(gem1) if group == `g'
        replace GE_0W_pred_GDP_pc  = r(ge0)  if group == `g'
        replace GE_1W_pred_GDP_pc  = r(ge1)  if group == `g'
        replace GE_2w_pred_GDP_pc  = r(ge2)  if group == `g'
    }
}
drop group

*foreach var of varlist _Icounty_id_* {
* drop `var'
*}

*COVW GE(2) is half the square of the coefficient of variation.
gen COVW_pred_GDP_pc = sqrt(GE_2w_pred_GDP_pc*2)
drop GE_2w_pred_GDP_pc
** 4. Calculations of regional inequality measures using lights
*replace totallightpc_county = . if totallightpc_county == 0
* For each city-year, ineqdeco is run ONCE with population weights and every
* index it returns is stored from that single call (previously the identical
* command was re-run in five separate loops, one per index).
*
* The result variables are created in this exact order because the collapse
* steps later in the script select them with the varlist range
* GINIW_pred_GDP_pc - COVW_GDP_pc.
gen GINIW_light_pc = .
gen GE_m1W_light_pc = .
gen GE_0W_light_pc = .
gen GE_1W_light_pc = .
* GE(2) is only an intermediate: it is converted to COVW and dropped below.
gen GE_2w_light_pc = .

egen group = group(city_id year)
su group, meanonly
qui forval g = 1/`r(max)' {
    * Skip city-years that have no county with both a value and a usable
    * population weight; ineqdeco would have no observations to work with
    * (assumes it drops missing/zero-weight rows, as weighted commands do).
    count if group == `g' & !missing(totallightpc_county) ///
        & !missing(total_population) & total_population > 0
    if r(N) > 0 {
        ineqdeco totallightpc_county [aw=total_population] if group == `g'
        * replace does not disturb r(), so all results come from the one call.
        replace GINIW_light_pc  = r(gini) if group == `g'
        replace GE_m1W_light_pc = r(gem1) if group == `g'
        replace GE_0W_light_pc  = r(ge0)  if group == `g'
        replace GE_1W_light_pc  = r(ge1)  if group == `g'
        replace GE_2w_light_pc  = r(ge2)  if group == `g'
    }
}
drop group

*COVW GE(2) is half the square of the coefficient of variation.
gen COVW_light_pc = sqrt(GE_2w_light_pc*2)
drop GE_2w_light_pc
** 5. Calculations of regional inequality measures using observed GDP
* Zero observed GDP per capita is treated as missing.
replace county_rgdppc = . if county_rgdppc == 0

* For each city-year, ineqdeco is run ONCE with population weights and every
* index it returns is stored from that single call (previously the identical
* command was re-run in five separate loops, one per index).
*
* The result variables are created in this exact order because the collapse
* steps later in the script select them with the varlist range
* GINIW_pred_GDP_pc - COVW_GDP_pc.
gen GINIW_GDP_pc = .
gen GE_m1W_GDP_pc = .
gen GE_0W_GDP_pc = .
gen GE_1W_GDP_pc = .
* GE(2) is only an intermediate: it is converted to COVW and dropped below.
gen GE_2w_GDP_pc = .

egen group = group(city_id year)
su group, meanonly
qui forval g = 1/`r(max)' {
    * Skip city-years that have no county with both a value and a usable
    * population weight; ineqdeco would have no observations to work with
    * (assumes it drops missing/zero-weight rows, as weighted commands do).
    count if group == `g' & !missing(county_rgdppc) ///
        & !missing(total_population) & total_population > 0
    if r(N) > 0 {
        ineqdeco county_rgdppc [aw=total_population] if group == `g'
        * replace does not disturb r(), so all results come from the one call.
        replace GINIW_GDP_pc  = r(gini) if group == `g'
        replace GE_m1W_GDP_pc = r(gem1) if group == `g'
        replace GE_0W_GDP_pc  = r(ge0)  if group == `g'
        replace GE_1W_GDP_pc  = r(ge1)  if group == `g'
        replace GE_2w_GDP_pc  = r(ge2)  if group == `g'
    }
}
drop group

*COVW GE(2) is half the square of the coefficient of variation.
gen COVW_GDP_pc = sqrt(GE_2w_GDP_pc*2)
drop GE_2w_GDP_pc
*There are negative inequality measurements due to missing values, so we replace them with . (missing)
* NOTE(review): only the light- and observed-GDP-based measures are cleaned here;
* the predicted-GDP measures are left untouched, as before - confirm that is intended.
foreach src in light_pc GDP_pc {
    foreach idx in GINIW GE_m1W GE_0W GE_1W COVW {
        replace `idx'_`src' = . if `idx'_`src' < 0
    }
}

*Aggregate city-level GINI index
* Every measure is constant within a city-year, so taking the first value keeps it.
* The collapse is keyed on (city_id year), the same key the measures were computed
* on. The earlier key string(year) + city depended on the city name being unique
* per city_id; two cities sharing a name would have been merged into one row.
collapse (first) GINIW_pred_GDP_pc - COVW_GDP_pc, by(city_id year)
sort city_id year
save "/Users/yilinchen/Documents/PhD/thesis/PhD_2022_2025/data/City_Inequality_Data.dta", replace
*Collapse to cross section
* Average each inequality measure over all years available for a city.
collapse (mean) GINIW_pred_GDP_pc - COVW_GDP_pc, by(city_id)
describe
summarize

*Correlations
* For each index, compare the predicted-GDP, observed-GDP and lights versions.
foreach idx in GINIW GE_m1W GE_0W GE_1W COVW {
    pwcorr `idx'_pred_GDP_pc `idx'_GDP_pc `idx'_light_pc
}