Elezioni Regionali: Liguria del 27-28 ottobre 2024¶

Dal sito eligendo... https://elezioni.interno.gov.it/risultati/20241027/regionali/scrutini/italia/07

In [1]:
import os
import requests
import json
import folium
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import pandas as pd
from datetime import datetime
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# from itables import init_notebook_mode, show
# init_notebook_mode(all_interactive=True)
In [2]:
# Base directory for the Liguria 2024 election data, under the user's home.
elezioni_path = os.path.join(os.path.expanduser('~'), 'ILAB_DATA', 'LIGURIA_2024')
In [3]:
# HTTP headers mimicking a desktop Firefox session, sent with every API call.
_header_fields = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0'),
    ('Accept', 'application/json, text/javascript, */*; q=0.01'),
    ('Referer', 'https://elezioni.interno.gov.it/'),
]
headers = dict(_header_fields)

Reperimento informazioni territoriali (comuni della regione)¶

In [4]:
# Download the municipality boundaries for region 07 (Liguria) from the
# ministry's map service, then flatten the GeoDataFrame to a list of dicts.
url = 'https://elezioni.interno.gov.it/mappe/comuni_07.geojson'
comuni_07 = gpd.read_file(url).to_dict('records')
In [5]:
# Fields to keep from each municipality's API payload, grouped by topic
# (original order preserved).
variables = [
    # election metadata and section identifiers
    'dt_ele', 'cod_sez', 'desc_sez', 'desc_com',
    # electorate and turnout
    'ele_m', 'ele_f', 'ele_t', 'vot_m', 'vot_f', 'vot_t', 'perc_vot',
    # section-counting progress
    'sz_perv', 'sz_tot', 'fine_rip',
    # ballots
    'sk_bianche', 'sk_nulle', 'sk_contestate', 'tot_vot_lis', 'non_valid',
    # historical / administrative codes
    'osp', 'data_prec_elez', 'circ_sto', 'reg_sto', 'prov_sto', 'comu_sto', 'sez_sto',
]

Scraping dei dati degli scrutini¶

In [6]:
# One-off scraping step, deliberately left commented out so a full
# "Restart & Run All" does not re-hit the ministry's API: the result is
# cached to CSV/scrutini_liguria.csv and reloaded by the next cell.
# For each municipality it queries the eleapi endpoint, keeps the fields
# listed in `variables`, normalises the date and the comma-decimal
# percentages, and appends one record per municipality.
# NOTE(review): the `ele_t < vot_t` guard presumably zeroes the turnout
# percentage when the data is inconsistent (more voters than electors) —
# confirm against the API semantics.
# n_comuni = len(comuni_07)
# errors = 0
# comuni = []
# for i, comune_key in enumerate(comuni_07):
#     comune_name = comune_key['name']
#     minint_elettorale = comune_key['minint_elettorale']
#     cod_reg = minint_elettorale[:3]
#     cod_prov = minint_elettorale[3:6]
#     cod_com = minint_elettorale[6:]
#     print(f'Sto elaborando {comune_name} ({i+1}/{n_comuni})')
#     try:
#         api_endpoint = f"https://eleapi.interno.gov.it/siel/PX/scrutiniR/DE/20241027/TE/07/RE/07/PR/{cod_prov}/CM/{cod_com}"
#         response = requests.get(api_endpoint, verify=True, headers=headers).json()
#         comune = {k:v for k,v in response['int'].items() if k in variables}
#         comune['cod_prov'] = cod_prov
#         comune['cod_com'] = cod_com
#         comune['minint_elettorale'] = minint_elettorale
#         comune['dt_ele'] = datetime.strptime(str(comune['dt_ele']),'%Y%m%d%H%M%S').strftime('%Y-%m-%d')
#         if comune['ele_t'] < comune['vot_t']:
#             comune['perc_vot'] = '0'
#         comune['perc_vot'] = float(comune['perc_vot'].replace(',','.'))
#         for p in response['cand']:
#             nome_cand = p['nome'] + ' ' + p['cogn']
#             comune[f'{nome_cand}_voti'] = p['voti']
#             comune[f'{nome_cand}_perc'] = float(p['perc'].replace(',','.'))
#         comuni.append(comune)
        
#     except Exception as e:
#         print(e)
#         errors+=1

# df = pd.DataFrame(comuni) 
# print(df.shape)
# df.to_csv(os.path.join(elezioni_path,'CSV/scrutini_liguria.csv'), sep=';',index=False)

Visualizzazione dei dati¶

In [7]:
# Reload the cached scrutiny results; string dtypes preserve the leading
# zeros in the ministry's province/municipality codes.
types = {'cod_com': str, 'cod_prov': str, 'minint_elettorale': str}
csv_scrutini = os.path.join(elezioni_path, 'CSV/scrutini_liguria.csv')
df = pd.read_csv(csv_scrutini, sep=';', dtype=types)
# Re-download the municipality geometries (kept as a GeoDataFrame this time).
url = 'https://elezioni.interno.gov.it/mappe/comuni_07.geojson'
comuni_07 = gpd.read_file(url)
In [8]:
# Sanity check: inspect the full scraped record for Genoa.
df[df['desc_com']=='GENOVA'].to_dict('records')
Out[8]:
[{'dt_ele': '2024-10-27',
  'desc_com': 'GENOVA',
  'ele_m': 225668,
  'ele_f': 253210,
  'ele_t': 478878,
  'vot_m': 114018,
  'vot_f': 125926,
  'vot_t': 239944,
  'perc_vot': 50.11,
  'sz_tot': 653,
  'fine_rip': 'S',
  'sk_bianche': 1217,
  'sk_nulle': 5610,
  'sk_contestate': 70,
  'tot_vot_lis': 217864,
  'non_valid': nan,
  'data_prec_elez': 20200920000000,
  'reg_sto': 7,
  'prov_sto': 34,
  'comu_sto': 250,
  'circ_sto': 34,
  'cod_prov': '034',
  'cod_com': '0250',
  'minint_elettorale': '1070340250',
  'ANDREA ORLANDO_voti': 121821,
  'ANDREA ORLANDO_perc': 52.27,
  'MARCO BUCCI_voti': 103219,
  'MARCO BUCCI_perc': 44.29,
  'FRANCESCO TOSCANO_voti': 1764,
  'FRANCESCO TOSCANO_perc': 0.76,
  'ALESSANDRO ROSSON_voti': 319,
  'ALESSANDRO ROSSON_perc': 0.14,
  'DAVIDE FELICE_voti': 464,
  'DAVIDE FELICE_perc': 0.2,
  'MARCO GIUSEPPE FERRANDO_voti': 961,
  'MARCO GIUSEPPE FERRANDO_perc': 0.41,
  'NICOLA MORRA_voti': 2457,
  'NICOLA MORRA_perc': 1.05,
  'NICOLA ROLLANDO_voti': 1534,
  'NICOLA ROLLANDO_perc': 0.66,
  'MARIA ANTONIETTA CELLA_voti': 508,
  'MARIA ANTONIETTA CELLA_perc': 0.22}]

Selezione delle colonne del dataset degli scrutini¶

In [9]:
# Keep only the join key plus votes/percentages of the two main candidates.
keep_cols = ['minint_elettorale',
             'ANDREA ORLANDO_voti', 'ANDREA ORLANDO_perc',
             'MARCO BUCCI_voti', 'MARCO BUCCI_perc']
df = df[keep_cols].copy()

Attribuzione delle informazioni geografiche¶

In [10]:
# Attach geometry and the ISTAT code to the scrutiny data, joining on the
# ministry's electoral municipality code.
# Fixed idiom: both sides join on the same column, so `on=` replaces the
# redundant left_on/right_on pair (identical result).
df_adv = comuni_07[['name', 'minint_elettorale', 'geometry', 'com_istat_code']].merge(
    df, on='minint_elettorale')

Suddivisione nei due dataset per maggioranza dei voti¶

In [11]:
# Partition the municipalities by which main candidate received more votes.
# NOTE(review): a municipality with an exact tie would fall in neither subset.
ao = df_adv.loc[df_adv['ANDREA ORLANDO_voti'].gt(df_adv['MARCO BUCCI_voti'])].copy()
mb = df_adv.loc[df_adv['ANDREA ORLANDO_voti'].lt(df_adv['MARCO BUCCI_voti'])].copy()

Visualizzazione su base comunale¶

In [12]:
# Interactive choropleth: blue shades where Orlando leads, red where Bucci
# leads; the layer control lets the reader toggle each candidate's layer.
m = ao.explore(column='ANDREA ORLANDO_perc',cmap='Blues', name='Orlando')
m = mb.explore(m=m,column='MARCO BUCCI_perc', cmap='Reds', name='Bucci')
folium.LayerControl().add_to(m)
# Last expression renders the folium map inline.
m
Out[12]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Utilizzo della tecnica "sample_points"¶

Creazione di punti casuali all'interno dei poligoni dei comuni: il numero di punti è deterministico e proporzionale alla percentuale di voti ottenuta dal candidato, mentre la posizione dei punti è casuale

In [13]:
# Draw, inside each municipality polygon, a number of random points
# proportional to the leading candidate's vote percentage (fixed seed for
# reproducibility). The point locations are random; the count is not.
# Fixed: the original `.mul(1)` was a dead no-op — the scale factor is now
# an explicit, tunable constant (1 point per percentage point by default).
PUNTI_PER_PERCENTO = 1  # points drawn per percentage point of support
seed = 1234512345
p_voti = ao['ANDREA ORLANDO_perc'].mul(PUNTI_PER_PERCENTO).astype(int).values
ao['p_voti'] = ao.sample_points(p_voti, rng=seed)
ao_p = ao.set_geometry('p_voti')
p_voti = mb['MARCO BUCCI_perc'].mul(PUNTI_PER_PERCENTO).astype(int).values
mb['p_voti'] = mb.sample_points(p_voti, rng=seed)
mb_p = mb.set_geometry('p_voti')

Visualizzazione¶

In [14]:
# Same two-layer map, but rendering the sampled points (exploded from
# MultiPoint to individual Point rows) instead of the polygons.
m = ao_p.explode(index_parts=True).explore(column='ANDREA ORLANDO_perc',cmap='Blues', name='Orlando')
m = mb_p.explode(index_parts=True).explore(m=m,column='MARCO BUCCI_perc', cmap='Reds', name='Bucci')
folium.LayerControl().add_to(m)
# Last expression renders the folium map inline.
m
Out[14]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Integrazione con i dati ISTAT (popolazione per classi di età) e Corine Land Cover¶

In [15]:
# Load the pre-built ISTAT layers (municipal indicators + population by age
# class) and keep only the total-population rows of the age table.
istat_path = os.path.join(os.path.expanduser('~'), 'ILAB_DATA', 'ISTAT', 'DATA')
comuni_adv = gpd.read_parquet(os.path.join(istat_path, 'comuni_adv.parquet'))
comuni_pop_age = pd.read_parquet(os.path.join(istat_path, 'comuni_pop_age.parquet'))
pop_totale = comuni_pop_age[comuni_pop_age['SEX'] == 'total']
comuni_adv = comuni_adv.merge(pop_totale, left_on='pro_com_t', right_on='REF_AREA')
In [16]:
# Join the election percentages onto the ISTAT/Corine indicators and keep
# the analysis columns, indexed by the ISTAT municipality code.
dati_liguria = comuni_adv.merge(
    df_adv[['com_istat_code', 'ANDREA ORLANDO_voti', 'ANDREA ORLANDO_perc',
            'MARCO BUCCI_voti', 'MARCO BUCCI_perc']],
    left_on='pro_com_t', right_on='com_istat_code')
analysis_cols = ['pro_com_t', 'area_km2', 'females', 'males', 'total',
                 'AGRICULTURAL', 'ARTIFICIAL_NON_AGRICULTURAL_VEGETATED',
                 'FOREST_AND_SEMI_NATURAL', 'INDUSTRIAL_COMMERCIAL_AND_TRANSPORT',
                 'MINE_DUMP_AND_CONSTRUCTION', 'URBAN', 'WATER_BODIES', 'WETLANDS',
                 'Y20-29', 'Y30-49', 'Y50-69', 'Y70-109',
                 'ANDREA ORLANDO_perc', 'MARCO BUCCI_perc']
data = dati_liguria[analysis_cols].copy().set_index('pro_com_t')
In [17]:
# Display the assembled table (234 municipalities x 18 indicators per the output).
data
Out[17]:
area_km2 females males total AGRICULTURAL ARTIFICIAL_NON_AGRICULTURAL_VEGETATED FOREST_AND_SEMI_NATURAL INDUSTRIAL_COMMERCIAL_AND_TRANSPORT MINE_DUMP_AND_CONSTRUCTION URBAN WATER_BODIES WETLANDS Y20-29 Y30-49 Y50-69 Y70-109 ANDREA ORLANDO_perc MARCO BUCCI_perc
pro_com_t
008001 14.626779 170 200 370 0.000000 0.0 100.000000 0.000000 0.0 0.000000 0.000000 0.0 17 92 106 106 49.32 45.89
008002 19.936356 301 311 612 36.121438 0.0 63.878562 0.000000 0.0 0.000000 0.000000 0.0 48 150 194 129 47.37 45.86
008003 10.060088 65 73 138 15.525990 0.0 84.474010 0.000000 0.0 0.000000 0.000000 0.0 12 28 54 31 4.60 94.25
008004 10.092486 61 59 120 6.612798 0.0 93.387202 0.000000 0.0 0.000000 0.000000 0.0 11 23 39 31 20.00 76.00
008005 9.142917 157 170 327 31.141420 0.0 68.858580 0.000000 0.0 0.000000 0.000000 0.0 30 67 88 97 30.13 66.67
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
011028 67.775785 609 653 1262 8.512419 0.0 91.030770 0.000000 0.0 0.456811 0.000000 0.0 94 269 398 375 57.38 38.60
011029 137.588344 885 873 1758 19.445364 0.0 80.554636 0.000000 0.0 0.000000 0.000000 0.0 122 347 573 509 43.67 51.45
011030 12.302393 362 346 708 15.554249 0.0 83.883300 0.000000 0.0 0.000000 0.562451 0.0 53 115 249 230 63.85 32.98
011031 18.367933 3676 3486 7162 37.219224 0.0 48.855779 3.113716 0.0 10.811281 0.000000 0.0 622 1581 2389 1527 52.72 43.77
011032 28.700802 231 238 469 22.675573 0.0 77.324427 0.000000 0.0 0.000000 0.000000 0.0 23 104 148 121 32.75 63.76

234 rows × 18 columns

In [18]:
# Convert the absolute counts into percentages of the resident population so
# municipalities of very different sizes become comparable.
# The six copy-pasted assignments are now a single loop; 'total' itself must
# stay untouched because every column is divided by it.
for col in ['females', 'males', 'Y20-29', 'Y30-49', 'Y50-69', 'Y70-109']:
    data[col] = data[col] / data['total'] * 100
In [19]:
# Cache the analysis table; the PCA section reloads it from this CSV.
data.to_csv(os.path.join(elezioni_path,'CSV','data_liguria.csv'),sep=';')
In [20]:
# Pairwise correlations, with strong off-diagonal values (|r| > 0.7)
# highlighted in red for quick scanning.
corr_matrix = data.corr()
corr_v = 0.7

def _highlight_strong(row):
    """Red background for cells with |r| above the threshold, skipping the diagonal."""
    return ["background-color: red" if abs(v) > corr_v and v != 1 else ""
            for v in row]

corr_matrix.style.apply(_highlight_strong, axis=1)
Out[20]:
  area_km2 females males total AGRICULTURAL ARTIFICIAL_NON_AGRICULTURAL_VEGETATED FOREST_AND_SEMI_NATURAL INDUSTRIAL_COMMERCIAL_AND_TRANSPORT MINE_DUMP_AND_CONSTRUCTION URBAN WATER_BODIES WETLANDS Y20-29 Y30-49 Y50-69 Y70-109 ANDREA ORLANDO_perc MARCO BUCCI_perc
area_km2 1.000000 -0.032290 0.032290 0.626015 -0.241463 -0.019330 0.237046 0.106459 0.093418 -0.121386 -0.148969 0.031280 -0.041038 -0.058275 0.022249 0.078398 0.143434 -0.201550
females -0.032290 1.000000 -1.000000 0.129608 0.250803 0.057499 -0.398878 0.149995 0.126169 0.468177 0.242101 0.046052 -0.090424 -0.090289 -0.167106 -0.065218 0.204537 -0.187057
males 0.032290 -1.000000 1.000000 -0.129608 -0.250803 -0.057499 0.398878 -0.149995 -0.126169 -0.468177 -0.242101 -0.046052 0.090424 0.090289 0.167106 0.065218 -0.204537 0.187057
total 0.626015 0.129608 -0.129608 1.000000 -0.007709 0.014484 -0.094005 0.284213 0.153594 0.199749 0.023596 0.026755 0.083790 0.062599 -0.095519 -0.060344 0.129856 -0.120852
AGRICULTURAL -0.241463 0.250803 -0.250803 -0.007709 1.000000 -0.007968 -0.913782 0.064477 0.051967 0.223816 0.132012 0.102653 0.189106 0.280130 -0.196407 -0.333190 -0.032664 0.049824
ARTIFICIAL_NON_AGRICULTURAL_VEGETATED -0.019330 0.057499 -0.057499 0.014484 -0.007968 1.000000 -0.029302 0.054147 -0.023721 0.029940 -0.003275 0.169265 0.036603 0.096524 -0.035088 -0.093645 0.012479 -0.006709
FOREST_AND_SEMI_NATURAL 0.237046 -0.398878 0.398878 -0.094005 -0.913782 -0.029302 1.000000 -0.245982 -0.139337 -0.587146 -0.356095 -0.150912 -0.205909 -0.241148 0.205397 0.320120 -0.074330 0.052368
INDUSTRIAL_COMMERCIAL_AND_TRANSPORT 0.106459 0.149995 -0.149995 0.284213 0.064477 0.054147 -0.245982 1.000000 0.196244 0.254323 0.123812 0.110610 0.142396 0.161984 -0.113607 -0.204243 0.221407 -0.213716
MINE_DUMP_AND_CONSTRUCTION 0.093418 0.126169 -0.126169 0.153594 0.051967 -0.023721 -0.139337 0.196244 1.000000 0.163254 0.206343 -0.013185 0.055335 -0.002820 0.002532 -0.059580 0.142298 -0.135865
URBAN -0.121386 0.468177 -0.468177 0.199749 0.223816 0.029940 -0.587146 0.254323 0.163254 1.000000 0.533635 0.119573 0.107905 -0.004329 -0.101009 -0.077300 0.212145 -0.193660
WATER_BODIES -0.148969 0.242101 -0.242101 0.023596 0.132012 -0.003275 -0.356095 0.123812 0.206343 0.533635 1.000000 0.130519 -0.052216 -0.109982 0.066269 0.066356 0.073356 -0.063806
WETLANDS 0.031280 0.046052 -0.046052 0.026755 0.102653 0.169265 -0.150912 0.110610 -0.013185 0.119573 0.130519 1.000000 0.011916 0.053785 -0.006159 -0.052124 0.088126 -0.087273
Y20-29 -0.041038 -0.090424 0.090424 0.083790 0.189106 0.036603 -0.205909 0.142396 0.055335 0.107905 -0.052216 0.011916 1.000000 0.270414 -0.192018 -0.519766 -0.012893 0.053024
Y30-49 -0.058275 -0.090289 0.090289 0.062599 0.280130 0.096524 -0.241148 0.161984 -0.002820 -0.004329 -0.109982 0.053785 0.270414 1.000000 -0.507460 -0.732939 -0.015915 0.017318
Y50-69 0.022249 -0.167106 0.167106 -0.095519 -0.196407 -0.035088 0.205397 -0.113607 0.002532 -0.101009 0.066269 -0.006159 -0.192018 -0.507460 1.000000 0.060071 0.034425 -0.052197
Y70-109 0.078398 -0.065218 0.065218 -0.060344 -0.333190 -0.093645 0.320120 -0.204243 -0.059580 -0.077300 0.066356 -0.052124 -0.519766 -0.732939 0.060071 1.000000 -0.077081 0.056062
ANDREA ORLANDO_perc 0.143434 0.204537 -0.204537 0.129856 -0.032664 0.012479 -0.074330 0.221407 0.142298 0.212145 0.073356 0.088126 -0.012893 -0.015915 0.034425 -0.077081 1.000000 -0.964133
MARCO BUCCI_perc -0.201550 -0.187057 0.187057 -0.120852 0.049824 -0.006709 0.052368 -0.213716 -0.135865 -0.193660 -0.063806 -0.087273 0.053024 0.017318 -0.052197 0.056062 -0.964133 1.000000
In [21]:
# Plotting the correlation matrix for visual representation
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("Correlation Matrix Heatmap")
plt.show()
No description has been provided for this image
In [22]:
# Drop one column of each perfectly anti-correlated pair seen above:
# 'males' mirrors 'females' (r = -1) and Bucci's share mirrors Orlando's.
data.drop(columns=['males', 'MARCO BUCCI_perc'], inplace=True)

Analisi Cluster¶

In [23]:
# Work on a copy so the cluster labels do not pollute the source table.
data_kmeans = data.copy()
In [24]:
# Standardize the indicators, run k-means with k=4 (fixed seed), and attach
# human-readable segment names for the map legend.
scaler = StandardScaler()
data_std = scaler.fit_transform(data_kmeans)
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=42)
kmeans.fit(data_std)
data_kmeans.reset_index(inplace=True)
data_kmeans['Segment k-means'] = kmeans.labels_
segment_names = {0: 'primo', 1: 'secondo', 2: 'terzo', 3: 'quarto'}
data_kmeans['segment'] = data_kmeans['Segment k-means'].map(segment_names)
In [25]:
# Map the four clusters back onto the municipality geometries.
data_out = comuni_adv[['pro_com_t','geometry']].merge(data_kmeans)
data_out.explore(column='segment',cmap=['green','red','pink','magenta'],
            legend=True,legend_kwds={'caption':'Cluster'})
Out[25]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Analisi Cluster - PCA¶

In [26]:
# Reload the cached analysis table (fresh copy: the columns dropped in the
# previous section are present again).
data = (
    pd.read_csv(os.path.join(elezioni_path, 'CSV', 'data_liguria.csv'),
                sep=';', dtype={'pro_com_t': str})
      .set_index('pro_com_t')
)
In [27]:
# List the available columns before choosing which ones to exclude from the PCA.
data.columns
Out[27]:
Index(['area_km2', 'females', 'males', 'total', 'AGRICULTURAL',
       'ARTIFICIAL_NON_AGRICULTURAL_VEGETATED', 'FOREST_AND_SEMI_NATURAL',
       'INDUSTRIAL_COMMERCIAL_AND_TRANSPORT', 'MINE_DUMP_AND_CONSTRUCTION',
       'URBAN', 'WATER_BODIES', 'WETLANDS', 'Y20-29', 'Y30-49', 'Y50-69',
       'Y70-109', 'ANDREA ORLANDO_perc', 'MARCO BUCCI_perc'],
      dtype='object')
In [28]:
# NOTE(review): this earlier, narrower exclusion list was dead code — it was
# immediately overwritten by the next cell. Kept here, disabled, as the
# alternative that was tried.
# to_drop = ['MARCO BUCCI_perc',
#            'males',
#            'MINE_DUMP_AND_CONSTRUCTION',
#            'WATER_BODIES',
#            'WETLANDS']
In [29]:
# Columns excluded from the PCA input (one per line for easy editing).
to_drop = [
    'area_km2',
    'males',
    'AGRICULTURAL',
    'ARTIFICIAL_NON_AGRICULTURAL_VEGETATED',
    'FOREST_AND_SEMI_NATURAL',
    'INDUSTRIAL_COMMERCIAL_AND_TRANSPORT',
    'MINE_DUMP_AND_CONSTRUCTION',
    'WATER_BODIES',
    'WETLANDS',
    'MARCO BUCCI_perc',
]
In [30]:
# Standardize the selected features and fit a full PCA (all components) to
# inspect the explained-variance profile.
X = data.drop(columns=to_drop)
X_std = StandardScaler().fit_transform(X)
pca = PCA()
pca.fit(X_std)
Out[30]:
PCA()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA()
In [31]:
# Cumulative explained variance per number of components (scree-style plot),
# used to choose how many components to keep.
n_componenti = len(pca.explained_variance_ratio_)
plt.figure(figsize=(10, 8))
plt.plot(range(1, n_componenti + 1),
         pca.explained_variance_ratio_.cumsum(),
         marker='o', linestyle='--')
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.title('Explained variance by components')
Out[31]:
Text(0.5, 1.0, 'Explained variance by components')
No description has been provided for this image
In [32]:
# Refit the PCA keeping the first 5 components (chosen from the plot above)
# and project the standardized data onto them.
componenti_pca = 5
pca = PCA(n_components=componenti_pca)
pca.fit(X_std)
scores_pca = pca.transform(X_std)
In [33]:
# Squared component weights (cos^2): contribution of each variable to each
# principal component.
# Simplified: the 1-based component number is written directly instead of the
# roundabout `reset_index().index + 1` (identical values).
cos2 = pd.DataFrame(np.square(pca.components_), columns=X.columns)
cos2['component'] = np.arange(1, len(cos2) + 1)
In [34]:
# Elbow method: within-cluster sum of squares (inertia) for k = 1..20 on the
# PCA scores.
wcss = []
for k in range(1, 21):
    km = KMeans(n_clusters=k, init='k-means++', random_state=42)
    km.fit(scores_pca)
    wcss.append(km.inertia_)
In [35]:
# Plot the elbow curve to pick the number of clusters.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(range(1, 21), wcss, marker='o', linestyle='--')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
ax.set_title('k-means with PCA Clustering')
Out[35]:
Text(0.5, 1.0, 'k-means with PCA Clustering')
No description has been provided for this image
In [36]:
# Final clustering on the PCA scores with the k chosen from the elbow plot.
# The bare last expression displays the fitted estimator.
numero_cluster = 4
kmeans_pca = KMeans(n_clusters=numero_cluster,init='k-means++',random_state=42)
kmeans_pca.fit(scores_pca)
Out[36]:
KMeans(n_clusters=4, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=4, random_state=42)
In [37]:
# Combine the original indicators with the PCA scores (named 'Componente i')
# and attach the cluster label plus a readable segment name.
scores_df = pd.DataFrame(
    scores_pca,
    columns=[f'Componente {i + 1}' for i in range(componenti_pca)])
X_pca_kmeans = pd.concat([data.reset_index(), scores_df], axis='columns')
X_pca_kmeans['Segment k-means PCA'] = kmeans_pca.labels_
# Key 4 ('quinto') is unused with k=4; kept for easy experimentation with k=5.
segment_names = {0: 'primo', 1: 'secondo', 2: 'terzo', 3: 'quarto', 4: 'quinto'}
X_pca_kmeans['segment'] = X_pca_kmeans['Segment k-means PCA'].map(segment_names)
In [38]:
# Calcola i loadings
loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
loadings_df = pd.DataFrame(loadings, columns=[f'PC{i+1}' for i in range(len(pca.components_))], index=X.columns)
In [39]:
# Heatmap of the PCA loadings per component.
# Fixed: the comment and title were copy-pasted from the correlation-matrix
# cell, but the data plotted here is loadings_df, not a correlation matrix.
plt.figure(figsize=(12, 10))
sns.heatmap(loadings_df, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("PCA Loadings Heatmap")
plt.show()
No description has been provided for this image
In [40]:
# Scatter the municipalities in the plane of the first two principal
# components, colored by cluster.
plt.figure(figsize=(10, 8))
sns.scatterplot(data=X_pca_kmeans,
                x='Componente 2',
                y='Componente 1',
                hue='segment')
plt.title('Clusters by PCA Components')
plt.show()
No description has been provided for this image
In [41]:
# Final map: PCA-based clusters rendered on the municipality geometries.
X_out = comuni_adv[['pro_com_t','comune','geometry']].merge(X_pca_kmeans)
X_out.explore(column='segment',legend=True,legend_kwds={'caption':'Cluster by PCA'})
Out[41]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: