import pandas as pd
import matplotlib.pyplot as plt
# Relative path to the processed 2018 Parquet file (name suggests ESOLMET station data).
f = "../data/002_processed/esolmet_2018.parquet"
# Load the Parquet file into a DataFrame.
tmx = pd.read_parquet(f)
# Print a summary of the frame: index range, columns, non-null counts, dtypes, memory.
tmx.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 51173 entries, 2018-01-01 00:00:00 to 2018-12-31 23:50:00
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Ib      51173 non-null  float64
 1   Ig      51173 non-null  float64
 2   Id      51173 non-null  float64
 3   uv      51173 non-null  float64
 4   To      51173 non-null  float64
 5   hr      51173 non-null  float64
 6   ws      51173 non-null  float64
 7   p       51173 non-null  float64
dtypes: float64(8)
memory usage: 3.5 MB
tmx
Ib Ig Id uv To hr ws p
Fecha
2018-01-01 00:00:00 0.057 0.0 0.0 0.001 18.93 41.57 1.253 879.0692
2018-01-01 00:10:00 0.002 0.0 0.0 0.001 18.76 41.00 0.418 879.4363
2018-01-01 00:20:00 0.170 0.0 0.0 0.001 18.92 40.96 0.955 879.5181
2018-01-01 00:30:00 0.371 0.0 0.0 0.001 18.52 42.46 1.823 879.5826
2018-01-01 00:40:00 0.305 0.0 0.0 0.001 18.49 42.43 2.149 879.6826
... ... ... ... ... ... ... ... ...
2018-12-31 23:10:00 0.125 0.0 0.0 0.000 18.88 59.60 2.145 875.5595
2018-12-31 23:20:00 0.000 0.0 0.0 0.000 18.71 59.67 1.638 875.5595
2018-12-31 23:30:00 0.044 0.0 0.0 0.000 18.52 58.75 1.923 875.2889
2018-12-31 23:40:00 0.170 0.0 0.0 0.000 18.36 60.62 2.089 875.0606
2018-12-31 23:50:00 0.003 0.0 0.0 0.000 17.99 60.76 0.744 875.1424

51173 rows × 8 columns

def clasifica_temperatura(To):
    """Map a temperature reading to an integer level between -3 and 3.

    The reading is compared against descending lower bounds; the first bound
    it reaches (``To >= bound``) decides the level:

        >= 27 ->  3
        >= 25 ->  2
        >= 22 ->  1
        >= 20 ->  0
        >= 18 -> -1
        >= 15 -> -2
        otherwise -> -3

    A value that satisfies none of the comparisons (anything below 15, and
    also NaN, for which every ``>=`` is false) gets -3.
    """
    # (lower bound, level) pairs, ordered from the highest bound downwards.
    escalones = ((27, 3), (25, 2), (22, 1), (20, 0), (18, -1), (15, -2))
    for limite, nivel in escalones:
        if To >= limite:
            return nivel
    return -3
        

# Quick sanity check: a reading below 15 should land in the lowest level (-3).
clasifica_temperatura(13.5)
-3
# Label every reading in the "To" column with its integer level (-3 ... 3).
# This is the only assignment to "alto_bajo": a two-way string classification
# ("3"/"2") would be overwritten right away, so it is not computed.
tmx["alto_bajo"] = tmx.To.apply(clasifica_temperatura)
# Number of readings per level; value_counts orders them by frequency.
resultado = tmx.alto_bajo.value_counts()
resultado
alto_bajo
 3    11714
 1     9822
-1     8333
 0     6916
 2     6701
-2     5638
-3     2049
Name: count, dtype: int64
# Order the counts by level (-3 ... 3) rather than by frequency.
# Rebinding the sorted copy is preferred over mutating with ``inplace=True``.
resultado = resultado.sort_index()
resultado
alto_bajo
-3     2049
-2     5638
-1     8333
 0     6916
 1     9822
 2     6701
 3    11714
Name: count, dtype: int64

# Horizontal bar chart: one bar per level, bar length = number of readings.
fig, ax = plt.subplots(figsize=(6, 3))
ax.barh(y=resultado.index, width=resultado)

# Axis labels first, then a faint grid to help read the bar lengths.
ax.set_ylabel("Nivel calor ")
ax.set_xlabel("Ocurrencia")
ax.grid(alpha=0.2)

# Kept as the final expression so the cell still displays the title's Text object.
ax.set_title("Temixco, 2018")
Text(0.5, 1.0, 'Temixco, 2018')