import pandas as pd
import matplotlib.pyplot as plt
from tools.importaciones import importa_excel
import glob as glob30 %load tools/importaciones.py
import pandas as pd
def importa_excel(f):
df = pd.read_excel(f,skiprows=[0,1,3,4],index_col=0,parse_dates=True)
return dff = "../data/001_raw/001.xls"
df1 = importa_excel(f)
f = "../data/001_raw/002.xls"
df2 = importa_excel(f)
pd.concat([df1,df2],axis=0)| Directa | Global | Difusa | UV | Temperatura | Humedad | Viento Vel. | Presiòn Bar. | |
|---|---|---|---|---|---|---|---|---|
| Fecha | ||||||||
| 2018-01-01 00:00:00 | 0.057 | 0.0 | 0.0 | 0.001 | 18.93 | 41.57 | 1.253 | 879.0692 |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 0.0 | 0.001 | 18.76 | 41.00 | 0.418 | 879.4363 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 0.0 | 0.001 | 18.92 | 40.96 | 0.955 | 879.5181 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 0.0 | 0.001 | 18.52 | 42.46 | 1.823 | 879.5826 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 0.0 | 0.001 | 18.49 | 42.43 | 2.149 | 879.6826 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-02-28 23:20:00 | 0.000 | 0.0 | 0.0 | 0.000 | 22.49 | 38.44 | 1.811 | 874.8154 |
| 2018-02-28 23:30:00 | 0.000 | 0.0 | 0.0 | 0.000 | 22.75 | 39.60 | 0.859 | 874.8812 |
| 2018-02-28 23:40:00 | 0.003 | 0.0 | 0.0 | 0.000 | 22.34 | 39.43 | 1.244 | 875.0685 |
| 2018-02-28 23:50:00 | 0.117 | 0.0 | 0.0 | 0.000 | 22.52 | 39.90 | 1.729 | 874.9626 |
| 2018-03-01 00:00:00 | 0.004 | 0.0 | 0.0 | 0.000 | 22.35 | 41.03 | 1.508 | 875.0115 |
8498 rows × 8 columns
files = glob.glob("../data/001_raw/*.xls")
lista_dfs = [importa_excel(file) for file in files]
pd.concat(lista_dfs)| Directa | Global | Difusa | UV | Temperatura | Humedad | Viento Vel. | Presiòn Bar. | |
|---|---|---|---|---|---|---|---|---|
| Fecha | ||||||||
| 2018-01-01 00:00:00 | 0.057 | 0.0 | 0.0 | 0.001 | 18.93 | 41.57 | 1.253 | 879.0692 |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 0.0 | 0.001 | 18.76 | 41.00 | 0.418 | 879.4363 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 0.0 | 0.001 | 18.92 | 40.96 | 0.955 | 879.5181 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 0.0 | 0.001 | 18.52 | 42.46 | 1.823 | 879.5826 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 0.0 | 0.001 | 18.49 | 42.43 | 2.149 | 879.6826 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-12-31 23:20:00 | 0.000 | 0.0 | 0.0 | 0.000 | 18.71 | 59.67 | 1.638 | 875.5595 |
| 2018-12-31 23:30:00 | 0.044 | 0.0 | 0.0 | 0.000 | 18.52 | 58.75 | 1.923 | 875.2889 |
| 2018-12-31 23:40:00 | 0.170 | 0.0 | 0.0 | 0.000 | 18.36 | 60.62 | 2.089 | 875.0606 |
| 2018-12-31 23:50:00 | 0.003 | 0.0 | 0.0 | 0.000 | 17.99 | 60.76 | 0.744 | 875.1424 |
| 2019-01-01 00:00:00 | 0.028 | 0.0 | 0.0 | 0.000 | 17.85 | 59.02 | 0.723 | 875.2408 |
51174 rows × 8 columns
files['../data/001_raw\\001.xls',
'../data/001_raw\\002.xls',
'../data/001_raw\\003.xls',
'../data/001_raw\\004.xls',
'../data/001_raw\\005.xls',
'../data/001_raw\\006.xls',
'../data/001_raw\\007.xls',
'../data/001_raw\\008.xls',
'../data/001_raw\\009.xls',
'../data/001_raw\\010.xls',
'../data/001_raw\\011.xls',
'../data/001_raw\\012.xls']
files = glob.glob("../data/001_raw/*.xls")
esolmet = pd.concat([importa_excel(file) for file in files])
esolmet.info()<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 51174 entries, 2018-01-01 00:00:00 to 2019-01-01 00:00:00
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Directa 51174 non-null float64
1 Global 51174 non-null float64
2 Difusa 51174 non-null float64
3 UV 51174 non-null float64
4 Temperatura 51174 non-null float64
5 Humedad 51174 non-null float64
6 Viento Vel. 51174 non-null float64
7 Presiòn Bar. 51174 non-null float64
dtypes: float64(8)
memory usage: 3.5 MB
esolmet.sort_index(inplace=True)esolmet.columnsIndex(['Directa', 'Global', 'Difusa', 'UV', 'Temperatura', 'Humedad',
'Viento Vel.', 'Presiòn Bar.'],
dtype='object')
esolmet.columns = ['Ib', 'Ig', 'Id', "uv", 'To', 'hr','ws', 'p']esolmet = esolmet[esolmet.index.year ==2018]esolmet| Ib | Ig | Id | uv | To | hr | ws | p | |
|---|---|---|---|---|---|---|---|---|
| Fecha | ||||||||
| 2018-01-01 00:00:00 | 0.057 | 0.0 | 0.0 | 0.001 | 18.93 | 41.57 | 1.253 | 879.0692 |
| 2018-01-01 00:10:00 | 0.002 | 0.0 | 0.0 | 0.001 | 18.76 | 41.00 | 0.418 | 879.4363 |
| 2018-01-01 00:20:00 | 0.170 | 0.0 | 0.0 | 0.001 | 18.92 | 40.96 | 0.955 | 879.5181 |
| 2018-01-01 00:30:00 | 0.371 | 0.0 | 0.0 | 0.001 | 18.52 | 42.46 | 1.823 | 879.5826 |
| 2018-01-01 00:40:00 | 0.305 | 0.0 | 0.0 | 0.001 | 18.49 | 42.43 | 2.149 | 879.6826 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-12-31 23:10:00 | 0.125 | 0.0 | 0.0 | 0.000 | 18.88 | 59.60 | 2.145 | 875.5595 |
| 2018-12-31 23:20:00 | 0.000 | 0.0 | 0.0 | 0.000 | 18.71 | 59.67 | 1.638 | 875.5595 |
| 2018-12-31 23:30:00 | 0.044 | 0.0 | 0.0 | 0.000 | 18.52 | 58.75 | 1.923 | 875.2889 |
| 2018-12-31 23:40:00 | 0.170 | 0.0 | 0.0 | 0.000 | 18.36 | 60.62 | 2.089 | 875.0606 |
| 2018-12-31 23:50:00 | 0.003 | 0.0 | 0.0 | 0.000 | 17.99 | 60.76 | 0.744 | 875.1424 |
51173 rows × 8 columns
esolmet.to_parquet("../data/002_processed/esolmet_2018.parquet")esolmet.to_csv("../data/002_processed/esolmet_2018.csv")esolmet.indexDatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 00:10:00',
'2018-01-01 00:20:00', '2018-01-01 00:30:00',
'2018-01-01 00:40:00', '2018-01-01 00:50:00',
'2018-01-01 01:00:00', '2018-01-01 01:10:00',
'2018-01-01 01:20:00', '2018-01-01 01:30:00',
...
'2018-12-31 22:20:00', '2018-12-31 22:30:00',
'2018-12-31 22:40:00', '2018-12-31 22:50:00',
'2018-12-31 23:00:00', '2018-12-31 23:10:00',
'2018-12-31 23:20:00', '2018-12-31 23:30:00',
'2018-12-31 23:40:00', '2018-12-31 23:50:00'],
dtype='datetime64[ns]', name='Fecha', length=51173, freq=None)
esolmet.index.year ==2018array([ True, True, True, ..., True, True, True], shape=(51173,))