30  %load tools/importaciones.py

import pandas as pd
import matplotlib.pyplot as plt
from tools.importaciones import importa_excel
import glob as glob
import pandas as pd

def importa_excel(f):
    df = pd.read_excel(f,skiprows=[0,1,3,4],index_col=0,parse_dates=True)
    return df
f = "../data/001_raw/001.xls"
df1 = importa_excel(f)

f = "../data/001_raw/002.xls"
df2 = importa_excel(f)


pd.concat([df1,df2],axis=0)
Directa Global Difusa UV Temperatura Humedad Viento Vel. Presiòn Bar.
Fecha
2018-01-01 00:00:00 0.057 0.0 0.0 0.001 18.93 41.57 1.253 879.0692
2018-01-01 00:10:00 0.002 0.0 0.0 0.001 18.76 41.00 0.418 879.4363
2018-01-01 00:20:00 0.170 0.0 0.0 0.001 18.92 40.96 0.955 879.5181
2018-01-01 00:30:00 0.371 0.0 0.0 0.001 18.52 42.46 1.823 879.5826
2018-01-01 00:40:00 0.305 0.0 0.0 0.001 18.49 42.43 2.149 879.6826
... ... ... ... ... ... ... ... ...
2018-02-28 23:20:00 0.000 0.0 0.0 0.000 22.49 38.44 1.811 874.8154
2018-02-28 23:30:00 0.000 0.0 0.0 0.000 22.75 39.60 0.859 874.8812
2018-02-28 23:40:00 0.003 0.0 0.0 0.000 22.34 39.43 1.244 875.0685
2018-02-28 23:50:00 0.117 0.0 0.0 0.000 22.52 39.90 1.729 874.9626
2018-03-01 00:00:00 0.004 0.0 0.0 0.000 22.35 41.03 1.508 875.0115

8498 rows × 8 columns

files = glob.glob("../data/001_raw/*.xls")

lista_dfs = [importa_excel(file) for file in files]
pd.concat(lista_dfs)
Directa Global Difusa UV Temperatura Humedad Viento Vel. Presiòn Bar.
Fecha
2018-01-01 00:00:00 0.057 0.0 0.0 0.001 18.93 41.57 1.253 879.0692
2018-01-01 00:10:00 0.002 0.0 0.0 0.001 18.76 41.00 0.418 879.4363
2018-01-01 00:20:00 0.170 0.0 0.0 0.001 18.92 40.96 0.955 879.5181
2018-01-01 00:30:00 0.371 0.0 0.0 0.001 18.52 42.46 1.823 879.5826
2018-01-01 00:40:00 0.305 0.0 0.0 0.001 18.49 42.43 2.149 879.6826
... ... ... ... ... ... ... ... ...
2018-12-31 23:20:00 0.000 0.0 0.0 0.000 18.71 59.67 1.638 875.5595
2018-12-31 23:30:00 0.044 0.0 0.0 0.000 18.52 58.75 1.923 875.2889
2018-12-31 23:40:00 0.170 0.0 0.0 0.000 18.36 60.62 2.089 875.0606
2018-12-31 23:50:00 0.003 0.0 0.0 0.000 17.99 60.76 0.744 875.1424
2019-01-01 00:00:00 0.028 0.0 0.0 0.000 17.85 59.02 0.723 875.2408

51174 rows × 8 columns

files
['../data/001_raw\\001.xls',
 '../data/001_raw\\002.xls',
 '../data/001_raw\\003.xls',
 '../data/001_raw\\004.xls',
 '../data/001_raw\\005.xls',
 '../data/001_raw\\006.xls',
 '../data/001_raw\\007.xls',
 '../data/001_raw\\008.xls',
 '../data/001_raw\\009.xls',
 '../data/001_raw\\010.xls',
 '../data/001_raw\\011.xls',
 '../data/001_raw\\012.xls']
files = glob.glob("../data/001_raw/*.xls")

esolmet = pd.concat([importa_excel(file) for file in files])
esolmet.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 51174 entries, 2018-01-01 00:00:00 to 2019-01-01 00:00:00
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Directa       51174 non-null  float64
 1   Global        51174 non-null  float64
 2   Difusa        51174 non-null  float64
 3   UV            51174 non-null  float64
 4   Temperatura   51174 non-null  float64
 5   Humedad       51174 non-null  float64
 6   Viento Vel.   51174 non-null  float64
 7   Presiòn Bar.  51174 non-null  float64
dtypes: float64(8)
memory usage: 3.5 MB
esolmet.sort_index(inplace=True)
esolmet.columns
Index(['Directa', 'Global', 'Difusa', 'UV', 'Temperatura', 'Humedad',
       'Viento Vel.', 'Presiòn Bar.'],
      dtype='object')
esolmet.columns = ['Ib', 'Ig', 'Id', "uv", 'To', 'hr','ws', 'p']
esolmet = esolmet[esolmet.index.year ==2018]
esolmet
Ib Ig Id uv To hr ws p
Fecha
2018-01-01 00:00:00 0.057 0.0 0.0 0.001 18.93 41.57 1.253 879.0692
2018-01-01 00:10:00 0.002 0.0 0.0 0.001 18.76 41.00 0.418 879.4363
2018-01-01 00:20:00 0.170 0.0 0.0 0.001 18.92 40.96 0.955 879.5181
2018-01-01 00:30:00 0.371 0.0 0.0 0.001 18.52 42.46 1.823 879.5826
2018-01-01 00:40:00 0.305 0.0 0.0 0.001 18.49 42.43 2.149 879.6826
... ... ... ... ... ... ... ... ...
2018-12-31 23:10:00 0.125 0.0 0.0 0.000 18.88 59.60 2.145 875.5595
2018-12-31 23:20:00 0.000 0.0 0.0 0.000 18.71 59.67 1.638 875.5595
2018-12-31 23:30:00 0.044 0.0 0.0 0.000 18.52 58.75 1.923 875.2889
2018-12-31 23:40:00 0.170 0.0 0.0 0.000 18.36 60.62 2.089 875.0606
2018-12-31 23:50:00 0.003 0.0 0.0 0.000 17.99 60.76 0.744 875.1424

51173 rows × 8 columns

esolmet.to_parquet("../data/002_processed/esolmet_2018.parquet")
esolmet.to_csv("../data/002_processed/esolmet_2018.csv")
esolmet.index
DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 00:10:00',
               '2018-01-01 00:20:00', '2018-01-01 00:30:00',
               '2018-01-01 00:40:00', '2018-01-01 00:50:00',
               '2018-01-01 01:00:00', '2018-01-01 01:10:00',
               '2018-01-01 01:20:00', '2018-01-01 01:30:00',
               ...
               '2018-12-31 22:20:00', '2018-12-31 22:30:00',
               '2018-12-31 22:40:00', '2018-12-31 22:50:00',
               '2018-12-31 23:00:00', '2018-12-31 23:10:00',
               '2018-12-31 23:20:00', '2018-12-31 23:30:00',
               '2018-12-31 23:40:00', '2018-12-31 23:50:00'],
              dtype='datetime64[ns]', name='Fecha', length=51173, freq=None)
esolmet.index.year ==2018
array([ True,  True,  True, ...,  True,  True,  True], shape=(51173,))