Publications
2021 |
J. F. Torres and D. Hadjout and A. Sebaa and F. Martínez-Álvarez and A. Troncoso Deep Learning for Time Series Forecasting: A Survey (Journal Article) Big Data, 9 (1), pp. 3-21, 2021. (Abstract | Links | BibTeX | Tags: big data, deep learning, time series) @article{TORRES21, title = {Deep Learning for Time Series Forecasting: A Survey}, author = {J. F. Torres and D. Hadjout and A. Sebaa and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.liebertpub.com/doi/10.1089/big.2020.0159}, doi = {10.1089/big.2020.0159}, year = {2021}, date = {2021-02-05}, journal = {Big Data}, volume = {9}, number = {1}, pages = {3-21}, abstract = {Deep learning, one of the most remarkable techniques of machine learning, has been a major success in many fields, including image processing, speech recognition, and text understanding. Deep learning models are powerful engines capable of learning arbitrary mapping functions; they do not require a scaled or stationary time series as input, support multivariate inputs, and support multi-step outputs. All of these features together make deep learning a useful tool when dealing with more complex time series prediction problems involving large amounts of data and multiple variables with complex relationships. This paper provides an overview of the most common deep learning types for time series forecasting and explains the relationships between deep learning models and classical approaches to time series forecasting. A brief background of the particular challenges present in time-series data and the most common deep learning techniques that are often used for time series forecasting is provided. Previous studies that applied deep learning to time series are reviewed.}, keywords = {big data, deep learning, time series}, pubstate = {published}, tppubtype = {article} } Deep learning, one of the most remarkable techniques of machine learning, has been a major success in many fields, including image processing, speech recognition, and text understanding.
Deep learning models are powerful engines capable of learning arbitrary mapping functions; they do not require a scaled or stationary time series as input, support multivariate inputs, and support multi-step outputs. All of these features together make deep learning a useful tool when dealing with more complex time series prediction problems involving large amounts of data and multiple variables with complex relationships. This paper provides an overview of the most common deep learning types for time series forecasting and explains the relationships between deep learning models and classical approaches to time series forecasting. A brief background of the particular challenges present in time-series data and the most common deep learning techniques that are often used for time series forecasting is provided. Previous studies that applied deep learning to time series are reviewed. |
2020 |
P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso A New Forecasting Algorithm Based on Neighbors for Streaming Electricity Time Series (Conference) HAIS 15th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2020. (Links | BibTeX | Tags: big data, energy, IoT, time series) @conference{HAIS2020, title = {A New Forecasting Algorithm Based on Neighbors for Streaming Electricity Time Series}, author = {P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-030-61705-9_43}, year = {2020}, date = {2020-11-04}, booktitle = {HAIS 15th International Conference on Hybrid Artificial Intelligence Systems}, pages = {522-533}, series = {Lecture Notes in Computer Science}, keywords = {big data, energy, IoT, time series}, pubstate = {published}, tppubtype = {conference} } |
Y. Lin and I. Koprinska and M. Rana and A. Troncoso Solar Power Forecasting Based on Pattern Sequence Similarity and Meta-learning (Conference) ICANN 29th International Conference on Artificial Neural Networks, Lecture Notes in Computer Science 2020. (Links | BibTeX | Tags: energy, time series) @conference{ICANN20, title = {Solar Power Forecasting Based on Pattern Sequence Similarity and Meta-learning}, author = {Y. Lin and I. Koprinska and M. Rana and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-030-61609-0_22}, year = {2020}, date = {2020-10-14}, booktitle = {ICANN 29th International Conference on Artificial Neural Networks}, pages = {271-283}, series = {Lecture Notes in Computer Science}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {conference} } |
F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting (Journal Article) Applied Sciences, 10 (16), pp. 5487, 2020. (Abstract | Links | BibTeX | Tags: big data, deep learning, energy, time series) @article{DIVINA2020, title = {Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting}, author = {F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.mdpi.com/2076-3417/10/16/5487}, doi = {10.3390/app10165487}, year = {2020}, date = {2020-07-30}, journal = {Applied Sciences}, volume = {10}, number = {16}, pages = {5487}, abstract = {The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. 
Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared.}, keywords = {big data, deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared. |
F. Martínez-Álvarez and G. Asencio-Cortés and J. F. Torres and D. Gutiérrez-Avilés and L. Melgar-García and R. Pérez-Chacón and C. Rubio-Escudero and A. Troncoso and J. C. Riquelme Coronavirus Optimization Algorithm: A bioinspired metaheuristic based on the COVID-19 propagation model (Journal Article) Big Data, 8 (4), pp. 308-322, 2020. (Abstract | Links | BibTeX | Tags: big data, deep learning, energy, time series) @article{MARTINEZ-ALVAREZ20, title = {Coronavirus Optimization Algorithm: A bioinspired metaheuristic based on the COVID-19 propagation model}, author = {F. Martínez-Álvarez and G. Asencio-Cortés and J. F. Torres and D. Gutiérrez-Avilés and L. Melgar-García and R. Pérez-Chacón and C. Rubio-Escudero and A. Troncoso and J. C. Riquelme}, url = {https://www.liebertpub.com/doi/full/10.1089/big.2020.0051}, doi = {10.1089/big.2020.0051}, year = {2020}, date = {2020-07-22}, journal = {Big Data}, volume = {8}, number = {4}, pages = {308-322}, abstract = {This work proposes a novel bioinspired metaheuristic, simulating how the coronavirus spreads and infects healthy people. From a primary infected individual (patient zero), the coronavirus rapidly infects new victims, creating large populations of infected people who will either die or spread infection. Relevant terms such as reinfection probability, super-spreading rate, social distancing measures or traveling rate are introduced into the model in order to simulate the coronavirus activity as accurately as possible. The infected population initially grows exponentially over time, but taking into consideration social isolation measures, the mortality rate and number of recoveries, the infected population gradually decreases. The Coronavirus Optimization Algorithm has two major advantages when compared to other similar strategies. Firstly, the input parameters are already set according to the disease statistics, preventing researchers from initializing them with arbitrary values. 
Secondly, the approach has the ability to end after several iterations, without setting this value either. Furthermore, a parallel multi-virus version is proposed, where several coronavirus strains evolve over time and explore wider search space areas in less iterations. Finally, the metaheuristic has been combined with deep learning models, in order to find optimal hyperparameters during the training phase. As application case, the problem of electricity load time series forecasting has been addressed, showing quite remarkable performance.}, keywords = {big data, deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } This work proposes a novel bioinspired metaheuristic, simulating how the coronavirus spreads and infects healthy people. From a primary infected individual (patient zero), the coronavirus rapidly infects new victims, creating large populations of infected people who will either die or spread infection. Relevant terms such as reinfection probability, super-spreading rate, social distancing measures or traveling rate are introduced into the model in order to simulate the coronavirus activity as accurately as possible. The infected population initially grows exponentially over time, but taking into consideration social isolation measures, the mortality rate and number of recoveries, the infected population gradually decreases. The Coronavirus Optimization Algorithm has two major advantages when compared to other similar strategies. Firstly, the input parameters are already set according to the disease statistics, preventing researchers from initializing them with arbitrary values. Secondly, the approach has the ability to end after several iterations, without setting this value either. Furthermore, a parallel multi-virus version is proposed, where several coronavirus strains evolve over time and explore wider search space areas in less iterations. 
Finally, the metaheuristic has been combined with deep learning models, in order to find optimal hyperparameters during the training phase. As application case, the problem of electricity load time series forecasting has been addressed, showing quite remarkable performance. |
R. Pérez-Chacón and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso Big data time series forecasting based on pattern sequence similarity and its application to the electricity demand (Journal Article) Information Sciences, 540 , pp. 160-174, 2020. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{PEREZ20, title = {Big data time series forecasting based on pattern sequence similarity and its application to the electricity demand}, author = {R. Pérez-Chacón and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S0020025520306010}, doi = {10.1016/j.ins.2020.06.014}, year = {2020}, date = {2020-06-06}, journal = {Information Sciences}, volume = {540}, pages = {160-174}, abstract = {This work proposes a novel algorithm to forecast big data time series. Based on the well-established Pattern Sequence Forecasting algorithm, this new approach has two major contributions to the literature. First, the improvement of the aforementioned algorithm with respect to the accuracy of predictions, and second, its transformation into the big data context, having reached meaningful results in terms of scalability. The algorithm uses the Apache Spark distributed computation framework and it is a ready-to-use application with few parameters to adjust. Physical and cloud clusters have been used to carry out the experimentation, which consisted in applying the algorithm to real-world data from Uruguay electricity demand.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } This work proposes a novel algorithm to forecast big data time series. Based on the well-established Pattern Sequence Forecasting algorithm, this new approach has two major contributions to the literature. 
First, the improvement of the aforementioned algorithm with respect to the accuracy of predictions, and second, its transformation into the big data context, having reached meaningful results in terms of scalability. The algorithm uses the Apache Spark distributed computation framework and it is a ready-to-use application with few parameters to adjust. Physical and cloud clusters have been used to carry out the experimentation, which consisted in applying the algorithm to real-world data from Uruguay electricity demand. |
M. Nazeriye and A. Haeri and F. Martínez-Álvarez Analysis of the Impact of Residential Property and Equipment on Building Energy Efficiency and Consumption - A Data Mining Approach (Journal Article) Applied Sciences, 10 (10), pp. 3589, 2020. (Abstract | Links | BibTeX | Tags: energy, time series) @article{NAZERIYE20, title = {Analysis of the Impact of Residential Property and Equipment on Building Energy Efficiency and Consumption - A Data Mining Approach}, author = {M. Nazeriye and A. Haeri and F. Martínez-Álvarez}, url = {https://www.mdpi.com/2076-3417/10/10/3589/}, doi = {10.3390/app10103589}, year = {2020}, date = {2020-05-22}, journal = {Applied Sciences}, volume = {10}, number = {10}, pages = {3589}, abstract = {Human living could become very difficult due to a lack of energy. The household sector plays a significant role in energy consumption. Trying to optimize and achieve efficient energy consumption can lead to large-scale energy savings. The aim of this paper is to identify the equipment and property affecting energy efficiency and consumption in residential homes. For this purpose, a hybrid data-mining approach based on K-means algorithms and decision trees is presented. To analyze the approach, data is modeled once using the approach and then without it. A data set of residential homes of England and Wales is arranged in low, medium and high consumption clusters. The C5.0 algorithm is run on each cluster to extract factors affecting energy efficiency. The comparison of the modeling results, and also their accuracy, prove that the approach employed has the ability to extract the findings with greater accuracy and detail than in other cases. The installation of boilers, using cavity walls, and installing insulation could improve energy efficiency. 
Old homes and the usage of economy 7 electricity have an unfavorable effect on energy efficiency, but the approach shows that each cluster behaved differently in these factors related to energy efficiency and has unique results.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Human living could become very difficult due to a lack of energy. The household sector plays a significant role in energy consumption. Trying to optimize and achieve efficient energy consumption can lead to large-scale energy savings. The aim of this paper is to identify the equipment and property affecting energy efficiency and consumption in residential homes. For this purpose, a hybrid data-mining approach based on K-means algorithms and decision trees is presented. To analyze the approach, data is modeled once using the approach and then without it. A data set of residential homes of England and Wales is arranged in low, medium and high consumption clusters. The C5.0 algorithm is run on each cluster to extract factors affecting energy efficiency. The comparison of the modeling results, and also their accuracy, prove that the approach employed has the ability to extract the findings with greater accuracy and detail than in other cases. The installation of boilers, using cavity walls, and installing insulation could improve energy efficiency. Old homes and the usage of economy 7 electricity have an unfavorable effect on energy efficiency, but the approach shows that each cluster behaved differently in these factors related to energy efficiency and has unique results. |
A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Automated Deployment of a Spark Cluster with Machine Learning Algorithm Integration (Journal Article) Big Data Research, 19-20 , pp. 100135, 2020. (Abstract | Links | BibTeX | Tags: big data, time series) @article{FERNANDEZ20, title = {Automated Deployment of a Spark Cluster with Machine Learning Algorithm Integration}, author = {A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S2214579620300034}, doi = {10.1016/j.bdr.2020.100135}, year = {2020}, date = {2020-05-12}, journal = {Big Data Research}, volume = {19-20}, pages = {100135}, abstract = {The vast amount of data stored nowadays has turned big data analytics into a very trendy research field. The Spark distributed computing platform has emerged as a dominant and widely used paradigm for cluster deployment and big data analytics. However, to get started up is still a task that may take much time when manually done, due to the requisites that all nodes must fulfill. This work introduces LadonSpark, an open-source and non-commercial solution to configure and deploy a Spark cluster automatically. It has been specially designed for easy and efficient management of a Spark cluster with a friendly graphical user interface to automate the deployment of a cluster and to start up the distributed file system of Hadoop quickly. Moreover, LadonSpark includes the functionality of integrating any algorithm into the system. That is, the user only needs to provide the executable file and the number of required inputs for proper parametrization. Source codes developed in Scala, R, Python, or Java can be supported on LadonSpark. 
Besides, clustering, regression, classification, and association rules algorithms are already integrated so that users can test its usability from its initial installation.}, keywords = {big data, time series}, pubstate = {published}, tppubtype = {article} } The vast amount of data stored nowadays has turned big data analytics into a very trendy research field. The Spark distributed computing platform has emerged as a dominant and widely used paradigm for cluster deployment and big data analytics. However, to get started up is still a task that may take much time when manually done, due to the requisites that all nodes must fulfill. This work introduces LadonSpark, an open-source and non-commercial solution to configure and deploy a Spark cluster automatically. It has been specially designed for easy and efficient management of a Spark cluster with a friendly graphical user interface to automate the deployment of a cluster and to start up the distributed file system of Hadoop quickly. Moreover, LadonSpark includes the functionality of integrating any algorithm into the system. That is, the user only needs to provide the executable file and the number of required inputs for proper parametrization. Source codes developed in Scala, R, Python, or Java can be supported on LadonSpark. Besides, clustering, regression, classification, and association rules algorithms are already integrated so that users can test its usability from its initial installation. |
K. Asim and E. Elawadi and F. Martínez-Álvarez and I. A. Niaz and S. R. M. Sayed and T. Iqbal Seismicity Analysis and Machine Learning Models for Short-Term Low Magnitude (Journal Article) Soil Dynamics and Earthquake Engineering, 130 , pp. id105932, 2020. (Links | BibTeX | Tags: natural disasters, time series) @article{ASIM20d, title = {Seismicity Analysis and Machine Learning Models for Short-Term Low Magnitude}, author = {K. Asim and E. Elawadi and F. Martínez-Álvarez and I. A. Niaz and S. R. M. Sayed and T. Iqbal}, url = {https://www.sciencedirect.com/science/article/pii/S0267726119302192}, doi = {10.1016/j.soildyn.2019.105932}, year = {2020}, date = {2020-03-01}, journal = {Soil Dynamics and Earthquake Engineering}, volume = {130}, pages = {id105932}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
D. T. Bui and N.-D. Hoang and F. Martínez-Álvarez and P.-T. T. Ngo and P. V. Hoa and T. D. Pham and P. Samui and R. Costache A novel deep learning neural network approach for predicting flash flood susceptibility: A case study at a high frequency tropical storm area (Journal Article) Science of the Total Environment, 701 , pp. id134413, 2020. (Links | BibTeX | Tags: natural disasters, time series) @article{BUI20, title = {A novel deep learning neural network approach for predicting flash flood susceptibility: A case study at a high frequency tropical storm area}, author = {D. T. Bui and N.-D. Hoang and F. Martínez-Álvarez and P.-T. T. Ngo and P. V. Hoa and T. D. Pham and P. Samui and R. Costache}, url = {https://www.sciencedirect.com/science/article/pii/S0048969719344043}, doi = {10.1016/j.scitotenv.2019.134413}, year = {2020}, date = {2020-01-20}, journal = {Science of the Total Environment}, volume = {701}, pages = {id134413}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
D. Guijo-Rubio and A. M. Durán-Rosal and P. A. Gutiérrez and A. Troncoso and C. Hervás-Martínez Time series clustering based on segment typologies extraction (Journal Article) IEEE Transactions on Cybernetics, 2020. (Abstract | Links | BibTeX | Tags: time series) @article{GUIJO20, title = {Time series clustering based on segment typologies extraction}, author = {D. Guijo-Rubio and A. M. Durán-Rosal and P. A. Gutiérrez and A. Troncoso and C. Hervás-Martínez}, doi = {10.1109/TCYB.2019.2962584}, year = {2020}, date = {2020-01-15}, journal = {IEEE Transactions on Cybernetics}, abstract = {Time-series clustering is the process of grouping time series with respect to their similarity or characteristics. Previous approaches usually combine a specific distance measure for time series and a standard clustering method. However, these approaches do not take the similarity of the different subsequences of each time series into account, which can be used to better compare the time-series objects of the dataset. In this article, we propose a novel technique of time-series clustering consisting of two clustering stages. In a first step, a least-squares polynomial segmentation procedure is applied to each time series, which is based on a growing window technique that returns different-length segments. Then, all of the segments are projected into the same dimensional space, based on the coefficients of the model that approximates the segment and a set of statistical features. After mapping, a first hierarchical clustering phase is applied to all mapped segments, returning groups of segments for each time series. These clusters are used to represent all time series in the same dimensional space, after defining another specific mapping process. In a second and final clustering stage, all the time-series objects are grouped. We consider internal clustering quality to automatically adjust the main parameter of the algorithm, which is an error threshold for the segmentation. 
The results obtained on 84 datasets from the UCR Time Series Classification Archive have been compared against three state-of-the-art methods, showing that the performance of this methodology is very promising, especially on larger datasets.}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } Time-series clustering is the process of grouping time series with respect to their similarity or characteristics. Previous approaches usually combine a specific distance measure for time series and a standard clustering method. However, these approaches do not take the similarity of the different subsequences of each time series into account, which can be used to better compare the time-series objects of the dataset. In this article, we propose a novel technique of time-series clustering consisting of two clustering stages. In a first step, a least-squares polynomial segmentation procedure is applied to each time series, which is based on a growing window technique that returns different-length segments. Then, all of the segments are projected into the same dimensional space, based on the coefficients of the model that approximates the segment and a set of statistical features. After mapping, a first hierarchical clustering phase is applied to all mapped segments, returning groups of segments for each time series. These clusters are used to represent all time series in the same dimensional space, after defining another specific mapping process. In a second and final clustering stage, all the time-series objects are grouped. We consider internal clustering quality to automatically adjust the main parameter of the algorithm, which is an error threshold for the segmentation. The results obtained on 84 datasets from the UCR Time Series Classification Archive have been compared against three state-of-the-art methods, showing that the performance of this methodology is very promising, especially on larger datasets. |
Ó. Trull and J. C. García-Díaz and A. Troncoso Initialization methods for multiple seasonal Holt–Winters forecasting models (Journal Article) Mathematics, 8 (2), pp. 268, 2020. (Links | BibTeX | Tags: energy, time series) @article{TRULL20a, title = {Initialization methods for multiple seasonal Holt–Winters forecasting models}, author = {Ó. Trull and J. C. García-Díaz and A. Troncoso}, doi = {10.3390/math8020268}, year = {2020}, date = {2020-01-01}, journal = {Mathematics}, volume = {8}, number = {2}, pages = {268}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
Óscar Trull and J. Carlos García-Díaz and A. Troncoso Stability of Multiple Seasonal Holt-Winters Models Applied to Hourly Electricity Demand in Spain (Journal Article) Applied Sciences, 10 (7), pp. 2630, 2020. (Links | BibTeX | Tags: energy, time series) @article{Trull20b, title = {Stability of Multiple Seasonal Holt-Winters Models Applied to Hourly Electricity Demand in Spain}, author = {Óscar Trull and J. Carlos García-Díaz and A. Troncoso}, doi = {10.3390/app10072630}, year = {2020}, date = {2020-01-01}, journal = {Applied Sciences}, volume = {10}, number = {7}, pages = {2630}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
2019 |
C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system (Journal Article) IEEE Access, 7 , pp. 120840-120856, 2019. (Links | BibTeX | Tags: energy, time series) @article{GOMEZ19, title = {A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system}, author = {C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme}, url = {https://ieeexplore.ieee.org/document/8807120}, doi = {10.1109/ACCESS.2019.2936478}, year = {2019}, date = {2019-08-01}, journal = {IEEE Access}, volume = {7}, pages = {120840-120856}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
F. Martínez-Álvarez and A. Morales-Esteban Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial) (Journal Article) Computers and Geosciences, 129 , pp. 38-39, 2019. (Links | BibTeX | Tags: big data, natural disasters, time series) @article{MARTINEZ19, title = {Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial)}, author = {F. Martínez-Álvarez and A. Morales-Esteban}, url = {https://www.sciencedirect.com/science/article/pii/S009830041930411X?dgcid=rss_sd_all}, doi = {10.1016/j.cageo.2019.04.012}, year = {2019}, date = {2019-08-01}, journal = {Computers and Geosciences}, volume = {129}, pages = {38-39}, keywords = {big data, natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting (Conference) IWANN 15th International Work-Conference on Artificial Neural Networks, 11506 , Lecture Notes in Computer Science 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{TORRES19-2, title = {Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting}, author = {J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20521-8_22}, doi = {10.1007/978-3-030-20521-8_22}, year = {2019}, date = {2019-05-16}, booktitle = {IWANN 15th International Work-Conference on Artificial Neural Networks}, volume = {11506}, pages = {259-269}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez Big data solar power forecasting based on deep learning and multiple data sources (Journal Article) Expert Systems, 36 , pp. id12394, 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @article{TORRES19-1, title = {Big data solar power forecasting based on deep learning and multiple data sources}, author = {J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/exsy.12394}, doi = {10.1111/exsy.12394}, year = {2019}, date = {2019-03-01}, journal = {Expert Systems}, volume = {36}, pages = {id12394}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } |
R. Talavera-Llames and R. Pérez-Chacón and A. Troncoso and F. Martínez-Álvarez MV-kWNN: A novel multivariate and multi-output weighted nearest neighbors algorithm for big data time series forecasting (Journal Article) Neurocomputing, 353 , pp. 56-73, 2019. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{NEUCOM2019, title = {MV-kWNN: A novel multivariate and multi-output weighted nearest neighbors algorithm for big data time series forecasting}, author = {R. Talavera-Llames and R. Pérez-Chacón and A. Troncoso and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S0925231219303236?via%3Dihub}, doi = {10.1016/j.neucom.2018.07.092}, year = {2019}, date = {2019-01-01}, journal = {Neurocomputing}, volume = {353}, pages = {56-73}, abstract = {This paper introduces a novel algorithm for big data time series forecasting. Its main novelty lies in its ability to deal with multivariate data, i.e. to consider multiple time series simultaneously, in order to make multi-output predictions. Real-world processes are typically characterised by several interrelated variables, and the future occurrence of certain time series cannot be explained without understanding the influence that other time series might have on the target time series. One key issue in the context of the multivariate analysis is to determine a priori whether exogenous variables must be included in the model or not. To deal with this, a correlation analysis is used to find a minimum correlation threshold that an exogenous time series must exhibit, in order to be beneficial. Furthermore, the proposed approach has been specifically designed to be used in the context of big data, thus making it possible to efficiently process very large time series. To evaluate the performance of the proposed approach we use data from Spanish electricity prices. 
Results have been compared to other multivariate approaches showing remarkable improvements both in terms of accuracy and execution time.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } This paper introduces a novel algorithm for big data time series forecasting. Its main novelty lies in its ability to deal with multivariate data, i.e. to consider multiple time series simultaneously, in order to make multi-output predictions. Real-world processes are typically characterised by several interrelated variables, and the future occurrence of certain time series cannot be explained without understanding the influence that other time series might have on the target time series. One key issue in the context of the multivariate analysis is to determine a priori whether exogenous variables must be included in the model or not. To deal with this, a correlation analysis is used to find a minimum correlation threshold that an exogenous time series must exhibit, in order to be beneficial. Furthermore, the proposed approach has been specifically designed to be used in the context of big data, thus making it possible to efficiently process very large time series. To evaluate the performance of the proposed approach we use data from Spanish electricity prices. Results have been compared to other multivariate approaches showing remarkable improvements both in terms of accuracy and execution time. |
F. Martinez-Alvarez and A. Schmutz and G. Asencio-Cortes and J. Jacques A Novel Hybrid Algorithm to Forecast Functional Time Series Based on Pattern Sequence Similarity with Application to Electricity Demand (Journal Article) Energies, 12 (94), pp. 1-18, 2019, ISSN: 1996-1073. (Abstract | Links | BibTeX | Tags: energy, time series) @article{en12010094b, title = {A Novel Hybrid Algorithm to Forecast Functional Time Series Based on Pattern Sequence Similarity with Application to Electricity Demand}, author = {F. Martinez-Alvarez and A. Schmutz and G. Asencio-Cortes and J. Jacques}, url = {http://www.mdpi.com/1996-1073/12/1/94}, doi = {10.3390/en12010094}, issn = {1996-1073}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {94}, pages = {1-18}, abstract = {The forecasting of future values is a very challenging task. In almost all scientific disciplines, the analysis of time series provides useful information and even economic benefits. In this context, this paper proposes a novel hybrid algorithm to forecast functional time series with arbitrary prediction horizons. It integrates a well-known clustering functional data algorithm into a forecasting strategy based on pattern sequence similarity, which was originally developed for discrete time series. The new approach assumes that some patterns are repeated over time, and it attempts to discover them and evaluate their immediate future. Hence, the algorithm first applies a clustering functional time series algorithm, i.e., it assigns labels to every data unit (it may represent either one hour, or one day, or any arbitrary length). As a result, the time series is transformed into a sequence of labels. Later, it retrieves the sequence of labels occurring just after the sample that we want to be forecasted. This sequence is searched for within the historical data, and every time it is found, the sample immediately after is stored. 
Once the searching process is terminated, the output is generated by weighting all stored data. The performance of the approach has been tested on real-world datasets related to electricity demand and compared to other existing methods, reporting very promising results. Finally, a statistical significance test has been carried out to confirm the suitability of the election of the compared methods. In conclusion, a novel algorithm to forecast functional time series is proposed with very satisfactory results when assessed in the context of electricity demand.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } The forecasting of future values is a very challenging task. In almost all scientific disciplines, the analysis of time series provides useful information and even economic benefits. In this context, this paper proposes a novel hybrid algorithm to forecast functional time series with arbitrary prediction horizons. It integrates a well-known clustering functional data algorithm into a forecasting strategy based on pattern sequence similarity, which was originally developed for discrete time series. The new approach assumes that some patterns are repeated over time, and it attempts to discover them and evaluate their immediate future. Hence, the algorithm first applies a clustering functional time series algorithm, i.e., it assigns labels to every data unit (it may represent either one hour, or one day, or any arbitrary length). As a result, the time series is transformed into a sequence of labels. Later, it retrieves the sequence of labels occurring just after the sample that we want to be forecasted. This sequence is searched for within the historical data, and every time it is found, the sample immediately after is stored. Once the searching process is terminated, the output is generated by weighting all stored data. 
The performance of the approach has been tested on real-world datasets related to electricity demand and compared to other existing methods, reporting very promising results. Finally, a statistical significance test has been carried out to confirm the suitability of the election of the compared methods. In conclusion, a novel algorithm to forecast functional time series is proposed with very satisfactory results when assessed in the context of electricity demand. |
Y. Lin and I. Koprinska and M. Rana and A. Troncoso Pattern Sequence Neural Network for Solar Power Forecasting (Conference) ICONIP 26th International Conference on Neural Information Processing, 2019. (BibTeX | Tags: energy, time series) @conference{ICONIP19, title = {Pattern Sequence Neural Network for Solar Power Forecasting}, author = {Y. Lin and I. Koprinska and M. Rana and A. Troncoso}, year = {2019}, date = {2019-01-01}, booktitle = {ICONIP 26th International Conference on Neural Information Processing}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {conference} } |
Ó. Trull and J. C. García-Díaz and A. Troncoso Application of Discrete-Interval Moving Seasonalities to Spanish Electricity Demand Forecasting during Easter (Journal Article) Energies, 12 (6), pp. 1083, 2019. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energies2019, title = {Application of Discrete-Interval Moving Seasonalities to Spanish Electricity Demand Forecasting during Easter}, author = {Ó. Trull and J. C. García-Díaz and A. Troncoso }, url = {https://www.mdpi.com/1996-1073/12/6/1083}, doi = {10.3390/en12061083}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {6}, pages = {1083}, abstract = {Forecasting electricity demand through time series is a tool used by transmission system operators to establish future operating conditions. The accuracy of these forecasts is essential for the precise development of activity. However, the accuracy of the forecasts is enormously subject to the calendar effect. The multiple seasonal Holt–Winters models are widely used due to the great precision and simplicity that they offer. Usually, these models relate this calendar effect to external variables that contribute to modification of their forecasts a posteriori. In this work, a new point of view is presented, where the calendar effect constitutes a built-in part of the Holt–Winters model. In particular, the proposed model incorporates discrete-interval moving seasonalities. Moreover, a clear example of the application of this methodology to situations that are difficult to treat, such as the days of Easter, is presented. 
The results show that the proposed model performs well, outperforming the regular Holt–Winters model and other methods such as artificial neural networks and Exponential Smoothing State Space Model with Box-Cox Transformation, ARMA Errors, Trend and Seasonal Components (TBATS) methods.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Forecasting electricity demand through time series is a tool used by transmission system operators to establish future operating conditions. The accuracy of these forecasts is essential for the precise development of activity. However, the accuracy of the forecasts is enormously subject to the calendar effect. The multiple seasonal Holt–Winters models are widely used due to the great precision and simplicity that they offer. Usually, these models relate this calendar effect to external variables that contribute to modification of their forecasts a posteriori. In this work, a new point of view is presented, where the calendar effect constitutes a built-in part of the Holt–Winters model. In particular, the proposed model incorporates discrete-interval moving seasonalities. Moreover, a clear example of the application of this methodology to situations that are difficult to treat, such as the days of Easter, is presented. The results show that the proposed model performs well, outperforming the regular Holt–Winters model and other methods such as artificial neural networks and Exponential Smoothing State Space Model with Box-Cox Transformation, ARMA Errors, Trend and Seasonal Components (TBATS) methods. |
M. S. Tehrany and S. Jones and F. Shabani and F. Martínez-Álvarez and D. T. Bui A Novel Ensemble Modelling Approach for the Spatial Prediction of Tropical Forest Fire Susceptibility Using Logitboost Machine Learning Classifier and Multi-source Geospatial Data (Journal Article) Theoretical and Applied Climatology, 137 , pp. 637-653, 2019. (Links | BibTeX | Tags: natural disasters, time series) @article{TEHRANY19, title = {A Novel Ensemble Modelling Approach for the Spatial Prediction of Tropical Forest Fire Susceptibility Using Logitboost Machine Learning Classifier and Multi-source Geospatial Data}, author = {M. S. Tehrany and S. Jones and F. Shabani and F. Martínez-Álvarez and D. T. Bui}, url = {https://link.springer.com/article/10.1007/s00704-018-2628-9}, doi = {10.1007/s00704-018-2628-9}, year = {2019}, date = {2019-01-01}, journal = {Theoretical and Applied Climatology}, volume = {137}, pages = {637-653}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
A. Galicia and R. Talavera-Llames and A. Troncoso and I. Koprinska and F. Martínez-Álvarez Multi-step forecasting for big data time series based on ensemble learning (Journal Article) Knowledge-Based Systems, 163 , pp. 830-841, 2019. (Links | BibTeX | Tags: big data, time series) @article{GALICIA19, title = {Multi-step forecasting for big data time series based on ensemble learning}, author = {A. Galicia and R. Talavera-Llames and A. Troncoso and I. Koprinska and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705118304957}, doi = {10.1016/j.knosys.2018.10.009}, year = {2019}, date = {2019-01-01}, journal = {Knowledge-Based Systems}, volume = {163}, pages = {830-841}, keywords = {big data, time series}, pubstate = {published}, tppubtype = {article} } |
F. Divina and M. García-Torres and F. Gómez-Vela and J. L. Vázquez Noguera A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings (Journal Article) Energies, 12 (10), pp. 1934, 2019. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energies2019b, title = {A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings}, author = {F. Divina and M. García-Torres and F. Gómez-Vela and J. L. Vázquez Noguera}, url = {https://www.mdpi.com/1996-1073/12/10/1934}, doi = {10.3390/en12101934}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {10}, pages = {1934}, abstract = {Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could allow improving the energy efficiency of such buildings, as well as help to spot problems related to wasting of energy. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies that can be used to this aim. To do this, we used the data regarding the electric consumption registered by thirteen buildings located in a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. 
In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could allow improving the energy efficiency of such buildings, as well as help to spot problems related to wasting of energy. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies that can be used to this aim. To do this, we used the data regarding the electric consumption registered by thirteen buildings located in a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task. |
2018 |
D. Gutiérrez-Avilés and R. Giráldez and F. J. Gil-Cumbreras and C. Rubio-Escudero TRIQ: a new method to evaluate triclusters (Journal Article) BioData Mining, 11 (1), pp. 15, 2018. (Abstract | Links | BibTeX | Tags: bioinformatics, time series) @article{Gutierrez-Aviles2018, title = {TRIQ: a new method to evaluate triclusters}, author = {D. Gutiérrez-Avilés and R. Giráldez and F. J. Gil-Cumbreras and C. Rubio-Escudero}, url = {https://biodatamining.biomedcentral.com/articles/10.1186/s13040-018-0177-5}, doi = {10.1186/s13040-018-0177-5}, year = {2018}, date = {2018-01-01}, journal = {BioData Mining}, volume = {11}, number = {1}, pages = {15}, abstract = {Triclustering has shown to be a valuable tool for the analysis of microarray data since its appearance as an improvement of classical clustering and biclustering techniques. The standard for validation of triclustering is based on three different measures: correlation, graphic similarity of the patterns and functional annotations for the genes extracted from the Gene Ontology project (GO).}, keywords = {bioinformatics, time series}, pubstate = {published}, tppubtype = {article} } Triclustering has shown to be a valuable tool for the analysis of microarray data since its appearance as an improvement of classical clustering and biclustering techniques. The standard for validation of triclustering is based on three different measures: correlation, graphic similarity of the patterns and functional annotations for the genes extracted from the Gene Ontology project (GO). |
A. Troncoso and P. Ribera and G. Asencio-Cortés and I. Vega and D. Gallego Imbalanced classification techniques for monsoon forecasting based on a new climatic time series (Journal Article) Environmental Modelling & Software, 106 (6), pp. 48-56, 2018. (Abstract | Links | BibTeX | Tags: time series) @article{ENV2018, title = {Imbalanced classification techniques for monsoon forecasting based on a new climatic time series}, author = {A. Troncoso and P. Ribera and G. Asencio-Cortés and I. Vega and D. Gallego}, url = {https://www.sciencedirect.com/science/article/pii/S1364815217301950}, doi = {10.1016/j.envsoft.2017.11.024}, year = {2018}, date = {2018-01-01}, journal = {Environmental Modelling & Software}, volume = {106}, number = {6}, pages = {48-56}, abstract = {Monsoons have been widely studied in the literature due to their climatic impact related to precipitation and temperature over different regions around the world. In this work, data mining techniques, namely imbalanced classification techniques, are proposed in order to check the capability of climate indices to capture and forecast the evolution of the Western North Pacific Summer Monsoon. Thus, the main goal is to predict if the monsoon will be an extreme monsoon for a temporal horizon of a month. Firstly, a new monthly index of the monsoon related to its intensity has been generated. Later, the problem of forecasting has been transformed into a binary imbalanced classification problem and a set of representative techniques, such as models based on trees, models based on rules, black box models and ensemble techniques, are applied to obtain the forecasts. 
From the results obtained, it can be concluded that the methodology proposed here reports promising results according to the quality measures evaluated and predicts extreme monsoons for a temporal horizon of a month with a high accuracy.}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } Monsoons have been widely studied in the literature due to their climatic impact related to precipitation and temperature over different regions around the world. In this work, data mining techniques, namely imbalanced classification techniques, are proposed in order to check the capability of climate indices to capture and forecast the evolution of the Western North Pacific Summer Monsoon. Thus, the main goal is to predict if the monsoon will be an extreme monsoon for a temporal horizon of a month. Firstly, a new monthly index of the monsoon related to its intensity has been generated. Later, the problem of forecasting has been transformed into a binary imbalanced classification problem and a set of representative techniques, such as models based on trees, models based on rules, black box models and ensemble techniques, are applied to obtain the forecasts. From the results obtained, it can be concluded that the methodology proposed here reports promising results according to the quality measures evaluated and predicts extreme monsoons for a temporal horizon of a month with a high accuracy. |
R. Pérez-Chacón and J. M. Luna and A. Troncoso and F. Martínez-Álvarez and J. C. Riquelme Big data analytics for discovering electricity consumption patterns in smart cities (Journal Article) Energies, 11 (3), pp. 683, 2018. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{Energies2018, title = {Big data analytics for discovering electricity consumption patterns in smart cities}, author = {R. Pérez-Chacón and J. M. Luna and A. Troncoso and F. Martínez-Álvarez and J. C. Riquelme}, url = {http://www.mdpi.com/1996-1073/11/3/683 }, doi = {10.3390/en11030683 }, year = {2018}, date = {2018-01-01}, journal = {Energies}, volume = {11}, number = {3}, pages = {683}, abstract = {New technologies such as sensor networks have been incorporated into the management of buildings for organizations and cities. Sensor networks have led to an exponential increase in the volume of data available in recent years, which can be used to extract consumption patterns for the purposes of energy and monetary savings. For this reason, new approaches and strategies are needed to analyze information in big data environments. This paper proposes a methodology to extract electric energy consumption patterns in big data time series, so that very valuable conclusions can be made for managers and governments. The methodology is based on the study of four clustering validity indices in their parallelized versions along with the application of a clustering technique. In particular, this work uses a voting system to choose an optimal number of clusters from the results of the indices, as well as the application of the distributed version of the k-means algorithm included in Apache Spark’s Machine Learning Library. The results, using electricity consumption for the years 2011–2017 for eight buildings of a public university, are presented and discussed. 
In addition, the performance of the proposed methodology is evaluated using synthetic big data, which can represent thousands of buildings in a smart city. Finally, policies derived from the patterns discovered are proposed to optimize energy usage across the university campus.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } New technologies such as sensor networks have been incorporated into the management of buildings for organizations and cities. Sensor networks have led to an exponential increase in the volume of data available in recent years, which can be used to extract consumption patterns for the purposes of energy and monetary savings. For this reason, new approaches and strategies are needed to analyze information in big data environments. This paper proposes a methodology to extract electric energy consumption patterns in big data time series, so that very valuable conclusions can be made for managers and governments. The methodology is based on the study of four clustering validity indices in their parallelized versions along with the application of a clustering technique. In particular, this work uses a voting system to choose an optimal number of clusters from the results of the indices, as well as the application of the distributed version of the k-means algorithm included in Apache Spark’s Machine Learning Library. The results, using electricity consumption for the years 2011–2017 for eight buildings of a public university, are presented and discussed. In addition, the performance of the proposed methodology is evaluated using synthetic big data, which can represent thousands of buildings in a smart city. Finally, policies derived from the patterns discovered are proposed to optimize energy usage across the university campus. |
A. Galicia and J. F. Torres and F. Martínez-Álvarez and A. Troncoso A novel Spark-based multi-step forecasting algorithm for big data time series (Journal Article) Information Sciences, 467 , pp. 800-818, 2018. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{INFSCI2018, title = {A novel Spark-based multi-step forecasting algorithm for big data time series}, author = {A. Galicia and J. F. Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S0020025518304493}, doi = {10.1016/j.ins.2018.06.010}, year = {2018}, date = {2018-01-01}, journal = {Information Sciences}, volume = {467}, pages = {800-818}, abstract = {This paper presents different scalable methods for predicting big time series, namely time series with a high frequency measurement. Methods are also developed to deal with arbitrary prediction horizons. The Apache Spark framework is proposed for distributed computing in order to achieve the scalability of the methods. Prediction methods have been developed using Spark’s MLlib library for machine learning. Since the library does not support multivariate regression, the prediction problem is formulated as h prediction sub-problems, where h is the number of future values to predict, that is, the prediction horizon. Furthermore, different kinds of representative methods have been chosen, such as decision trees, two tree-based ensemble techniques (Gradient-Boosted and Random Forest) and a linear regression method as a reference method for comparisons. Finally, the methodology has been tested in a real time series of electrical demand in Spain, with a time interval of ten minutes between measurements.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } This paper presents different scalable methods for predicting big time series, namely time series with a high frequency measurement. 
Methods are also developed to deal with arbitrary prediction horizons. The Apache Spark framework is proposed for distributed computing in order to achieve the scalability of the methods. Prediction methods have been developed using Spark’s MLlib library for machine learning. Since the library does not support multivariate regression, the prediction problem is formulated as h prediction sub-problems, where h is the number of future values to predict, that is, the prediction horizon. Furthermore, different kinds of representative methods have been chosen, such as decision trees, two tree-based ensemble techniques (Gradient-Boosted and Random Forest) and a linear regression method as a reference method for comparisons. Finally, the methodology has been tested in a real time series of electrical demand in Spain, with a time interval of ten minutes between measurements. |
R. Talavera-Llames and R. Pérez-Chacón and A. Troncoso and F. Martínez-Álvarez Big data time series forecasting based on nearest neighbors distributed computing with Spark (Journal Article) Knowledge-Based Systems, 161 (1), pp. 12-25, 2018. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{KNOSYS2018b, title = {Big data time series forecasting based on nearest neighbors distributed computing with Spark}, author = {R. Talavera-Llames and R. Pérez-Chacón and A. Troncoso and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S0950705118303770}, doi = {10.1016/j.knosys.2018.07.026}, year = {2018}, date = {2018-01-01}, journal = {Knowledge-Based Systems}, volume = {161}, number = {1}, pages = {12-25}, abstract = {A new approach for big data forecasting based on the k-weighted nearest neighbours algorithm is introduced in this work. Such an algorithm has been developed for distributed computing under the Apache Spark framework. Every phase of the algorithm is explained in this work, along with how the optimal values of the input parameters required for the algorithm are obtained. In order to test the developed algorithm, a Spanish energy consumption big data time series has been used. The accuracy of the prediction has been assessed showing remarkable results. Additionally, the optimal configuration of a Spark cluster has been discussed. Finally, a scalability analysis of the algorithm has been conducted leading to the conclusion that the proposed algorithm is highly suitable for big data environments.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } A new approach for big data forecasting based on the k-weighted nearest neighbours algorithm is introduced in this work. Such an algorithm has been developed for distributed computing under the Apache Spark framework. 
Every phase of the algorithm is explained in this work, along with how the optimal values of the input parameters required for the algorithm are obtained. In order to test the developed algorithm, a Spanish energy consumption big data time series has been used. The accuracy of the prediction has been assessed showing remarkable results. Additionally, the optimal configuration of a Spark cluster has been discussed. Finally, a scalability analysis of the algorithm has been conducted leading to the conclusion that the proposed algorithm is highly suitable for big data environments. |
J. F. Torres and A. Galicia and A. Troncoso and F. Martínez-Álvarez A scalable approach based on deep learning for big data time series forecasting (Journal Article) Integrated Computer-Aided Engineering, 25 (4), pp. 335-348, 2018. (Abstract | Links | BibTeX | Tags: deep learning, energy, time series) @article{ICAE2018, title = {A scalable approach based on deep learning for big data time series forecasting}, author = {J. F. Torres and A. Galicia and A. Troncoso and F. Martínez-Álvarez}, url = {https://content.iospress.com/articles/integrated-computer-aided-engineering/ica580}, doi = {10.3233/ICA-180580}, year = {2018}, date = {2018-01-01}, journal = {Integrated Computer-Aided Engineering}, volume = {25}, number = {4}, pages = {335-348}, abstract = {This paper presents a method based on deep learning to deal with big data times series forecasting. The deep feed forward neural network provided by the H2O big data analysis framework has been used along with the Apache Spark platform for distributed computing. Since H2O does not allow the conduction of multi-step regression, a general-purpose methodology that can be used for prediction horizons with arbitrary length is proposed here, being the prediction horizon, h, the number of future values to be predicted. The solution consists in splitting the problem into h forecasting subproblems, being h the number of samples to be simultaneously predicted. Thus, the best prediction model for each subproblem can be obtained, making easier its parallelization and adaptation to the big data context. Moreover, a grid search is carried out to obtain the optimal hyperparameters of the deep learning-based approach. Results from a real-world dataset composed of electricity consumption in Spain, with a ten-minute frequency sampling rate, from 2007 to 2016 are reported. In particular, the accuracy and runtimes versus computing resources and size of the dataset are analyzed. 
Finally, the performance and the scalability of the proposed method is compared to other recently published techniques, showing to be a suitable method to process big data time series.}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } This paper presents a method based on deep learning to deal with big data times series forecasting. The deep feed forward neural network provided by the H2O big data analysis framework has been used along with the Apache Spark platform for distributed computing. Since H2O does not allow the conduction of multi-step regression, a general-purpose methodology that can be used for prediction horizons with arbitrary length is proposed here, being the prediction horizon, h, the number of future values to be predicted. The solution consists in splitting the problem into h forecasting subproblems, being h the number of samples to be simultaneously predicted. Thus, the best prediction model for each subproblem can be obtained, making easier its parallelization and adaptation to the big data context. Moreover, a grid search is carried out to obtain the optimal hyperparameters of the deep learning-based approach. Results from a real-world dataset composed of electricity consumption in Spain, with a ten-minute frequency sampling rate, from 2007 to 2016 are reported. In particular, the accuracy and runtimes versus computing resources and size of the dataset are analyzed. Finally, the performance and the scalability of the proposed method is compared to other recently published techniques, showing to be a suitable method to process big data time series. |
J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez Deep learning for big data time series forecasting applied to solar power (Conference) SOCO 13th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing 2018. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{SOCO2018, title = {Deep learning for big data time series forecasting applied to solar power}, author = {J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-319-94120-2_12}, year = {2018}, date = {2018-01-01}, booktitle = {SOCO 13th International Conference on Soft Computing Models in Industrial and Environmental Applications}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
Z. Wang and I. Koprinska and A. Troncoso and F. Martínez-Álvarez Static and dynamic ensembles of neural networks for solar power forecasting (Conference) IJCNN International Joint Conference on Neural Networks, 2018. (BibTeX | Tags: energy, time series) @conference{IJCNN2018, title = {Static and dynamic ensembles of neural networks for solar power forecasting}, author = {Z. Wang and I. Koprinska and A. Troncoso and F. Martínez-Álvarez}, year = {2018}, date = {2018-01-01}, booktitle = {IJCNN International Joint Conference on Neural Networks}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {conference} } |
E. Florido and G. Asencio-Cortes and J. L. Aznarte and C. Rubio-Escudero and F. Martinez-Alvarez A novel tree-based algorithm to discover seismic patterns in earthquake catalogs (Journal Article) Computers and Geosciences, (115), pp. 96-104, 2018, ISSN: 0098-3004. (Abstract | Links | BibTeX | Tags: natural disasters, time series) @article{Florido2018, title = {A novel tree-based algorithm to discover seismic patterns in earthquake catalogs}, author = {E. Florido and G. Asencio-Cortes and J. L. Aznarte and C. Rubio-Escudero and F. Martinez-Alvarez}, doi = {10.1016/j.cageo.2018.03.005}, issn = {0098-3004}, year = {2018}, date = {2018-01-01}, journal = {Computers and Geosciences}, number = {115}, pages = {96-104}, abstract = {A novel methodology is introduced in this research study to detect seismic precursors. Based on an existing approach, the new methodology searches for patterns in the historical data. Such patterns may contain statistical or soil dynamics information. It improves the original version in several aspects. First, new seismicity indicators have been used to characterize earthquakes. Second, a machine learning clustering algorithm has been applied in a very flexible way, thus allowing the discovery of new data groupings. Third, a novel search strategy is proposed in order to obtain non-overlapped patterns. And, fourth, arbitrary lengths of patterns are searched for, thus discovering long and short-term behaviors that may influence in the occurrence of medium-large earthquakes. The methodology has been applied to seven different datasets, from three different regions, namely the Iberian Peninsula, Chile and Japan. Reported results show a remarkable improvement with respect to the former version, in terms of all evaluated quality measures. 
In particular, the number of false positives has decreased and the positive predictive values increased, both of them in a very remarkable manner.}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } A novel methodology is introduced in this research study to detect seismic precursors. Based on an existing approach, the new methodology searches for patterns in the historical data. Such patterns may contain statistical or soil dynamics information. It improves the original version in several aspects. First, new seismicity indicators have been used to characterize earthquakes. Second, a machine learning clustering algorithm has been applied in a very flexible way, thus allowing the discovery of new data groupings. Third, a novel search strategy is proposed in order to obtain non-overlapped patterns. And, fourth, arbitrary lengths of patterns are searched for, thus discovering long and short-term behaviors that may influence in the occurrence of medium-large earthquakes. The methodology has been applied to seven different datasets, from three different regions, namely the Iberian Peninsula, Chile and Japan. Reported results show a remarkable improvement with respect to the former version, in terms of all evaluated quality measures. In particular, the number of false positives has decreased and the positive predictive values increased, both of them in a very remarkable manner. |
X. Shang and X. Li and A. Morales-Esteban and G. Asencio-Cortes and Z. Wang Data field-based K-means clustering for spatio-temporal seismicity analysis and hazard assessment (Journal Article) Remote Sensing, 10 (461), pp. 1-22, 2018, ISSN: 2072-4292. (Abstract | Links | BibTeX | Tags: natural disasters, time series) @article{Shang2018b, title = {Data field-based K-means clustering for spatio-temporal seismicity analysis and hazard assessment}, author = {X. Shang and X. Li and A. Morales-Esteban and G. Asencio-Cortes and Z. Wang}, doi = {10.3390/rs10030461}, issn = {2072-4292}, year = {2018}, date = {2018-01-01}, journal = {Remote Sensing}, volume = {10}, number = {461}, pages = {1-22}, abstract = {Microseismic sensing taking advantage of sensors can remotely monitor seismic activities and evaluate seismic hazard. Compared with experts' seismic event clusters, clustering algorithms are more objective, and they can handle many seismic events. Many methods have been proposed for seismic event clustering and the K-means clustering technique has become the most famous one. However, K-means can be affected by noise events (large location error events) and initial cluster centers. In this paper, a data field-based K-means clustering methodology is proposed for seismicity analysis. The application of synthetic data and real seismic data have shown its effectiveness in removing noise events as well as finding good initial cluster centers. Furthermore, we introduced the time parameter into the K-means clustering process and applied it to seismic events obtained from the Chinese Yongshaba mine. The results show that the time-event location distance and data field-based K-means clustering can divide seismic events by both space and time, which provides a new insight for seismicity analysis compared with event location distance and data field-based K-means clustering. 
The Krzanowski-Lai (KL) index obtains a maximum value when the number of clusters is five: the energy index (EI) shows that clusters C1, C3 and C5 have very critical periods. In conclusion, the time-event location distance, and the data field-based K-means clustering can provide an effective methodology for seismicity analysis and hazard assessment. In addition, further study can be done by considering time-event location-magnitude distances.}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } Microseismic sensing taking advantage of sensors can remotely monitor seismic activities and evaluate seismic hazard. Compared with experts' seismic event clusters, clustering algorithms are more objective, and they can handle many seismic events. Many methods have been proposed for seismic event clustering and the K-means clustering technique has become the most famous one. However, K-means can be affected by noise events (large location error events) and initial cluster centers. In this paper, a data field-based K-means clustering methodology is proposed for seismicity analysis. The application of synthetic data and real seismic data have shown its effectiveness in removing noise events as well as finding good initial cluster centers. Furthermore, we introduced the time parameter into the K-means clustering process and applied it to seismic events obtained from the Chinese Yongshaba mine. The results show that the time-event location distance and data field-based K-means clustering can divide seismic events by both space and time, which provides a new insight for seismicity analysis compared with event location distance and data field-based K-means clustering. The Krzanowski-Lai (KL) index obtains a maximum value when the number of clusters is five: the energy index (EI) shows that clusters C1, C3 and C5 have very critical periods. 
In conclusion, the time-event location distance, and the data field-based K-means clustering can provide an effective methodology for seismicity analysis and hazard assessment. In addition, further study can be done by considering time-event location-magnitude distances. |
G. Asencio-Cortes and A. Morales-Esteban and X. Shang and F. Martinez-Alvarez Earthquake prediction in California using regression algorithms and cloud-based big data infrastructure (Journal Article) Computers and Geosciences, (115), pp. 198-210, 2018, ISSN: 0098-3004. (Abstract | Links | BibTeX | Tags: big data, natural disasters, time series) @article{Asencio-Cortes2018b, title = {Earthquake prediction in California using regression algorithms and cloud-based big data infrastructure}, author = {G. Asencio-Cortes and A. Morales-Esteban and X. Shang and F. Martinez-Alvarez}, doi = {10.1016/j.cageo.2017.10.011}, issn = {0098-3004}, year = {2018}, date = {2018-01-01}, journal = {Computers and Geosciences}, number = {115}, pages = {198-210}, abstract = {Earthquake magnitude prediction is a challenging problem that has been widely studied during the last decades. Statistical, geophysical and machine learning approaches can be found in literature, with no particularly satisfactory results. In recent years, powerful computational techniques to analyze big data have emerged, making possible the analysis of massive datasets. These new methods make use of physical resources like cloud based architectures. California is known for being one of the regions with highest seismic activity in the world and many data are available. In this work, the use of several regression algorithms combined with ensemble learning is explored in the context of big data (1 GB catalog is used), in order to predict earthquakes magnitude within the next seven days. Apache Spark framework, H2O library in R language and Amazon cloud infrastructure were been used, reporting very promising results.}, keywords = {big data, natural disasters, time series}, pubstate = {published}, tppubtype = {article} } Earthquake magnitude prediction is a challenging problem that has been widely studied during the last decades. 
Statistical, geophysical and machine learning approaches can be found in literature, with no particularly satisfactory results. In recent years, powerful computational techniques to analyze big data have emerged, making possible the analysis of massive datasets. These new methods make use of physical resources like cloud based architectures. California is known for being one of the regions with highest seismic activity in the world and many data are available. In this work, the use of several regression algorithms combined with ensemble learning is explored in the context of big data (1 GB catalog is used), in order to predict earthquakes magnitude within the next seven days. Apache Spark framework, H2O library in R language and Amazon cloud infrastructure were been used, reporting very promising results. |
A. Gomez-Losada and G. Asencio-Cortes and F. Martinez-Alvarez and J. C. Riquelme A novel approach to forecast urban surface-level ozone considering heterogeneous locations and limited information (Journal Article) Environmental Modelling and Software, 110, pp. 52-61, 2018, ISSN: 1364-8152. (Links | BibTeX | Tags: time series) @article{Gomez-Losada2018b, title = {A novel approach to forecast urban surface-level ozone considering heterogeneous locations and limited information}, author = {A. Gomez-Losada and G. Asencio-Cortes and F. Martinez-Alvarez and J. C. Riquelme}, doi = {10.1016/j.envsoft.2018.08.013}, issn = {1364-8152}, year = {2018}, date = {2018-01-01}, journal = {Environmental Modelling and Software}, volume = {110}, pages = {52-61}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } |
N. Bokde and Marcus W. Beck and F. Martínez-Álvarez and K. Kulat A novel imputation methodology for time series based on pattern sequence forecasting (Journal Article) Pattern Recognition Letters, 116 , pp. 88-96, 2018. (Abstract | Links | BibTeX | Tags: time series) @article{BOKDE201888, title = {A novel imputation methodology for time series based on pattern sequence forecasting}, author = {N. Bokde and Marcus W. Beck and F. Martínez-Álvarez and K. Kulat}, url = {http://www.sciencedirect.com/science/article/pii/S0167865518306500}, doi = {10.1016/j.patrec.2018.09.020}, year = {2018}, date = {2018-01-01}, journal = {Pattern Recognition Letters}, volume = {116}, pages = {88-96}, abstract = {The Pattern Sequence Forecasting (PSF) algorithm is a previously described algorithm that identifies patterns in time series data and forecasts values using periodic characteristics of the observations. A new method for univariate time series is introduced that modifies the PSF algorithm to simultaneously forecast and backcast missing values for imputation. The imputePSF method extends PSF by characterizing repeating patterns of existing observations to provide a more precise estimate of missing values compared to more conventional methods, such as replacement with means or last observation carried forward. The imputation accuracy of imputePSF was evaluated by simulating varying amounts of missing observations with three univariate datasets. Comparisons of imputePSF with well-established methods using the same simulations demonstrated an overall reduction in error estimates. 
The imputePSF algorithm can produce more precise imputations on appropriate datasets, particularly those with periodic and repeating patterns.}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } The Pattern Sequence Forecasting (PSF) algorithm is a previously described algorithm that identifies patterns in time series data and forecasts values using periodic characteristics of the observations. A new method for univariate time series is introduced that modifies the PSF algorithm to simultaneously forecast and backcast missing values for imputation. The imputePSF method extends PSF by characterizing repeating patterns of existing observations to provide a more precise estimate of missing values compared to more conventional methods, such as replacement with means or last observation carried forward. The imputation accuracy of imputePSF was evaluated by simulating varying amounts of missing observations with three univariate datasets. Comparisons of imputePSF with well-established methods using the same simulations demonstrated an overall reduction in error estimates. The imputePSF algorithm can produce more precise imputations on appropriate datasets, particularly those with periodic and repeating patterns. |
F. Divina and A. Gilson and F. Gómez-Vela and M. García-Torres and J. F. Torres Stacking ensemble learning for short-term electricity consumption forecasting (Journal Article) Energies, 11 (4), pp. 949, 2018. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energy2018, title = {Stacking ensemble learning for short-term electricity consumption forecasting}, author = {F. Divina and A. Gilson and F. Gómez-Vela and M. García-Torres and J. F. Torres}, url = {https://www.mdpi.com/1996-1073/11/4/949}, doi = {10.3390/en11040949}, year = {2018}, date = {2018-01-01}, journal = {Energies}, volume = {11}, number = {4}, pages = {949}, abstract = {The ability to predict short-term electric energy demand would provide several benefits, both at the economic and environmental level. For example, it would allow for an efficient use of resources in order to face the actual demand, reducing the costs associated to the production as well as the emission of CO2. To this aim, in this paper we propose a strategy based on ensemble learning in order to tackle the short-term load forecasting problem. In particular, our approach is based on a stacking ensemble learning scheme, where the predictions produced by three base learning methods are used by a top level method in order to produce final predictions. We tested the proposed scheme on a dataset reporting the energy consumption in Spain over more than nine years. The obtained experimental results show that an approach for short-term electricity consumption forecasting based on ensemble learning can help in combining predictions produced by weaker learning methods in order to obtain superior results. In particular, the system produces a lower error with respect to the existing state-of-the-art techniques used on the same dataset. 
More importantly, this case study has shown that using an ensemble scheme can achieve very accurate predictions, and thus that it is a suitable approach for addressing the short-term load forecasting problem.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } The ability to predict short-term electric energy demand would provide several benefits, both at the economic and environmental level. For example, it would allow for an efficient use of resources in order to face the actual demand, reducing the costs associated to the production as well as the emission of CO2. To this aim, in this paper we propose a strategy based on ensemble learning in order to tackle the short-term load forecasting problem. In particular, our approach is based on a stacking ensemble learning scheme, where the predictions produced by three base learning methods are used by a top level method in order to produce final predictions. We tested the proposed scheme on a dataset reporting the energy consumption in Spain over more than nine years. The obtained experimental results show that an approach for short-term electricity consumption forecasting based on ensemble learning can help in combining predictions produced by weaker learning methods in order to obtain superior results. In particular, the system produces a lower error with respect to the existing state-of-the-art techniques used on the same dataset. More importantly, this case study has shown that using an ensemble scheme can achieve very accurate predictions, and thus that it is a suitable approach for addressing the short-term load forecasting problem. |
2017 |
J. F. Torres and A. Troncoso and F. Martínez-Álvarez Deep Learning - Based Approach for Time Series Forecasting with Application to Electricity Load (Conference) IWINAC International Work-Conference on the Interplay Between Natural and Artificial Computation, Lecture Notes in Computer Science 2017. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{IWINAC2017, title = {Deep Learning - Based Approach for Time Series Forecasting with Application to Electricity Load}, author = {J. F. Torres and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-319-59773-7_21}, year = {2017}, date = {2017-01-01}, booktitle = {IWINAC International Work-Conference on the Interplay Between Natural and Artificial Computation}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
A. Galicia and J. F. Torres and F. Martínez-Álvarez and A. Troncoso Scalable Forecasting Techniques Applied to Big Electricity Time Series (Conference) IWANN International Work-Conference on Artificial Neural Networks, Lecture Notes in Computer Science 2017. (Links | BibTeX | Tags: big data, energy, time series) @conference{IWANN2017, title = {Scalable Forecasting Techniques Applied to Big Electricity Time Series}, author = {A. Galicia and J. F. Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-319-59147-6_15}, year = {2017}, date = {2017-01-01}, booktitle = {IWANN International Work-Conference on Artificial Neural Networks}, series = {Lecture Notes in Computer Science}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
F. Martínez-Álvarez and A. Troncoso and J. C. Riquelme Recent Advances in Energy Time Series Forecasting (Journal Article) Energies, 10 (6), pp. 809, 2017. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energies2017, title = {Recent Advances in Energy Time Series Forecasting}, author = {F. Martínez-Álvarez and A. Troncoso and J. C. Riquelme}, url = {http://www.mdpi.com/1996-1073/10/6/809}, doi = {10.3390/en10060809}, year = {2017}, date = {2017-01-01}, journal = {Energies}, volume = {10}, number = {6}, pages = {809}, abstract = {This editorial summarizes the performance of the special issue entitled Energy Time Series Forecasting, which was published in MDPI’s Energies journal. The special issue took place in 2016 and accepted a total of 21 papers from twelve different countries. Electrical, solar, or wind energy forecasting were the most analyzed topics, introducing brand new methods with very sound results.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } This editorial summarizes the performance of the special issue entitled Energy Time Series Forecasting, which was published in MDPI’s Energies journal. The special issue took place in 2016 and accepted a total of 21 papers from twelve different countries. Electrical, solar, or wind energy forecasting were the most analyzed topics, introducing brand new methods with very sound results. |
F. Martínez-Álvarez and A. Troncoso and J. Reyes and M. Martínez-Ballesteros and J. C. Riquelme Applications of Computational Intelligence in Time Series (Journal Article) Computational Intelligence and Neuroscience, article id 9361749 , 2017. (Links | BibTeX | Tags: time series) @article{CIN2017, title = {Applications of Computational Intelligence in Time Series}, author = {F. Martínez-Álvarez and A. Troncoso and J. Reyes and M. Martínez-Ballesteros and J. C. Riquelme}, url = {https://www.hindawi.com/journals/cin/si/467684/}, doi = {10.1155/2017/9361749}, year = {2017}, date = {2017-01-01}, journal = {Computational Intelligence and Neuroscience}, volume = {article id 9361749}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } |
N. Bokde and A. Troncoso and G. Asencio-Cortés and K. Kulat and F. Martínez-Álvarez Pattern sequence similarity based techniques for wind speed forecasting (Conference) ITISE International Work-Conference on Time Series Analysis, 2017. (BibTeX | Tags: time series) @conference{ITISE2017, title = {Pattern sequence similarity based techniques for wind speed forecasting}, author = {N. Bokde and A. Troncoso and G. Asencio-Cortés and K. Kulat and F. Martínez-Álvarez}, year = {2017}, date = {2017-01-01}, booktitle = {ITISE International Work-Conference on Time Series Analysis}, keywords = {time series}, pubstate = {published}, tppubtype = {conference} } |
N. Bokde and G. Asencio-Cortes and F. Martinez-Alvarez and K. Kulat PSF: Introduction to R Package for Pattern Sequence Based Forecasting Algorithm (Journal Article) R Journal, 9 (1), pp. 324-333, 2017, ISSN: 2073-4859. (Abstract | BibTeX | Tags: time series) @article{Bokde2016a, title = {PSF: Introduction to R Package for Pattern Sequence Based Forecasting Algorithm}, author = {N. Bokde and G. Asencio-Cortes and F. Martinez-Alvarez and K. Kulat}, issn = {2073-4859}, year = {2017}, date = {2017-01-01}, journal = {R Journal}, volume = {9}, number = {1}, pages = {324-333}, abstract = {This paper discusses about an R package that implements the Pattern Sequence based Forecasting (PSF) algorithm, which was developed for univariate time series forecasting. This algorithm has been successfully applied to many different fields. The PSF algorithm consists of two major parts: clustering and prediction. The clustering part includes selection of the optimum number of clusters. It labels time series data with reference to such clusters. The prediction part includes functions like optimum window size selection for specific patterns and prediction of future values with reference to past pattern sequences. The PSF package consists of various functions to implement the PSF algorithm. It also contains a function which automates all other functions to obtain optimized prediction results. The aim of this package is to promote the PSF algorithm and to ease its implementation with minimum efforts. This paper describes all the functions in the PSF package with their syntax. It also provides a simple example of usage. 
Finally, the usefulness of this package is discussed by comparing it to auto.arima and ets, well-known time series forecasting functions available on CRAN repository.}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } This paper discusses about an R package that implements the Pattern Sequence based Forecasting (PSF) algorithm, which was developed for univariate time series forecasting. This algorithm has been successfully applied to many different fields. The PSF algorithm consists of two major parts: clustering and prediction. The clustering part includes selection of the optimum number of clusters. It labels time series data with reference to such clusters. The prediction part includes functions like optimum window size selection for specific patterns and prediction of future values with reference to past pattern sequences. The PSF package consists of various functions to implement the PSF algorithm. It also contains a function which automates all other functions to obtain optimized prediction results. The aim of this package is to promote the PSF algorithm and to ease its implementation with minimum efforts. This paper describes all the functions in the PSF package with their syntax. It also provides a simple example of usage. Finally, the usefulness of this package is discussed by comparing it to auto.arima and ets, well-known time series forecasting functions available on CRAN repository. |
G. Asencio-Cortes and S. Scitovski and R. Scitovski and F. Martinez-Alvarez Temporal analysis of Croatian seismogenic zones to improve earthquake magnitude prediction (Journal Article) Earth Science Informatics, 10 (3), pp. 303-320, 2017, ISSN: 1865-0481. (Links | BibTeX | Tags: natural disasters, time series) @article{AsencioCortes2017, title = {Temporal analysis of Croatian seismogenic zones to improve earthquake magnitude prediction}, author = {G. Asencio-Cortes and S. Scitovski and R. Scitovski and F. Martinez-Alvarez}, doi = {10.1007/s12145-017-0295-5}, issn = {1865-0481}, year = {2017}, date = {2017-01-01}, journal = {Earth Science Informatics}, volume = {10}, number = {3}, pages = {303-320}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
J. L. Amaro-Mellado and A. Morales-Esteban and G. Asencio-Cortes and F. Martinez-Alvarez Comparing seismic parameters for different source zone models in the Iberian Peninsula (Journal Article) Tectonophysics, (717), pp. 449-472, 2017, ISSN: 0040-1951. (Abstract | Links | BibTeX | Tags: natural disasters, time series) @article{Amaro-Mellado2017, title = {Comparing seismic parameters for different source zone models in the Iberian Peninsula}, author = {J. L. Amaro-Mellado and A. Morales-Esteban and G. Asencio-Cortes and F. Martinez-Alvarez}, doi = {10.1016/j.tecto.2017.08.032}, issn = {0040-1951}, year = {2017}, date = {2017-01-01}, journal = {Tectonophysics}, number = {717}, pages = {449-472}, abstract = {Seismical parameters of five seismogenic zonings for the Iberian Peninsula have been determined in this work. For that purpose, this research has two key goals. The first is to generate a seismic catalog. The second to calculate the seismical parameters of all the zones of the seismogenic zonings selected. The first key goal has been the creation of a catalog of earthquakes for the Iberian Peninsula and adjacent areas. First, the National Geographic Institute of Spain's catalog has been completed and reviewed with the information from other catalog reviews and specific studies. Second, all magnitude calculations have been homogenized. Third, all dependent data have been eliminated through declustering. Finally, the year of completeness for each magnitude has been considered. The Quaternary active faults database of Iberia has also been used as input data. All of this information has been integrated into a geographic information system. The second key aim is the calculation of the seismical parameters. The first parameter obtained has been the b-value. A method which considers different years of completeness in accordance with the magnitude has been used. Also, the annual rate of earthquakes per square kilometer has been calculated. 
Moreover, the maximum magnitude known that Quaternary active faults might generate and maximum magnitude recorded in the catalog have been determined. Finally, based solely on the statistical parameters obtained, a critical discussion of the seismogenic zonings of the Iberian Peninsula has been conducted. The results show that some zonings possess insufficient data for a proper calculation of the seismic parameters, from a statistical point of view.}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } Seismical parameters of five seismogenic zonings for the Iberian Peninsula have been determined in this work. For that purpose, this research has two key goals. The first is to generate a seismic catalog. The second to calculate the seismical parameters of all the zones of the seismogenic zonings selected. The first key goal has been the creation of a catalog of earthquakes for the Iberian Peninsula and adjacent areas. First, the National Geographic Institute of Spain's catalog has been completed and reviewed with the information from other catalog reviews and specific studies. Second, all magnitude calculations have been homogenized. Third, all dependent data have been eliminated through declustering. Finally, the year of completeness for each magnitude has been considered. The Quaternary active faults database of Iberia has also been used as input data. All of this information has been integrated into a geographic information system. The second key aim is the calculation of the seismical parameters. The first parameter obtained has been the b-value. A method which considers different years of completeness in accordance with the magnitude has been used. Also, the annual rate of earthquakes per square kilometer has been calculated. Moreover, the maximum magnitude known that Quaternary active faults might generate and maximum magnitude recorded in the catalog have been determined. 
Finally, based solely on the statistical parameters obtained, a critical discussion of the seismogenic zonings of the Iberian Peninsula has been conducted. The results show that some zonings possess insufficient data for a proper calculation of the seismic parameters, from a statistical point of view. |
2016 |
G. Asencio-Cortés and E. Florido and A. Troncoso and F. Martínez-Álvarez A novel methodology to predict urban traffic congestion with ensemble learning (Journal Article) Soft Computing, 20 , pp. 4205–4216, 2016. (Links | BibTeX | Tags: time series) @article{ASENCIO16, title = {A novel methodology to predict urban traffic congestion with ensemble learning}, author = {G. Asencio-Cortés and E. Florido and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/article/10.1007/s00500-016-2288-6}, doi = {10.1007/s00500-016-2288-6}, year = {2016}, date = {2016-11-01}, journal = {Soft Computing}, volume = {20}, pages = {4205–4216}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } |
G. Asencio-Cortés and F. Martínez-Álvarez Supervised learning applied to urban traffic congestion forecasting (Conference) KOI 16th International Conference on Operational Research, 2016, ISSN: 1849-5141. (Links | BibTeX | Tags: time series) @conference{ASENCIO16-2, title = {Supervised learning applied to urban traffic congestion forecasting}, author = {G. Asencio-Cortés and F. Martínez-Álvarez}, url = {http://hdoi.hr/koi2016/wp-content/uploads/2015/09/BookOfAbstracts2016-web.pdf}, issn = {1849-5141}, year = {2016}, date = {2016-09-20}, booktitle = {KOI 16th International Conference on Operational Research}, pages = {139-140}, keywords = {time series}, pubstate = {published}, tppubtype = {conference} } |
D. Gutiérrez-Avilés and C. Rubio-Escudero TRIQ: A Comprehensive Evaluation Measure for Triclustering Algorithms (Conference) Hybrid Artificial Intelligent Systems: 11th International Conference, HAIS 2016, Seville, Spain, April 18-20, 2016, Proceedings, Lecture Notes in Computer Science 2016. (Links | BibTeX | Tags: bioinformatics, time series) @conference{Gutiérrez-Avilés2016, title = {TRIQ: A Comprehensive Evaluation Measure for Triclustering Algorithms}, author = {D. Gutiérrez-Avilés and C. Rubio-Escudero}, url = {https://link.springer.com/chapter/10.1007/978-3-319-32034-2_56}, year = {2016}, date = {2016-01-01}, booktitle = {Hybrid Artificial Intelligent Systems: 11th International Conference, HAIS 2016, Seville, Spain, April 18-20, 2016, Proceedings}, series = {Lecture Notes in Computer Science}, keywords = {bioinformatics, time series}, pubstate = {published}, tppubtype = {conference} } |
R. Talavera-Llames and R. Pérez-Chacón and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez A Nearest Neighbours - Based Algorithm for Big Time Series Data Forecasting (Conference) HAIS 11th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2016. (Links | BibTeX | Tags: big data, energy, time series) @conference{HAIS2016b, title = {A Nearest Neighbours - Based Algorithm for Big Time Series Data Forecasting}, author = {R. Talavera-Llames and R. Pérez-Chacón and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-319-32034-2_15}, year = {2016}, date = {2016-01-01}, booktitle = {HAIS 11th International Conference on Hybrid Artificial Intelligence Systems}, series = {Lecture Notes in Computer Science}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {conference} } |