Publications
2023 |
D. Hadjout and A. Sebaa and J. F. Torres and F. Martínez-Álvarez Electricity consumption forecasting with outliers handling based on clustering and deep learning with application to the Algerian market (Journal Article) Expert Systems with Applications, 227 , pp. 120123, 2023. (Abstract | Links | BibTeX | Tags: clustering, deep learning, energy, time series) @article{HADJOUT23, title = {Electricity consumption forecasting with outliers handling based on clustering and deep learning with application to the Algerian market}, author = {D. Hadjout and A. Sebaa and J. F. Torres and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0957417423006255}, doi = {https://doi.org/10.1016/j.eswa.2023.120123}, year = {2023}, date = {2023-10-01}, journal = {Expert Systems with Applications}, volume = {227}, pages = {120123}, abstract = {The reduction of electricity loss and the effective management of electricity demand are vital operations for production and distribution electricity enterprises. To achieve these goals, accurate forecasts of aggregate and individual electricity consumers are necessary. A novel multistep forecasting method is developed to forecast medium-term electricity consumption of the Algerian economic sector. The proposed method goes through the following three steps: cleaning steps, clustering steps and forecasting step of each cluster. The aim of the first step is to detect and then replace outliers. To complete the first phase, Robust Exponential and Holt-Winters Smoothing algorithms are adapted. Then, to carry out accurate forecasting at a lowest level, K-Shape and K-Means clustering methods are utilized to extract similarities and identify customer consumption patterns as a second step. The third step entails developing a deep learning model based on Gated Recurrent Units to forecast the electricity consumption in each cluster. 
To validate the proposed method, we compared our results to the most known methods in literature like Autoregressive Integrated Moving Average, Seasonal Grey Model, LSTM networks, Temporal Convolutional Networks and two ensemble models. The results of several experiments conducted with 2000 electricity consumers during 14 years from an Algeria province (Bejaia) demonstrate that the proposed method provides remarkable prediction performances. Thus, prediction performances of the K-Shape-based clustering method reach much higher prediction accuracy. According to the MAPE metric, the results of the best predictions are equal to 2.04%. It is also notable that 87% of the clients have a considerably low prediction error.}, keywords = {clustering, deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } The reduction of electricity loss and the effective management of electricity demand are vital operations for production and distribution electricity enterprises. To achieve these goals, accurate forecasts of aggregate and individual electricity consumers are necessary. A novel multistep forecasting method is developed to forecast medium-term electricity consumption of the Algerian economic sector. The proposed method goes through the following three steps: cleaning steps, clustering steps and forecasting step of each cluster. The aim of the first step is to detect and then replace outliers. To complete the first phase, Robust Exponential and Holt-Winters Smoothing algorithms are adapted. Then, to carry out accurate forecasting at a lowest level, K-Shape and K-Means clustering methods are utilized to extract similarities and identify customer consumption patterns as a second step. The third step entails developing a deep learning model based on Gated Recurrent Units to forecast the electricity consumption in each cluster. 
To validate the proposed method, we compared our results to the most known methods in literature like Autoregressive Integrated Moving Average, Seasonal Grey Model, LSTM networks, Temporal Convolutional Networks and two ensemble models. The results of several experiments conducted with 2000 electricity consumers during 14 years from an Algeria province (Bejaia) demonstrate that the proposed method provides remarkable prediction performances. Thus, prediction performances of the K-Shape-based clustering method reach much higher prediction accuracy. According to the MAPE metric, the results of the best predictions are equal to 2.04%. It is also notable that 87% of the clients have a considerably low prediction error. |
J. F. Torres and S. Valencia and F. Martínez-Álvarez and N. Hoyos Predicting Wildfires in the Caribbean Using Multi-source Satellite Data and Deep Learning (Conference) IWANN 17th International Work-Conference on Artificial Neural Networks, 14135 , Lecture Notes in Computer Science 2023. (Links | BibTeX | Tags: deep learning, natural disasters, time series) @conference{TORRES23_IWANN, title = {Predicting Wildfires in the Caribbean Using Multi-source Satellite Data and Deep Learning}, author = {J. F. Torres and S. Valencia and F. Martínez-Álvarez and N. Hoyos}, url = {https://link.springer.com/chapter/10.1007/978-3-031-43078-7_1}, doi = {https://doi.org/10.1007/978-3-031-43078-7_1}, year = {2023}, date = {2023-09-30}, booktitle = {IWANN 17th International Work-Conference on Artificial Neural Networks}, volume = {14135}, pages = {3-14}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, natural disasters, time series}, pubstate = {published}, tppubtype = {conference} } |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals (Conference) IWANN 17th International Work-Conference on Artificial Neural Networks, 14134 , Lecture Notes in Computer Science 2023. (Links | BibTeX | Tags: deep learning, forecasting, time series) @conference{TRONCOSO-GARCIA23_IWANN, title = {Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-031-43085-5_50}, doi = {10.1007/978-3-031-43085-5_50}, year = {2023}, date = {2023-09-30}, booktitle = {IWANN 17th International Work-Conference on Artificial Neural Networks}, volume = {14134}, pages = {626–637}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, forecasting, time series}, pubstate = {published}, tppubtype = {conference} } |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés Embedded Temporal Feature Selection for Time Series Forecasting Using Deep Learning (Conference) IWANN 17th International Work-Conference on Artificial Neural Networks, 14135 , Lecture Notes in Computer Science 2023. (Links | BibTeX | Tags: deep learning, feature selection, time series) @conference{JIMENEZ-NAVARRO23_IWANN, title = {Embedded Temporal Feature Selection for Time Series Forecasting Using Deep Learning}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://link.springer.com/chapter/10.1007/978-3-031-43078-7_2}, doi = {https://doi.org/10.1007/978-3-031-43078-7_2}, year = {2023}, date = {2023-09-30}, booktitle = {IWANN 17th International Work-Conference on Artificial Neural Networks}, volume = {14135}, pages = {15-26}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, feature selection, time series}, pubstate = {published}, tppubtype = {conference} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado Springer, 749 , 2023, ISBN: 978-3-031-42529-5. (Links | BibTeX | Tags: big data, clustering, deep learning, IoT) @proceedings{SOCO2023a, title = {Proceedings of the 18th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2023) Salamanca, Spain, September 5-7, 2023, volume 1}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, editor = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-42529-5}, doi = {https://doi.org/10.1007/978-3-031-42529-5}, isbn = {978-3-031-42529-5}, year = {2023}, date = {2023-09-05}, volume = {749}, publisher = {Springer}, series = {Lecture Notes in Networks and Systems}, keywords = {big data, clustering, deep learning, IoT}, pubstate = {published}, tppubtype = {proceedings} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado Springer, 750 , 2023, ISBN: 978-3-031-42536-3. (Links | BibTeX | Tags: big data, clustering, deep learning, IoT) @proceedings{SOCO2023b, title = {Proceedings of the 18th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2023) Salamanca, Spain, September 5-7, 2023, volume 2}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, editor = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-42536-3}, doi = {10.1007/978-3-031-42536-3}, isbn = {978-3-031-42536-3}, year = {2023}, date = {2023-09-05}, volume = {750}, publisher = {Springer}, series = {Lecture Notes in Networks and Systems}, keywords = {big data, clustering, deep learning, IoT}, pubstate = {published}, tppubtype = {proceedings} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado Springer, 14001 , 2023, ISBN: 978-3-031-40725-3. (Links | BibTeX | Tags: big data, clustering, deep learning, IoT) @proceedings{HAIS2023, title = {Proceedings of the 18th International Conference on Hybrid Artificial Intelligent Systems (HAIS 2023) Salamanca, Spain, September 5-7, 2023}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, editor = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-40725-3}, doi = {https://doi.org/10.1007/978-3-031-40725-3}, isbn = {978-3-031-40725-3}, year = {2023}, date = {2023-09-05}, volume = {14001}, publisher = {Springer}, series = {Lecture Notes in Artificial Intelligence}, keywords = {big data, clustering, deep learning, IoT}, pubstate = {published}, tppubtype = {proceedings} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado Springer, 748 , 2023, ISBN: 978-3-031-42519-6. (Links | BibTeX | Tags: big data, clustering) @proceedings{CISIS-ICEUTE2023, title = {Proceedings of the International Joint Conference 16th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2023) 14th International Conference on EUropean Transnational Education (ICEUTE 2023). Salamanca, Spain, September 5-7, 2023}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, editor = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and F. Martínez-Álvarez and A. Troncoso and Á. Herrero and J. L. Calvo-Rolle and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-42519-6}, doi = {https://doi.org/10.1007/978-3-031-42519-6}, isbn = {978-3-031-42519-6}, year = {2023}, date = {2023-09-05}, volume = {748}, publisher = {Springer}, series = {Lecture Notes in Networks and Systems}, keywords = {big data, clustering}, pubstate = {published}, tppubtype = {proceedings} } |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés A New Deep Learning Architecture with Inductive Bias Balance for Oil Temperature Forecasting (Journal Article) Journal of Big Data, 10 , pp. 80, 2023. (Abstract | Links | BibTeX | Tags: deep learning, time series) @article{JIMENEZ-NAVARRO23c, title = {A New Deep Learning Architecture with Inductive Bias Balance for Oil Temperature Forecasting}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://journalofbigdata.springeropen.com/articles/10.1186/s40537-023-00745-0}, doi = {https://doi.org/10.1186/s40537-023-00745-0}, year = {2023}, date = {2023-05-28}, journal = {Journal of Big Data}, volume = {10}, pages = {80}, abstract = {Ensuring the optimal performance of power transformers is a laborious task in which the insulation system plays a vital role in decreasing their deterioration. The insulation system uses insulating oil to control temperature, as high temperatures can reduce the lifetime of the transformers and lead to expensive maintenance. Deep learning architectures have been demonstrated remarkable results in various fields. However, this improvement often comes at the cost of increased computing resources, which, in turn, increases the carbon footprint and hinders the optimization of architectures. In this study, we introduce a novel deep learning architecture that achieves a comparable efficacy to the best existing architectures in transformer oil temperature forecasting while improving efficiency. Effective forecasting can help prevent high temperatures and monitor the future condition of power transformers, thereby reducing unnecessary waste. 
To balance the inductive bias in our architecture, we propose the Smooth Residual Block, which divides the original problem into multiple subproblems to obtain different representations of the time series, collaboratively achieving the final forecasting. We applied our architecture to the Electricity Transformer datasets, which obtain transformer insulating oil temperature measures from two transformers in China. The results showed a 13% improvement in MSE and a 57% improvement in performance compared to the best current architectures, to the best of our knowledge. Moreover, we analyzed the architecture behavior to gain an intuitive understanding of the achieved solution.}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {article} } Ensuring the optimal performance of power transformers is a laborious task in which the insulation system plays a vital role in decreasing their deterioration. The insulation system uses insulating oil to control temperature, as high temperatures can reduce the lifetime of the transformers and lead to expensive maintenance. Deep learning architectures have been demonstrated remarkable results in various fields. However, this improvement often comes at the cost of increased computing resources, which, in turn, increases the carbon footprint and hinders the optimization of architectures. In this study, we introduce a novel deep learning architecture that achieves a comparable efficacy to the best existing architectures in transformer oil temperature forecasting while improving efficiency. Effective forecasting can help prevent high temperatures and monitor the future condition of power transformers, thereby reducing unnecessary waste. To balance the inductive bias in our architecture, we propose the Smooth Residual Block, which divides the original problem into multiple subproblems to obtain different representations of the time series, collaboratively achieving the final forecasting. 
We applied our architecture to the Electricity Transformer datasets, which obtain transformer insulating oil temperature measures from two transformers in China. The results showed a 13% improvement in MSE and a 57% improvement in performance compared to the best current architectures, to the best of our knowledge. Moreover, we analyzed the architecture behavior to gain an intuitive understanding of the achieved solution. |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés PHILNet: A Novel Efficient Approach for Time Series Forecasting using Deep Learning (Journal Article) Information Sciences, 632 , pp. 815-832, 2023. (Abstract | Links | BibTeX | Tags: deep learning, time series) @article{JIMENEZ-NAVARRO23b, title = {PHILNet: A Novel Efficient Approach for Time Series Forecasting using Deep Learning}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://www.sciencedirect.com/science/article/pii/S0020025523003183}, doi = {10.1016/j.ins.2023.03.021}, year = {2023}, date = {2023-03-03}, journal = {Information Sciences}, volume = {632}, pages = {815-832}, abstract = {Time series is one of the most common data types in the industry nowadays. Forecasting the future of a time series behavior can be useful in planning ahead, saving time, resources, and helping avoid undesired scenarios. To make the forecasting, historical data is utilized due to the causal nature of the time series. Several deep learning algorithms have been presented in this area, where the input is processed through a series of non-linear functions to produce the output. We present a novel strategy to improve the performance of deep learning models in time series forecasting in terms of efficiency while reaching similar effectiveness. This approach separates the model into levels, starting with the easiest and continuing to the most difficult. The simpler levels deal with smoothed versions of the input, whereas the most sophisticated level deals with the raw data. This strategy seeks to mimic the human learning process, in which basic tasks are completed initially, followed by more precise and sophisticated ones. 
Our method achieved promising results, obtaining a 35% improvement in mean squared error and a 2.6 time decrease in training time compared with the best models found in a variety of time series.}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {article} } Time series is one of the most common data types in the industry nowadays. Forecasting the future of a time series behavior can be useful in planning ahead, saving time, resources, and helping avoid undesired scenarios. To make the forecasting, historical data is utilized due to the causal nature of the time series. Several deep learning algorithms have been presented in this area, where the input is processed through a series of non-linear functions to produce the output. We present a novel strategy to improve the performance of deep learning models in time series forecasting in terms of efficiency while reaching similar effectiveness. This approach separates the model into levels, starting with the easiest and continuing to the most difficult. The simpler levels deal with smoothed versions of the input, whereas the most sophisticated level deals with the raw data. This strategy seeks to mimic the human learning process, in which basic tasks are completed initially, followed by more precise and sophisticated ones. Our method achieved promising results, obtaining a 35% improvement in mean squared error and a 2.6 time decrease in training time compared with the best models found in a variety of time series. |
O. S. Mazari and A. Sebaa and J. L. Amaro-Mellado and F. Martínez-Álvarez Creating a homogenized earthquake catalog for Algeria and mapping the main seismic parameters using a geographic information system (Journal Article) Journal of African Earth Sciences, 201 , pp. 104895, 2023. (Abstract | Links | BibTeX | Tags: natural disasters) @article{MAZARI23, title = {Creating a homogenized earthquake catalog for Algeria and mapping the main seismic parameters using a geographic information system}, author = {O. S. Mazari and A. Sebaa and J. L. Amaro-Mellado and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S1464343X23000687}, doi = {https://doi.org/10.1016/j.jafrearsci.2023.104895}, year = {2023}, date = {2023-03-03}, journal = {Journal of African Earth Sciences}, volume = {201}, pages = {104895}, abstract = {A homogeneous earthquake catalog is an essential instrument to study earthquake occurrence patterns, employing diverse engineering applications. In this paper, we describe a series of compilation and processing steps to compile an updated earthquake catalog for Algeria, a North African country with relatively high seismic activity. The procedure consisted of several steps. First, a range of reliable catalogs were considered; second, the data was integrated and refined; third, magnitudes are homogenized from different kinds of magnitudes into moment magnitude (M_w); declustering is then performed; and, finally, the magnitude-year completeness was estimated. The resulting Algeria catalog is bounded by the geographical limits (19° - 38.5° N and 9.5° W - 12.5° E), and covers the 1960-2020 period. It includes 4021 seismic events, reported up to M_w 7.1. We also calculate a set of seismic parameters, namely M_max and b-value, and mapped them using a geographic information system. Thus, the territory is divided into cells based on different grids to conduct the analysis. 
The results of the seismic parameters mapping are discussed, highlighting significant details. Several cells presented a M_max between 6.0 and 7.1. Regarding the b-value, two regions (Oran and Constantine) presented a high b-value, implying low-stress areas, and three regions (Algiers, Batna, and Chlef) a low b-value (0.65- 0.85), suggesting high-stress areas. Finally, we suggest some recommendations for future seismic hazard assessment studies.}, keywords = {natural disasters}, pubstate = {published}, tppubtype = {article} } A homogeneous earthquake catalog is an essential instrument to study earthquake occurrence patterns, employing diverse engineering applications. In this paper, we describe a series of compilation and processing steps to compile an updated earthquake catalog for Algeria, a North African country with relatively high seismic activity. The procedure consisted of several steps. First, a range of reliable catalogs were considered; second, the data was integrated and refined; third, magnitudes are homogenized from different kinds of magnitudes into moment magnitude (M_w); declustering is then performed; and, finally, the magnitude-year completeness was estimated. The resulting Algeria catalog is bounded by the geographical limits (19° - 38.5° N and 9.5° W - 12.5° E), and covers the 1960-2020 period. It includes 4021 seismic events, reported up to M_w 7.1. We also calculate a set of seismic parameters, namely M_max and b-value, and mapped them using a geographic information system. Thus, the territory is divided into cells based on different grids to conduct the analysis. The results of the seismic parameters mapping are discussed, highlighting significant details. Several cells presented a M_max between 6.0 and 7.1. Regarding the b-value, two regions (Oran and Constantine) presented a high b-value, implying low-stress areas, and three regions (Algiers, Batna, and Chlef) a low b-value (0.65- 0.85), suggesting high-stress areas. 
Finally, we suggest some recommendations for future seismic hazard assessment studies. |
D. Azzouguer and A. Sebaa and D. Hadjout and F. Martínez-Álvarez Fraud Detection of Electricity Consumption using Robust Exponential and Holt-Winters Smoothing method (Conference) IEEE International Conference on Advanced Systems and Emergent Technologies, 2023. (Abstract | Links | BibTeX | Tags: energy, time series) @conference{AZZOUGUER23, title = {Fraud Detection of Electricity Consumption using Robust Exponential and Holt-Winters Smoothing method}, author = {D. Azzouguer and A. Sebaa and D. Hadjout and F. Martínez-Álvarez}, url = {https://ieeexplore.ieee.org/document/10150645}, doi = {10.1109/IC_ASET58101.2023.10150645}, year = {2023}, date = {2023-02-20}, booktitle = {IEEE International Conference on Advanced Systems and Emergent Technologies}, abstract = {Non-technical losses (NTL), especially fraud detection is very important for electricity distribution enterprises. Fraud detection allows for maximizing the effective economic return for such enterprises. This paper provides an electricity fraud detection approach based on robust exponential and Holt-Winters Smoothing methods. The proposed approach is a procedure that aims to discover the fraudulent behavior of electricity consumers and goes through three crucial steps: (1) the prediction of monthly consumption, (2) the detection of abnormal consumption of electrical meters, and (3) the detection of fraud cases of economic customers. The proposed model was trained and evaluated. Its experimental validation is achieved by using a large dataset of real users from the Algerian economic sector with almost 2000 clients and 14 years of monthly electricity consumption. The proposed solution revealed good performance compared to the literature and the comparison with the models implemented in this article: SARIMA for prediction and two sigma for anomaly detection. 
The results show highly efficient and realistic countermeasures to fraud detection, which leads us to say that this method is robust and can enhance company profit.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {conference} } Non-technical losses (NTL), especially fraud detection is very important for electricity distribution enterprises. Fraud detection allows for maximizing the effective economic return for such enterprises. This paper provides an electricity fraud detection approach based on robust exponential and Holt-Winters Smoothing methods. The proposed approach is a procedure that aims to discover the fraudulent behavior of electricity consumers and goes through three crucial steps: (1) the prediction of monthly consumption, (2) the detection of abnormal consumption of electrical meters, and (3) the detection of fraud cases of economic customers. The proposed model was trained and evaluated. Its experimental validation is achieved by using a large dataset of real users from the Algerian economic sector with almost 2000 clients and 14 years of monthly electricity consumption. The proposed solution revealed good performance compared to the literature and the comparison with the models implemented in this article: SARIMA for prediction and two sigma for anomaly detection. The results show highly efficient and realistic countermeasures to fraud detection, which leads us to say that this method is robust and can enhance company profit. |
E. T. Habtemariam and K. Kekeba and M. Martínez-Ballesteros and F. Martínez-Álvarez A Bayesian Optimization-Based LSTM Model for Wind Power Forecasting in the Adama District, Ethiopia (Journal Article) Energies, 16 , pp. 2317, 2023. (Abstract | Links | BibTeX | Tags: deep learning, time series) @article{EJIGU23, title = {A Bayesian Optimization-Based LSTM Model for Wind Power Forecasting in the Adama District, Ethiopia}, author = {E. T. Habtemariam and K. Kekeba and M. Martínez-Ballesteros and F. Martínez-Álvarez}, url = {https://www.mdpi.com/1996-1073/16/5/2317}, doi = {https://doi.org/10.3390/en16052317}, year = {2023}, date = {2023-02-19}, journal = {Energies}, volume = {16}, pages = {2317}, abstract = {Renewable energies such as solar and wind power have become promising sources of energy to address the increase in greenhouse gases caused by the use of fossil fuels and to resolve current energy crises. Integrating wind energy into a large-scale electric grid presents a significant challenge due to the high intermittency and nonlinear behavior of wind power. Accurate wind power forecasting is essential for safe and efficient integration into the grid system. Many prediction models have been developed to predict the uncertain and nonlinear time series of wind power, but most neglect the use of Bayesian optimization to optimize the hyperparameters while training deep learning algorithms. The efficiency of grid search strategies decreases as the number of hyperparameters increases, and computation time complexity becomes an issue. This paper presents a robust and optimized Long-Short Term Memory network for forecasting wind power generation in the day ahead in the context of Ethiopia's renewable energy sector. The proposal uses Bayesian optimization to find the best hyperparameter combination in a reasonable computation time. 
The results indicate that tuning hyperparameters using this metaheuristic prior to building deep learning models significantly improves the predictive performance of the models. The proposed models were evaluated using MAE, RMSE, and MAPE metrics and outperformed both the baseline models and the optimized Gated Recurrent Unit architecture.}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {article} } Renewable energies such as solar and wind power have become promising sources of energy to address the increase in greenhouse gases caused by the use of fossil fuels and to resolve current energy crises. Integrating wind energy into a large-scale electric grid presents a significant challenge due to the high intermittency and nonlinear behavior of wind power. Accurate wind power forecasting is essential for safe and efficient integration into the grid system. Many prediction models have been developed to predict the uncertain and nonlinear time series of wind power, but most neglect the use of Bayesian optimization to optimize the hyperparameters while training deep learning algorithms. The efficiency of grid search strategies decreases as the number of hyperparameters increases, and computation time complexity becomes an issue. This paper presents a robust and optimized Long-Short Term Memory network for forecasting wind power generation in the day ahead in the context of Ethiopia's renewable energy sector. The proposal uses Bayesian optimization to find the best hyperparameter combination in a reasonable computation time. The results indicate that tuning hyperparameters using this metaheuristic prior to building deep learning models significantly improves the predictive performance of the models. The proposed models were evaluated using MAE, RMSE, and MAPE metrics and outperformed both the baseline models and the optimized Gated Recurrent Unit architecture. |
A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez A new Apache Spark-based framework for big data streaming forecasting in IoT networks (Journal Article) Journal of Supercomputing, 79 , pp. 11078–11100, 2023. (Abstract | Links | BibTeX | Tags: big data, IoT) @article{FERNANDEZ23, title = {A new Apache Spark-based framework for big data streaming forecasting in IoT networks}, author = {A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/article/10.1007/s11227-023-05100-x}, doi = {https://doi.org/10.1007/s11227-023-05100-x}, year = {2023}, date = {2023-02-02}, journal = {Journal of Supercomputing}, volume = {79}, pages = {11078–11100}, abstract = {Analyzing time-dependent data acquired in a continuous flow is a major challenge for various fields, such as big data and machine learning. Being able to analyze a large volume of data from various sources, such as sensors, networks, and the internet, is essential for improving the efficiency of our society's production processes. Additionally, this vast amount of data is collected dynamically in a continuous stream. The goal of this research is to provide a comprehensive framework for forecasting big data streams from Internet of Things networks and serve as a guide for designing and deploying other third-party solutions. Hence, a new framework for time series forecasting in a big data streaming scenario, using data collected from Internet of Things networks, is presented. This framework comprises of five main modules: Internet of Things network design and deployment, big data streaming architecture, stream data modeling method, big data forecasting method, and a comprehensive real-world application scenario, consisting of a physical Internet of Things network feeding the big data streaming architecture, being the linear regression the algorithm used for illustrative purposes. 
Comparison with other frameworks reveals that this is the first framework that incorporates and integrates all the aforementioned modules.}, keywords = {big data, IoT}, pubstate = {published}, tppubtype = {article} } Analyzing time-dependent data acquired in a continuous flow is a major challenge for various fields, such as big data and machine learning. Being able to analyze a large volume of data from various sources, such as sensors, networks, and the internet, is essential for improving the efficiency of our society's production processes. Additionally, this vast amount of data is collected dynamically in a continuous stream. The goal of this research is to provide a comprehensive framework for forecasting big data streams from Internet of Things networks and serve as a guide for designing and deploying other third-party solutions. Hence, a new framework for time series forecasting in a big data streaming scenario, using data collected from Internet of Things networks, is presented. This framework comprises of five main modules: Internet of Things network design and deployment, big data streaming architecture, stream data modeling method, big data forecasting method, and a comprehensive real-world application scenario, consisting of a physical Internet of Things network feeding the big data streaming architecture, being the linear regression the algorithm used for illustrative purposes. Comparison with other frameworks reveals that this is the first framework that incorporates and integrates all the aforementioned modules. |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso A new approach based on association rules to add explainability to time series forecasting models (Journal Article) Information Fusion, 94 , pp. 169-180, 2023. (Abstract | Links | BibTeX | Tags: association rules, time series, XAI) @article{TRONCOSO-GARCIA23, title = {A new approach based on association rules to add explainability to time series forecasting models}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S1566253523000295}, doi = {10.1016/j.inffus.2023.01.021}, year = {2023}, date = {2023-01-22}, journal = {Information Fusion}, volume = {94}, pages = {169-180}, abstract = {Machine learning and deep learning have become the most useful and powerful tools in the last years to mine information from large datasets. Despite the successful application to many research fields, it is widely known that some of these solutions based on artificial intelligence are considered black-box models, meaning that most experts find difficult to explain and interpret the models and why they generate such outputs. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability. Thus, models could be easily understood and further applied. This work proposes a novel method to explain black-box models, by using numeric association rules to explain and interpret multi-step time series forecasting models. Thus, a multi-objective algorithm is used to discover quantitative association rules from the target model. Then, visual explanation techniques are applied to make the rules more interpretable. 
Data from Spanish electricity energy consumption has been used to assess the suitability of the proposal.}, keywords = {association rules, time series, XAI}, pubstate = {published}, tppubtype = {article} } Machine learning and deep learning have become the most useful and powerful tools in the last years to mine information from large datasets. Despite the successful application to many research fields, it is widely known that some of these solutions based on artificial intelligence are considered black-box models, meaning that most experts find difficult to explain and interpret the models and why they generate such outputs. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability. Thus, models could be easily understood and further applied. This work proposes a novel method to explain black-box models, by using numeric association rules to explain and interpret multi-step time series forecasting models. Thus, a multi-objective algorithm is used to discover quantitative association rules from the target model. Then, visual explanation techniques are applied to make the rules more interpretable. Data from Spanish electricity energy consumption has been used to assess the suitability of the proposal. |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés From Simple to Complex: A Sequential Method for Enhancing Time Series Forecasting with Deep Learning (Journal Article) Logic Journal of the IGPL, in press , 2023. (Abstract | BibTeX | Tags: deep learning, time series) @article{JIMENEZ-NAVARRO23a, title = {From Simple to Complex: A Sequential Method for Enhancing Time Series Forecasting with Deep Learning}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés}, year = {2023}, date = {2023-01-20}, journal = {Logic Journal of the IGPL}, volume = {in press}, abstract = {Time series forecasting is a well-known deep learning application field in which previous data are used to predict the future behavior of the series. Recently, several deep learning approaches have been proposed in which several nonlinear functions are applied to the input to obtain the output. In this paper, we introduce a novel method to improve the performance of deep learning models in time series forecasting. This method divides the model into hierarchies or levels from simpler to more complex ones. Simpler levels handle smoothed versions of the input, whereas the most complex level processes the original time series. This method follows the human learning process where general/simpler tasks are performed first, and afterward, more precise/harder ones are accomplished. Our proposed methodology has been applied to the LSTM architecture, showing remarkable performance in various time series. 
In addition, a comparison is reported including a standard LSTM and novel methods such as DeepAR, Temporal Fusion Transformer (TFT), NBEATS and Echo State Network (ESN).}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {article} } Time series forecasting is a well-known deep learning application field in which previous data are used to predict the future behavior of the series. Recently, several deep learning approaches have been proposed in which several nonlinear functions are applied to the input to obtain the output. In this paper, we introduce a novel method to improve the performance of deep learning models in time series forecasting. This method divides the model into hierarchies or levels from simpler to more complex ones. Simpler levels handle smoothed versions of the input, whereas the most complex level processes the original time series. This method follows the human learning process where general/simpler tasks are performed first, and afterward, more precise/harder ones are accomplished. Our proposed methodology has been applied to the LSTM architecture, showing remarkable performance in various time series. In addition, a comparison is reported including a standard LSTM and novel methods such as DeepAR, Temporal Fusion Transformer (TFT), NBEATS and Echo State Network (ESN). |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Brito and F. Martínez-Álvarez and G. Asencio-Cortés A bioinspired ensemble approach for multi-horizon reference evapotranspiration forecasting in Portugal (Conference) SAC 38th Annual ACM Symposium on Applied Computing, 2023. (Abstract | Links | BibTeX | Tags: deep learning, precision agriculture, time series) @conference{EVAPOCVOA23, title = {A bioinspired ensemble approach for multi-horizon reference evapotranspiration forecasting in Portugal}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Brito and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://dl.acm.org/doi/abs/10.1145/3555776.3578634}, doi = {https://doi.org/10.1145/3555776.3578634}, year = {2023}, date = {2023-01-01}, booktitle = {SAC 38th Annual ACM Symposium on Applied Computing}, pages = {441-448}, abstract = {The year 2022 was the driest year in Portugal since 1931 with 97% of territory in severe drought. Water is especially important for the agricultural sector in Portugal, as it represents 78% total consumption according to the Water Footprint report published in 2010. Reference evapotranspiration is essential due to its importance in optimal irrigation planning that reduces water consumption. This study analyzes and proposes a framework to forecast daily reference evapotranspiration at eight stations in Portugal from 2012 to 2022 without relying on public meteorological forecasts. The data include meteorological data obtained from sensors included in the stations. The goal is to perform a multi-horizon forecasting of reference evapotranspiration using the multiple related covariates. The framework combines the data processing and the analysis of several state-of-the-art forecasting methods including classical, linear, tree-based, artificial neural network and ensembles. 
Then, an ensemble of all trained models is proposed using a recent bioinspired metaheuristic named Coronavirus Optimization Algorithm to weight the predictions. The results in terms of MAE and MSE are reported, indicating that our approach achieved a MAE of 0.658.}, keywords = {deep learning, precision agriculture, time series}, pubstate = {published}, tppubtype = {conference} } The year 2022 was the driest year in Portugal since 1931 with 97% of territory in severe drought. Water is especially important for the agricultural sector in Portugal, as it represents 78% total consumption according to the Water Footprint report published in 2010. Reference evapotranspiration is essential due to its importance in optimal irrigation planning that reduces water consumption. This study analyzes and proposes a framework to forecast daily reference evapotranspiration at eight stations in Portugal from 2012 to 2022 without relying on public meteorological forecasts. The data include meteorological data obtained from sensors included in the stations. The goal is to perform a multi-horizon forecasting of reference evapotranspiration using the multiple related covariates. The framework combines the data processing and the analysis of several state-of-the-art forecasting methods including classical, linear, tree-based, artificial neural network and ensembles. Then, an ensemble of all trained models is proposed using a recent bioinspired metaheuristic named Coronavirus Optimization Algorithm to weight the predictions. The results in terms of MAE and MSE are reported, indicating that our approach achieved a MAE of 0.658. |
L. Melgar-García and M. Hosseini and A. Troncoso Identification of anomalies in urban sound data with Autoencoders (Conference) HAIS 18th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2023. (BibTeX | Tags: deep learning, IoT, time series) @conference{HAIS23_Laura, title = {Identification of anomalies in urban sound data with Autoencoders}, author = {L. Melgar-García and M. Hosseini and A. Troncoso}, year = {2023}, date = {2023-01-01}, booktitle = {HAIS 18th International Conference on Hybrid Artificial Intelligence Systems}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, IoT, time series}, pubstate = {published}, tppubtype = {conference} } |
E. Tefera and A. Troncoso and M. Martínez Ballesteros and F. Martínez-Álvarez A New Hybrid CNN-LSTM for Wind Power Forecasting in Ethiopia (Conference) HAIS 18th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2023. (BibTeX | Tags: deep learning, energy, time series) @conference{HAIS23_Ejigu, title = {A New Hybrid CNN-LSTM for Wind Power Forecasting in Ethiopia}, author = {E. Tefera and A. Troncoso and M. Martínez Ballesteros and F. Martínez-Álvarez}, year = {2023}, date = {2023-01-01}, booktitle = {HAIS 18th International Conference on Hybrid Artificial Intelligence Systems}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
L. Melgar-García and Á. Troncoso-García and D. Gutiérrez-Avilés and J. F. Torres and A. Troncoso Explainable Artificial Intelligence for Education: A Real Case of a University Subject Switched to Python (Conference) ICEUTE 14th International Conference on European Transnational Education, Lecture Notes in Networks and Systems 2023. (BibTeX | Tags: education, XAI) @conference{ICEUTE23_Laura, title = {Explainable Artificial Intelligence for Education: A Real Case of a University Subject Switched to Python}, author = {L. Melgar-García and Á. Troncoso-García and D. Gutiérrez-Avilés and J. F. Torres and A. Troncoso}, year = {2023}, date = {2023-01-01}, booktitle = {ICEUTE 14th International Conference on European Transnational Education}, series = {Lecture Notes in Networks and Systems}, keywords = {education, XAI}, pubstate = {published}, tppubtype = {conference} } |
A. M. Chacón-Maldonado and A. R. Troncoso-García and F. Martínez-Álvarez and G. Asencio-Cortés and A. Troncoso Olive oil fly population pest forecasting using explainable deep learning (Conference) SOCO 18th International Conference on Soft Computing Models in Industrial and Environmental Applications, Lecture Notes in Networks and Systems 2023. (BibTeX | Tags: precision agriculture, XAI) @conference{SOCO23_Andres, title = {Olive oil fly population pest forecasting using explainable deep learning}, author = {A. M. Chacón-Maldonado and A. R. Troncoso-García and F. Martínez-Álvarez and G. Asencio-Cortés and A. Troncoso}, year = {2023}, date = {2023-01-01}, booktitle = {SOCO 18th International Conference on Soft Computing Models in Industrial and Environmental Applications}, series = {Lecture Notes in Networks and Systems}, keywords = {precision agriculture, XAI}, pubstate = {published}, tppubtype = {conference} } |
P. Casas-Gómez and F. Martínez-Álvarez and A. Troncoso and J. C. Linares-Calderón Machine Learning Approaches for Predicting Tree Growth Trends based on Basal Area Increment (Conference) SOCO 18th International Conference on Soft Computing Models in Industrial and Environmental Applications, Lecture Notes in Networks and Systems 2023. (BibTeX | Tags: time series) @conference{SOCO22_Pablo, title = {Machine Learning Approaches for Predicting Tree Growth Trends based on Basal Area Increment}, author = {P. Casas-Gómez and F. Martínez-Álvarez and A. Troncoso and J. C. Linares-Calderón}, year = {2023}, date = {2023-01-01}, booktitle = {SOCO 18th International Conference on Soft Computing Models in Industrial and Environmental Applications}, series = {Lecture Notes in Networks and Systems}, keywords = {time series}, pubstate = {published}, tppubtype = {conference} } |
L. Melgar-García and F. Martínez-Álvarez and D. T. Bui and A. Troncoso A novel semantic segmentation approach based on U-Net, WU-Net, and U-Net++ deep learning for predicting areas sensitive to pluvial flood at tropical area (Journal Article) International Journal of Digital Earth, 16 (1), pp. 3661-3679, 2023. (Links | BibTeX | Tags: deep learning, natural disasters) @article{Melgar2023c, title = {A novel semantic segmentation approach based on U-Net, WU-Net, and U-Net++ deep learning for predicting areas sensitive to pluvial flood at tropical area}, author = {L. Melgar-García and F. Martínez-Álvarez and D. T. Bui and A. Troncoso}, url = {https://www.tandfonline.com/doi/full/10.1080/17538947.2023.2252401}, doi = {https://doi.org/10.1080/17538947.2023.2252401}, year = {2023}, date = {2023-01-01}, journal = {International Journal of Digital Earth}, volume = {16}, number = {1}, pages = {3661-3679}, keywords = {deep learning, natural disasters}, pubstate = {published}, tppubtype = {article} } |
P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso Streaming big time series forecasting based on nearest similar patterns with application to energy consumption (Journal Article) Logic Journal of the IGPL, 31 (2), pp. 255-270, 2023. (Abstract | Links | BibTeX | Tags: energy, IoT, time series) @article{jimenez2023, title = {Streaming big time series forecasting based on nearest similar patterns with application to energy consumption}, author = {P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso}, url = {https://academic.oup.com/jigpal/advance-article-abstract/doi/10.1093/jigpal/jzac017/6534493?redirectedFrom=fulltext}, doi = {https://doi.org/10.1093/jigpal/jzac017}, year = {2023}, date = {2023-01-01}, journal = {Logic Journal of the IGPL}, volume = {31}, number = {2}, pages = {255-270}, abstract = {This work presents a novel approach to forecast streaming big time series based on nearest similar patterns. This approach combines a clustering algorithm with a classifier and the nearest neighbors algorithm. It presents two separate stages: offline and online. The offline phase is for training and finding the best models for clustering, classification and the nearest neighbors algorithm. The online phase is to predict big time series in real time. In the offline phase, data are divided into clusters and a forecasting model based on the nearest neighbors is trained for each cluster. In addition, a classifier is trained using the cluster assignments previously generated by the clustering algorithm. In the online phase, the classifier predicts the cluster label of an instance, and the proper nearest neighbors model according to the predicted cluster label is applied to obtain the final prediction using the similar patterns. The algorithm is able to be updated incrementally for online learning from data streams. 
Results are reported using electricity consumption with a granularity of 10 minutes for 4-hour-ahead forecasting and compared with well-known online benchmark learners, showing a remarkable improvement in prediction accuracy.}, keywords = {energy, IoT, time series}, pubstate = {published}, tppubtype = {article} } This work presents a novel approach to forecast streaming big time series based on nearest similar patterns. This approach combines a clustering algorithm with a classifier and the nearest neighbors algorithm. It presents two separate stages: offline and online. The offline phase is for training and finding the best models for clustering, classification and the nearest neighbors algorithm. The online phase is to predict big time series in real time. In the offline phase, data are divided into clusters and a forecasting model based on the nearest neighbors is trained for each cluster. In addition, a classifier is trained using the cluster assignments previously generated by the clustering algorithm. In the online phase, the classifier predicts the cluster label of an instance, and the proper nearest neighbors model according to the predicted cluster label is applied to obtain the final prediction using the similar patterns. The algorithm is able to be updated incrementally for online learning from data streams. Results are reported using electricity consumption with a granularity of 10 minutes for 4-hour-ahead forecasting and compared with well-known online benchmark learners, showing a remarkable improvement in prediction accuracy. |
O. Cardozo and V. Ojeda and R. Parra and J. C. Mello-Román and J. L. Noguera Vázquez and M. García-Torres and F. Divina and S. Grillo and C. Villalba and J. Facon Dataset of fundus images for the diagnosis of ocular toxoplasmosis (Journal Article) Data in Brief, pp. 109056, 2023. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{cardozo2023dataset, title = {Dataset of fundus images for the diagnosis of ocular toxoplasmosis}, author = {O. Cardozo and V. Ojeda and R. Parra and J. C. Mello-Román and J. L. Noguera Vázquez and M. García-Torres and F. Divina and S. Grillo and C. Villalba and J. Facon}, url = {https://www.sciencedirect.com/science/article/pii/S2352340923001749}, doi = {10.1016/j.dib.2023.109056}, year = {2023}, date = {2023-01-01}, journal = {Data in Brief}, pages = {109056}, publisher = {Elsevier}, abstract = {Toxoplasmosis chorioretinitis is commonly diagnosed by an ophthalmologist through the evaluation of the fundus images of a patient. Early detection of these lesions may help to prevent blindness. In this article we present a data set of fundus images labeled into three categories: healthy eye, inactive and active chorioretinitis. The dataset was developed by three ophthalmologists with expertise in toxoplasmosis detection using fundus images. The dataset will be of great use to researchers working on ophthalmic image analysis using artificial intelligence techniques for the automatic detection of toxoplasmosis chorioretinitis.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Toxoplasmosis chorioretinitis is commonly diagnosed by an ophthalmologist through the evaluation of the fundus images of a patient. Early detection of these lesions may help to prevent blindness. In this article we present a data set of fundus images labeled into three categories: healthy eye, inactive and active chorioretinitis. 
The dataset was developed by three ophthalmologists with expertise in toxoplasmosis detection using fundus images. The dataset will be of great use to researchers working on ophthalmic image analysis using artificial intelligence techniques for the automatic detection of toxoplasmosis chorioretinitis. |
M. García-Torres and R. Ruiz and F. Divina Evolutionary feature selection on high dimensional data using a search space reduction approach (Journal Article) Engineering Applications of Artificial Intelligence, 117 , pp. 105556, 2023. (Abstract | Links | BibTeX | Tags: big data, feature selection) @article{garcia2023evolutionary, title = {Evolutionary feature selection on high dimensional data using a search space reduction approach}, author = {M. García-Torres and R. Ruiz and F. Divina}, url = {https://www.sciencedirect.com/science/article/pii/S0952197622005462}, doi = {10.1016/j.engappai.2022.105556}, year = {2023}, date = {2023-01-01}, journal = {Engineering Applications of Artificial Intelligence}, volume = {117}, pages = {105556}, publisher = {Elsevier}, abstract = {Feature selection is becoming more and more a challenging task due to the increase of the dimensionality of the data. The complexity of the interactions among features and the size of the search space make it unfeasible to find the optimal subset of features. In order to reduce the search space, feature grouping has arisen as an approach that allows to cluster feature according to the shared information about the class. On the other hand, metaheuristic algorithms have proven to achieve sub-optimal solutions within a reasonable time. In this work we propose a Scatter Search (SS) strategy that uses feature grouping to generate an initial population comprised of diverse and high quality solutions. Solutions are then evolved by applying random mechanisms in combination with the feature group structure, with the objective of maintaining during the search a population of good and, at the same time, as diverse as possible solutions. Not only does the proposed strategy provide the best subset of features found but it also reduces the redundancy structure of the data. We test the strategy on high dimensional data from biomedical and text-mining domains. 
The results are compared with those obtained by other adaptations of SS and other popular strategies. Results show that the proposed strategy can find, on average, the smallest subsets of features without degrading the performance of the classifier.}, keywords = {big data, feature selection}, pubstate = {published}, tppubtype = {article} } Feature selection is becoming more and more a challenging task due to the increase of the dimensionality of the data. The complexity of the interactions among features and the size of the search space make it unfeasible to find the optimal subset of features. In order to reduce the search space, feature grouping has arisen as an approach that allows to cluster feature according to the shared information about the class. On the other hand, metaheuristic algorithms have proven to achieve sub-optimal solutions within a reasonable time. In this work we propose a Scatter Search (SS) strategy that uses feature grouping to generate an initial population comprised of diverse and high quality solutions. Solutions are then evolved by applying random mechanisms in combination with the feature group structure, with the objective of maintaining during the search a population of good and, at the same time, as diverse as possible solutions. Not only does the proposed strategy provide the best subset of features found but it also reduces the redundancy structure of the data. We test the strategy on high dimensional data from biomedical and text-mining domains. The results are compared with those obtained by other adaptations of SS and other popular strategies. Results show that the proposed strategy can find, on average, the smallest subsets of features without degrading the performance of the classifier. |
Gaia Collaboration Gaia Data Release 3: Mapping the asymmetric disc of the Milky Way (Journal Article) Astronomy and Astrophysics, 2023. (Abstract | Links | BibTeX | Tags: astrostatistics) @article{collaboration2022gaia, title = {Gaia Data Release 3: Mapping the asymmetric disc of the Milky Way}, author = {Gaia Collaboration}, url = {https://www.aanda.org/component/article?access=doi&doi=10.1051/0004-6361/202243797}, doi = {10.1051/0004-6361/202243797}, year = {2023}, date = {2023-01-01}, journal = {Astronomy and Astrophysics}, abstract = {With the most recent Gaia data release the number of sources with complete 6D phase space information (position and velocity) has increased to well over 33 million stars, while stellar astrophysical parameters are provided for more than 470 million sources, in addition to the identification of over 11 million variable stars. Using the astrophysical parameters and variability classifications provided in Gaia DR3, we select various stellar populations to explore and identify non-axisymmetric features in the disc of the Milky Way in both configuration and velocity space. Using more about 580 thousand sources identified as hot OB stars, together with 988 known open clusters younger than 100 million years, we map the spiral structure associated with star formation 4-5 kpc from the Sun. We select over 2800 Classical Cepheids younger than 200 million years, which show spiral features extending as far as 10 kpc from the Sun in the outer disc. We also identify more than 8.7 million sources on the red giant branch (RGB), of which 5.7 million have line-of-sight velocities, allowing the velocity field of the Milky Way to be mapped as far as 8 kpc from the Sun, including the inner disc. 
The spiral structure revealed by the young populations is consistent with recent results using Gaia EDR3 astrometry and source lists based on near infrared photometry, showing the Local (Orion) arm to be at least 8 kpc long, and an outer arm consistent with what is seen in HI surveys, which seems to be a continuation of the Perseus arm into the third quadrant. Meanwhile, the subset of RGB stars with velocities clearly reveals the large scale kinematic signature of the bar in the inner disc, as well as evidence of streaming motions in the outer disc that might be associated with spiral arms or bar resonances. (abridged)}, keywords = {astrostatistics}, pubstate = {accepted}, tppubtype = {article} } With the most recent Gaia data release the number of sources with complete 6D phase space information (position and velocity) has increased to well over 33 million stars, while stellar astrophysical parameters are provided for more than 470 million sources, in addition to the identification of over 11 million variable stars. Using the astrophysical parameters and variability classifications provided in Gaia DR3, we select various stellar populations to explore and identify non-axisymmetric features in the disc of the Milky Way in both configuration and velocity space. Using more about 580 thousand sources identified as hot OB stars, together with 988 known open clusters younger than 100 million years, we map the spiral structure associated with star formation 4-5 kpc from the Sun. We select over 2800 Classical Cepheids younger than 200 million years, which show spiral features extending as far as 10 kpc from the Sun in the outer disc. We also identify more than 8.7 million sources on the red giant branch (RGB), of which 5.7 million have line-of-sight velocities, allowing the velocity field of the Milky Way to be mapped as far as 8 kpc from the Sun, including the inner disc. 
The spiral structure revealed by the young populations is consistent with recent results using Gaia EDR3 astrometry and source lists based on near infrared photometry, showing the Local (Orion) arm to be at least 8 kpc long, and an outer arm consistent with what is seen in HI surveys, which seems to be a continuation of the Perseus arm into the third quadrant. Meanwhile, the subset of RGB stars with velocities clearly reveals the large scale kinematic signature of the bar in the inner disc, as well as evidence of streaming motions in the outer disc that might be associated with spiral arms or bar resonances. (abridged) |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso Identifying novelties and anomalies for incremental learning in streaming time series forecasting (Journal Article) Engineering Applications of Artificial Intelligence, 123 , pp. 106326, 2023. (Links | BibTeX | Tags: energy, IoT, time series) @article{Melgar2023b, title = {Identifying novelties and anomalies for incremental learning in streaming time series forecasting}, author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S0952197623005109}, doi = {https://doi.org/10.1016/j.engappai.2023.106326}, year = {2023}, date = {2023-01-01}, journal = {Engineering Applications of Artificial Intelligence}, volume = {123}, pages = {106326}, keywords = {energy, IoT, time series}, pubstate = {published}, tppubtype = {article} } |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso A novel distributed forecasting method based on information fusion and incremental learning for streaming time series (Journal Article) Information Fusion, 95 , pp. 163-173, 2023. (Links | BibTeX | Tags: energy, IoT, time series) @article{Melgar2023a, title = {A novel distributed forecasting method based on information fusion and incremental learning for streaming time series}, author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S1566253523000635}, doi = {https://doi.org/10.1016/j.inffus.2023.02.023}, year = {2023}, date = {2023-01-01}, journal = {Information Fusion}, volume = {95}, pages = {163-173}, keywords = {energy, IoT, time series}, pubstate = {published}, tppubtype = {article} } |
A. M. Chacón-Maldonado and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso FS-Studio: An extensive and efficient feature selection experimentation tool for Weka Explorer (Journal Article) SoftwareX, 23 , pp. 101401, 2023. (Links | BibTeX | Tags: feature selection) @article{Chacon2023, title = {FS-Studio: An extensive and efficient feature selection experimentation tool for Weka Explorer}, author = {A. M. Chacón-Maldonado and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S2352711023000973}, doi = {https://doi.org/10.1016/j.softx.2023.101401}, year = {2023}, date = {2023-01-01}, journal = {SoftwareX}, volume = {23}, pages = {101401}, keywords = {feature selection}, pubstate = {published}, tppubtype = {article} } |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso Evolutionary computation to explain deep learning models for time series forecasting (Conference) SAC 38th Annual ACM Symposium on Applied Computing, 2023. (Links | BibTeX | Tags: deep learning, time series, XAI) @conference{SAC2023, title = {Evolutionary computation to explain deep learning models for time series forecasting}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://dl.acm.org/doi/abs/10.1145/3555776.3578994}, year = {2023}, date = {2023-01-01}, booktitle = {SAC 38th Annual ACM Symposium on Applied Computing}, keywords = {deep learning, time series, XAI}, pubstate = {published}, tppubtype = {conference} } |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals (Conference) IWANN International Work-conference on Artificial Intelligence, Lecture Notes in Computer Science 2023. (BibTeX | Tags: deep learning, feature selection, time series) @conference{IWANN2023, title = {Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, year = {2023}, date = {2023-01-01}, booktitle = {IWANN International Work-conference on Artificial Intelligence}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, feature selection, time series}, pubstate = {published}, tppubtype = {conference} } |
2022 |
M. Á. Molina and M. J. Jiménez-Navarro and R. Arjona and F. Martínez-Álvarez and G. Asencio-Cortés DIAFAN-TL: An instance weighting-based transfer learning algorithm with application to phenology forecasting (Journal Article) Knowledge-Based Systems, 254 , pp. 109644, 2022. (Abstract | Links | BibTeX | Tags: time series, transfer learning) @article{MOLINA22, title = {DIAFAN-TL: An instance weighting-based transfer learning algorithm with application to phenology forecasting}, author = {M. Á. Molina and M. J. Jiménez-Navarro and R. Arjona and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://www.sciencedirect.com/science/article/pii/S0950705122008322}, doi = {https://doi.org/10.1016/j.knosys.2022.109644}, year = {2022}, date = {2022-10-22}, journal = {Knowledge-Based Systems}, volume = {254}, pages = {109644}, abstract = {The agricultural sector has been, and still is, the most important economic sector in many countries. Due to advances in technology, the amount and variety of available data have been increasing over the years. However, compared to other economic sectors, there is not always enough quality data for one particular domain (crops, plantations, plots) to obtain acceptable forecasting results with machine learning algorithms. In this context, transfer learning can help extract knowledge from different but related domains with enough data to transfer it to a target domain with scarce data. This process can overcome forecasting accuracy compared to training models uniquely with data from the target domain. In this work, a novel instance weighting-based transfer learning algorithm is proposed and applied to the phenology forecasting problem. A new metric named DIAFAN is proposed to weight samples from different source domains according to their relationship with the target domain, promoting the diversity of the information and avoiding inconsistent samples. 
Additionally, a set of validation schemes is specifically designed to ensure fair comparisons in terms of data volume with other benchmark transfer learning algorithms. The proposed algorithm, DIAFAN-TL, is tested with a proposed dataset of 16 plots of olive groves from different places, including information fusion from satellite images, meteorological stations and human field sampling of crop phenology. DIAFAN-TL achieves a remarkable improvement with respect to 15 other well-known transfer learning algorithms and three nontransfer learning scenarios. Finally, several performance analyses according to the different phenological states, prediction horizons and source domains are also performed.}, keywords = {time series, transfer learning}, pubstate = {published}, tppubtype = {article} } The agricultural sector has been, and still is, the most important economic sector in many countries. Due to advances in technology, the amount and variety of available data have been increasing over the years. However, compared to other economic sectors, there is not always enough quality data for one particular domain (crops, plantations, plots) to obtain acceptable forecasting results with machine learning algorithms. In this context, transfer learning can help extract knowledge from different but related domains with enough data to transfer it to a target domain with scarce data. This process can overcome forecasting accuracy compared to training models uniquely with data from the target domain. In this work, a novel instance weighting-based transfer learning algorithm is proposed and applied to the phenology forecasting problem. A new metric named DIAFAN is proposed to weight samples from different source domains according to their relationship with the target domain, promoting the diversity of the information and avoiding inconsistent samples. 
Additionally, a set of validation schemes is specifically designed to ensure fair comparisons in terms of data volume with other benchmark transfer learning algorithms. The proposed algorithm, DIAFAN-TL, is tested with a proposed dataset of 16 plots of olive groves from different places, including information fusion from satellite images, meteorological stations and human field sampling of crop phenology. DIAFAN-TL achieves a remarkable improvement with respect to 15 other well-known transfer learning algorithms and three nontransfer learning scenarios. Finally, several performance analyses according to the different phenological states, prediction horizons and source domains are also performed. |
A. M. Chacón-Maldonado and M. A. Molina and A. Troncoso and F. Martínez-Álvarez and G. Asencio-Cortés Olive Phenology Forecasting Using Information Fusion-Based Imbalanced Preprocessing and Automated Deep Learning (Conference) HAIS 17th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2022. (Links | BibTeX | Tags: deep learning, pattern recognition, time series) @conference{HAIS22_Andres, title = {Olive Phenology Forecasting Using Information Fusion-Based Imbalanced Preprocessing and Automated Deep Learning}, author = {A. M. Chacón-Maldonado and M. A. Molina and A. Troncoso and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://link.springer.com/chapter/10.1007/978-3-031-15471-3_24}, year = {2022}, date = {2022-09-12}, booktitle = {HAIS 17th International Conference on Hybrid Artificial Intelligence Systems}, journal = {HAIS 17th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2022}, pages = {274-285}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, pattern recognition, time series}, pubstate = {published}, tppubtype = {conference} } |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso Explainable machine learning for sleep apnea prediction (Conference) KES International Conference on Knowledge Based and Intelligent information and Engineering Systems, 2022. (Abstract | Links | BibTeX | Tags: association rules, deep learning, time series, XAI) @conference{TRONCOSO-GARCIA22, title = {Explainable machine learning for sleep apnea prediction}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922012406}, doi = {https://doi.org/10.1016/j.procs.2022.09.351}, year = {2022}, date = {2022-09-10}, booktitle = {KES International Conference on Knowledge Based and Intelligent information and Engineering Systems}, pages = {2930-2939}, abstract = {Machine and deep learning has become one of the most useful tools in the last years as a diagnosis-decision-support tool in the health area. However, it is widely known that artificial intelligence models are considered a black box and most experts experience difficulties explaining and interpreting the models and their results. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability so that models can be easily understood and further applied. Obstructive sleep apnea is a common chronic respiratory disease related to sleep. Its diagnosis nowadays is done by processing different data signals, such as electrocardiogram or respiratory rate. The waveform of the respiratory signal is of importance too. Machine learning models could be applied to the signal's analysis. Data from a polysomnography study for automatic sleep apnea detection have been used to evaluate the use of the Local Interpretable Model-Agnostic (LIME) library for explaining the health data models. 
Results obtained help to understand how several features have been used in the model and their influence in the quality of sleep.}, keywords = {association rules, deep learning, time series, XAI}, pubstate = {published}, tppubtype = {conference} } Machine and deep learning has become one of the most useful tools in the last years as a diagnosis-decision-support tool in the health area. However, it is widely known that artificial intelligence models are considered a black box and most experts experience difficulties explaining and interpreting the models and their results. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability so that models can be easily understood and further applied. Obstructive sleep apnea is a common chronic respiratory disease related to sleep. Its diagnosis nowadays is done by processing different data signals, such as electrocardiogram or respiratory rate. The waveform of the respiratory signal is of importance too. Machine learning models could be applied to the signal's analysis. Data from a polysomnography study for automatic sleep apnea detection have been used to evaluate the use of the Local Interpretable Model-Agnostic (LIME) library for explaining the health data models. Results obtained help to understand how several features have been used in the model and their influence in the quality of sleep. |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado Springer, 13469 , 2022, ISBN: 978-3-031-15470-6. (Links | BibTeX | Tags: big data, clustering, deep learning, IoT) @proceedings{HAIS2022, title = {Proceedings of the 17th International Conference on Hybrid Artificial Intelligent Systems (HAIS 2022) Salamanca, Spain, September 5-7, 2022}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-15471-3}, doi = {https://doi.org/10.1007/978-3-031-15471-3}, isbn = {978-3-031-15470-6}, year = {2022}, date = {2022-09-05}, volume = {13469}, publisher = {Springer}, series = {Lecture Notes in Artificial Intelligence}, keywords = {big data, clustering, deep learning, IoT}, pubstate = {published}, tppubtype = {proceedings} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado Springer, 531 , 2022, ISBN: 978-3-031-18050-7. (Links | BibTeX | Tags: big data, clustering, deep learning, IoT) @proceedings{SOCO2022, title = {Proceedings of the 17th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2022) Salamanca, Spain, September 5-7, 2022}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-18050-7}, doi = {https://doi.org/10.1007/978-3-031-18050-7}, isbn = {978-3-031-18050-7}, year = {2022}, date = {2022-09-05}, volume = {531}, publisher = {Springer}, series = {Lecture Notes in Networks and Systems}, keywords = {big data, clustering, deep learning, IoT}, pubstate = {published}, tppubtype = {proceedings} } |
P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado Springer, 532 , 2022, ISBN: 978-3-031-18409-3. (Links | BibTeX | Tags: big data, deep learning) @proceedings{CISIS-ICEUTE2022, title = {Proceedings of the International Joint Conference 15th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2022) 13th International Conference on EUropean Transnational Education (ICEUTE 2022). Salamanca, Spain, September 5-7, 2022}, author = {P. García-Bringas and H. Pérez-García and F. J. Martínez de Pisón and J. R. Villar-Flecha and A. Troncoso and E. A. de la Cal and Á. Herrero and F. Martínez-Álvarez and G. Psaila and H. Quintián and E. Corchado}, url = {https://link.springer.com/book/10.1007/978-3-031-18409-3}, doi = {https://doi.org/10.1007/978-3-031-18409-3}, isbn = {978-3-031-18409-3}, year = {2022}, date = {2022-09-05}, volume = {532}, publisher = {Springer}, series = {Lecture Notes in Networks and Systems}, keywords = {big data, deep learning}, pubstate = {published}, tppubtype = {proceedings} } |
D. Hadjout and J. F. Torres and A. Troncoso and A. Sebaa and F. Martínez-Álvarez Electricity consumption forecasting based on ensemble deep learning with application to the Algerian market (Journal Article) Energy, 243 , pp. 123060, 2022. (Abstract | Links | BibTeX | Tags: deep learning, energy, time series) @article{HADJOUT22, title = {Electricity consumption forecasting based on ensemble deep learning with application to the Algerian market}, author = {D. Hadjout and J. F. Torres and A. Troncoso and A. Sebaa and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S0360544221033090}, doi = {https://doi.org/10.1016/j.energy.2021.123060}, year = {2022}, date = {2022-03-15}, journal = {Energy}, volume = {243}, pages = {123060}, abstract = {The economic sector is one of the most important pillars of countries. Economic activities of industry are intimately linked with the ability to meet their needs for electricity. Therefore, electricity forecasting is a very important task. It allows for better planning and management of energy resources. Several methods have been proposed to forecast energy consumption. In this work, to predict monthly electricity consumption for the economic sector, we develop a novel approach based on ensemble learning. Our approach combines three models that proved successful in the field, namely: Long Short Term Memory and Gated Recurrent Unit neural networks, and Temporal Convolutional Networks. The experiments have been conducted with almost 2000 clients and 14 years of monthly electricity consumption from Bejaia, Algeria. The results show that the proposed ensemble models achieve better performance than both the company's requirements and the prediction of the traditional individual models. 
Finally, statistical tests have been carried out to prove that significance of the ensemble models developed.}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } The economic sector is one of the most important pillars of countries. Economic activities of industry are intimately linked with the ability to meet their needs for electricity. Therefore, electricity forecasting is a very important task. It allows for better planning and management of energy resources. Several methods have been proposed to forecast energy consumption. In this work, to predict monthly electricity consumption for the economic sector, we develop a novel approach based on ensemble learning. Our approach combines three models that proved successful in the field, namely: Long Short Term Memory and Gated Recurrent Unit neural networks, and Temporal Convolutional Networks. The experiments have been conducted with almost 2000 clients and 14 years of monthly electricity consumption from Bejaia, Algeria. The results show that the proposed ensemble models achieve better performance than both the company's requirements and the prediction of the traditional individual models. Finally, statistical tests have been carried out to prove that significance of the ensemble models developed. |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special issue SOCO-CISIS 2019-IGPL (Journal Article) Logic Journal of the IGPL, 30 (2), pp. 211-213, 2022. @article{MARTINEZ22b, title = {Special issue SOCO-CISIS 2019-IGPL}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://doi.org/10.1093/jigpal/jzaa066}, doi = {https://doi.org/10.1093/jigpal/jzaa066}, year = {2022}, date = {2022-03-10}, journal = {Logic Journal of the IGPL}, volume = {30}, number = {2}, pages = {211-213}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J. F. Torres and F. Martínez-Álvarez and A. Troncoso A deep LSTM network for the Spanish electricity consumption forecasting (Journal Article) Neural Computing and Applications, 34 , pp. 10533-10545, 2022. (Abstract | Links | BibTeX | Tags: deep learning, energy) @article{TORRES22b, title = {A deep LSTM network for the Spanish electricity consumption forecasting}, author = {J. F. Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://link.springer.com/article/10.1007/s00521-021-06773-2}, doi = {https://doi.org/10.1007/s00521-021-06773-2}, year = {2022}, date = {2022-02-05}, journal = {Neural Computing and Applications}, volume = {34}, pages = {10533-10545}, abstract = {Nowadays, electricity is a basic commodity necessary for the well-being of any modern society. Due to the growth in electricity consumption in recent years, mainly in large cities, electricity forecasting is key to the management of an efficient, sustainable and safe smart grid for the consumer. In this work, a deep neural network is proposed to address the electricity consumption forecasting in the short-term, namely, a long short-term memory (LSTM) network due to its ability to deal with sequential data such as time-series data. First, the optimal values for certain hyper-parameters have been obtained by a random search and a metaheuristic, called coronavirus optimization algorithm (CVOA), based on the propagation of the SARS-Cov-2 virus. Then, the optimal LSTM has been applied to predict the electricity demand with 4-h forecast horizon. Results using Spanish electricity data during nine years and half measured with 10-min frequency are presented and discussed. 
Finally, the performance of the proposed LSTM using random search and the LSTM using CVOA is compared, on the one hand, with that of recently published deep neural networks (such as a deep feed-forward neural network optimized with a grid search) and temporal fusion transformers optimized with a sampling algorithm, and, on the other hand, with traditional machine learning techniques, such as a linear regression, decision trees and tree-based ensemble techniques (gradient-boosted trees and random forest), achieving the smallest prediction error below 1.5%.}, keywords = {deep learning, energy}, pubstate = {published}, tppubtype = {article} } Nowadays, electricity is a basic commodity necessary for the well-being of any modern society. Due to the growth in electricity consumption in recent years, mainly in large cities, electricity forecasting is key to the management of an efficient, sustainable and safe smart grid for the consumer. In this work, a deep neural network is proposed to address the electricity consumption forecasting in the short-term, namely, a long short-term memory (LSTM) network due to its ability to deal with sequential data such as time-series data. First, the optimal values for certain hyper-parameters have been obtained by a random search and a metaheuristic, called coronavirus optimization algorithm (CVOA), based on the propagation of the SARS-Cov-2 virus. Then, the optimal LSTM has been applied to predict the electricity demand with 4-h forecast horizon. Results using Spanish electricity data during nine years and half measured with 10-min frequency are presented and discussed. 
Finally, the performance of the proposed LSTM using random search and the LSTM using CVOA is compared, on the one hand, with that of recently published deep neural networks (such as a deep feed-forward neural network optimized with a grid search) and temporal fusion transformers optimized with a sampling algorithm, and, on the other hand, with traditional machine learning techniques, such as a linear regression, decision trees and tree-based ensemble techniques (gradient-boosted trees and random forest), achieving the smallest prediction error below 1.5%. |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special Issue SOCO 2019: New trends in soft computing and its application in industrial and environmental problems (Journal Article) Neurocomputing, 470 , pp. 278-279, 2022. @article{MARTINEZ22, title = {Special Issue SOCO 2019: New trends in soft computing and its application in industrial and environmental problems}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0925231221001399}, doi = {https://doi.org/10.1016/j.neucom.2021.01.071}, year = {2022}, date = {2022-01-22}, journal = {Neurocomputing}, volume = {470}, pages = {278-279}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of Amazon Algorithm (Journal Article) Information, 13 (44), pp. 1–16, 2022. (Abstract | Links | BibTeX | Tags: feature selection, time series) @article{losada2022, title = {Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of Amazon Algorithm}, author = {A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown}, url = {https://www.mdpi.com/2078-2489/13/2/44}, doi = {10.3390/info13020044}, year = {2022}, date = {2022-01-01}, journal = {Information}, volume = {13}, number = {44}, pages = {1--16}, abstract = {Purchase processes on Amazon Marketplace begin at the Buy Box, which represents the buy click process through which numerous sellers compete. This study aimed to estimate empirically the relevant seller characteristics that Amazon could consider featuring in the Buy Box. To that end, 22 product categories from Italy’s Amazon web page were studied over a ten-month period, and the sellers were analyzed through their products featured in the Buy Box. Two different experiments were proposed and the results were analyzed using four classification algorithms (a neural network, random forest, support vector machine, and C5.0 decision trees) and a rule-based classification. The first experiment aimed to characterize sellers unspecifically by predicting their change at the Buy Box. The second one aimed to predict which seller would be featured in it. Both experiments revealed that the customer experience and the dynamics of the sellers’ prices were important features of the Buy Box. Additionally, we proposed a set of default features that Amazon could consider when no information about sellers was available. 
We also proposed the possible existence of a relationship or composition among important features that could be used for sellers to be featured in the Buy Box.}, keywords = {feature selection, time series}, pubstate = {published}, tppubtype = {article} } Purchase processes on Amazon Marketplace begin at the Buy Box, which represents the buy click process through which numerous sellers compete. This study aimed to estimate empirically the relevant seller characteristics that Amazon could consider featuring in the Buy Box. To that end, 22 product categories from Italy’s Amazon web page were studied over a ten-month period, and the sellers were analyzed through their products featured in the Buy Box. Two different experiments were proposed and the results were analyzed using four classification algorithms (a neural network, random forest, support vector machine, and C5.0 decision trees) and a rule-based classification. The first experiment aimed to characterize sellers unspecifically by predicting their change at the Buy Box. The second one aimed to predict which seller would be featured in it. Both experiments revealed that the customer experience and the dynamics of the sellers’ prices were important features of the Buy Box. Additionally, we proposed a set of default features that Amazon could consider when no information about sellers was available. We also proposed the possible existence of a relationship or composition among important features that could be used for sellers to be featured in the Buy Box. |
M.A. Castán-Lascorz and P. Jiménez-Herrera and A. Troncoso and G. Asencio-Cortés A new hybrid method for predicting univariate and multivariate time series based on pattern forecasting (Journal Article) Information Sciences, 586 , pp. 611–627, 2022. (Abstract | Links | BibTeX | Tags: energy, pattern recognition, time series) @article{castan2022, title = {A new hybrid method for predicting univariate and multivariate time series based on pattern forecasting}, author = {M.A. Castán-Lascorz and P. Jiménez-Herrera and A. Troncoso and G. Asencio-Cortés}, url = {https://www.sciencedirect.com/science/article/pii/S0020025521012226?via%3Dihub}, doi = {10.1016/j.ins.2021.12.001}, year = {2022}, date = {2022-01-01}, journal = {Information Sciences}, volume = {586}, pages = {611--627}, abstract = {Time series forecasting has become indispensable for multiple applications and industrial processes. Currently, a large number of algorithms have been developed to forecast time series, all of which are suitable depending on the characteristics and patterns to be inferred in each case. In this work, a new algorithm is proposed to predict both univariate and multivariate time series based on a combination of clustering, classification and forecasting techniques. The main goal of the proposed algorithm is first to group windows of time series values with similar patterns by applying a clustering process. Then, a specific forecasting model for each pattern is built and training is only conducted with the time windows corresponding to that pattern. The new algorithm has been designed using a flexible framework that allows the model to be generated using any combination of approaches within multiple machine learning techniques. To evaluate the model, several experiments are carried out using different configurations of the clustering, classification and forecasting methods that the model consists of. 
The results are analyzed and compared to classical prediction models, such as autoregressive, integrated, moving average and Holt-Winters models, to very recent forecasting methods, including deep, long short-term memory neural networks, and to well-known methods in the literature, such as k nearest neighbors, classification and regression trees, as well as random forest.}, keywords = {energy, pattern recognition, time series}, pubstate = {published}, tppubtype = {article} } Time series forecasting has become indispensable for multiple applications and industrial processes. Currently, a large number of algorithms have been developed to forecast time series, all of which are suitable depending on the characteristics and patterns to be inferred in each case. In this work, a new algorithm is proposed to predict both univariate and multivariate time series based on a combination of clustering, classification and forecasting techniques. The main goal of the proposed algorithm is first to group windows of time series values with similar patterns by applying a clustering process. Then, a specific forecasting model for each pattern is built and training is only conducted with the time windows corresponding to that pattern. The new algorithm has been designed using a flexible framework that allows the model to be generated using any combination of approaches within multiple machine learning techniques. To evaluate the model, several experiments are carried out using different configurations of the clustering, classification and forecasting methods that the model consists of. The results are analyzed and compared to classical prediction models, such as autoregressive, integrated, moving average and Holt-Winters models, to very recent forecasting methods, including deep, long short-term memory neural networks, and to well-known methods in the literature, such as k nearest neighbors, classification and regression trees, as well as random forest. |
G. Velázquez and F. Morales and M. García-Torres and F. Gómez-Vela and F. Divina and J.L. Vázquez Noguera and F. Daumas-Ladouce and C. Ayala and D. Pinto-Roa and P. Gardel-Sotomayor Distribution level Electric current consumption and meteorological data set of the East region of Paraguay (Journal Article) Data in Brief, 40 , pp. 107699, 2022. (Abstract | Links | BibTeX | Tags: energy, time series) @article{velazquez2022distribution, title = {Distribution level Electric current consumption and meteorological data set of the East region of Paraguay}, author = {G. Velázquez and F. Morales and M. García-Torres and F. Gómez-Vela and F. Divina and J.L. Vázquez Noguera and F. Daumas-Ladouce and C. Ayala and D. Pinto-Roa and P. Gardel-Sotomayor}, url = {https://www.sciencedirect.com/science/article/pii/S2352340921009744}, doi = {10.1016/j.dib.2021.107699}, year = {2022}, date = {2022-01-01}, journal = {Data in Brief}, volume = {40}, pages = {107699}, publisher = {Elsevier}, abstract = {This paper presents a data set with information on meteorological data and electricity consumption in the department of Alto Paraná, Paraguay. The meteorological data were registered every three hours at the Aeropuerto Guarani, Department of Alto Paraná, which belongs to the Dirección Nacional de Aeronáutica Civil of Paraguay. The final data consists of a total of 22.445 records of temperature, relative humidity, wind speed and atmospheric pressure. On the other hand, the electrical energy consumption data set contains a total of 1.848.947 records, all of them coming from the one hundred and fifteen feeders located throughout the Alto Paraná region of Paraguay. Electrical energy consumption data was provided by Administración Nacional de Electricidad (ANDE). 
The analysis of this data can yield insights regarding the energy consumption in the area.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } This paper presents a data set with information on meteorological data and electricity consumption in the department of Alto Paraná, Paraguay. The meteorological data were registered every three hours at the Aeropuerto Guarani, Department of Alto Paraná, which belongs to the Dirección Nacional de Aeronáutica Civil of Paraguay. The final data consists of a total of 22.445 records of temperature, relative humidity, wind speed and atmospheric pressure. On the other hand, the electrical energy consumption data set contains a total of 1.848.947 records, all of them coming from the one hundred and fifteen feeders located throughout the Alto Paraná region of Paraguay. Electrical energy consumption data was provided by Administración Nacional de Electricidad (ANDE). The analysis of this data can yield insights regarding the energy consumption in the area. |
S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C.E. Schaerer Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty (Journal Article) Entropy, 24 (1), pp. 64, 2022. (Abstract | Links | BibTeX | Tags: feature selection) @article{gomez2022measuring, title = {Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty}, author = {S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C.E. Schaerer}, url = {https://www.mdpi.com/1099-4300/24/1/64}, doi = {10.3390/e24010064}, year = {2022}, date = {2022-01-01}, journal = {Entropy}, volume = {24}, number = {1}, pages = {64}, publisher = {Multidisciplinary Digital Publishing Institute}, abstract = {Interaction between variables is often found in statistical models, and it is usually expressed in the model as an additional term when the variables are numeric. However, when the variables are categorical (also known as nominal or qualitative) or mixed numerical-categorical, defining, detecting, and measuring interactions is not a simple task. In this work, based on an entropy-based correlation measure for n nominal variables (named as Multivariate Symmetrical Uncertainty (MSU)), we propose a formal and broader definition for the interaction of the variables. Two series of experiments are presented. In the first series, we observe that datasets where some record types or combinations of categories are absent, forming patterns of records, which often display interactions among their attributes. In the second series, the interaction/non-interaction behavior of a regression model (entirely built on continuous variables) gets successfully replicated under a discretized version of the dataset. It is shown that there is an interaction-wise correspondence between the continuous and the discretized versions of the dataset. 
Hence, we demonstrate that the proposed definition of interaction enabled by the MSU is a valuable tool for detecting and measuring interactions within linear and non-linear models.}, keywords = {feature selection}, pubstate = {published}, tppubtype = {article} } Interaction between variables is often found in statistical models, and it is usually expressed in the model as an additional term when the variables are numeric. However, when the variables are categorical (also known as nominal or qualitative) or mixed numerical-categorical, defining, detecting, and measuring interactions is not a simple task. In this work, based on an entropy-based correlation measure for n nominal variables (named as Multivariate Symmetrical Uncertainty (MSU)), we propose a formal and broader definition for the interaction of the variables. Two series of experiments are presented. In the first series, we observe that datasets where some record types or combinations of categories are absent, forming patterns of records, which often display interactions among their attributes. In the second series, the interaction/non-interaction behavior of a regression model (entirely built on continuous variables) gets successfully replicated under a discretized version of the dataset. It is shown that there is an interaction-wise correspondence between the continuous and the discretized versions of the dataset. Hence, we demonstrate that the proposed definition of interaction enabled by the MSU is a valuable tool for detecting and measuring interactions within linear and non-linear models. |
C. Segarra-Martín and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez A novel approach to discover numerical association based on the Coronavirus Optimization Algorithm (Conference) SAC 37th Symposium On Applied Computing, 2022. (Abstract | BibTeX | Tags: association rules) @conference{SAC2022, title = {A novel approach to discover numerical association based on the Coronavirus Optimization Algorithm }, author = {C. Segarra-Martín and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez}, year = {2022}, date = {2022-01-01}, booktitle = {SAC 37th Symposium On Applied Computing}, abstract = {The disease caused by the SARS-CoV-2 (COVID-19) has affected millions of people around the world since its detection in 2019. This pandemic inspired the development of the Coronavirus Optimization Algorithm (CVOA), a bio-inspired metaheuristic that was originally used to adjust deep learning models for time series forecasting, by means of a binary codification. In this paper, a integer codification for the CVOA individual is introduced and used for optimizing a novel approach for numerical association rules mining. In addition, the CVOA setting parameters have been updated and a vaccination rate based on real data has been incorporated, to make it more efficient. As an application case, the prediction of earthquakes of large magnitude has been addressed. This kind of events are rare and, therefore, they can be characterized by rules with very high interest or lift and low support. Thus, the algorithm has been applied to the extraction of rules meeting specific criteria in an earthquake data set, provided by the National Geographic Institute of Spain. 
The results show CVOA as a promising tool for numerical association rules mining, obtaining rules with useful and meaningful information for predicting the occurrence of large earthquakes.}, keywords = {association rules}, pubstate = {published}, tppubtype = {conference} } The disease caused by the SARS-CoV-2 (COVID-19) has affected millions of people around the world since its detection in 2019. This pandemic inspired the development of the Coronavirus Optimization Algorithm (CVOA), a bio-inspired metaheuristic that was originally used to adjust deep learning models for time series forecasting, by means of a binary codification. In this paper, a integer codification for the CVOA individual is introduced and used for optimizing a novel approach for numerical association rules mining. In addition, the CVOA setting parameters have been updated and a vaccination rate based on real data has been incorporated, to make it more efficient. As an application case, the prediction of earthquakes of large magnitude has been addressed. This kind of events are rare and, therefore, they can be characterized by rules with very high interest or lift and low support. Thus, the algorithm has been applied to the extraction of rules meeting specific criteria in an earthquake data set, provided by the National Geographic Institute of Spain. The results show CVOA as a promising tool for numerical association rules mining, obtaining rules with useful and meaningful information for predicting the occurrence of large earthquakes. |
L. Melgar-García and D. Gutiérrez-Avilés and M. T. Godinho and R. Espada and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero A new big data triclustering approach for extracting three-dimensional patterns in precision agriculture (Journal Article) Neurocomputing, 500 , pp. 268-278, 2022. (Abstract | Links | BibTeX | Tags: big data, pattern recognition) @article{MELGAR21_NEUCOMb, title = {A new big data triclustering approach for extracting three-dimensional patterns in precision agriculture}, author = {L. Melgar-García and D. Gutiérrez-Avilés and M. T. Godinho and R. Espada and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0925231222006415}, doi = {10.1016/j.neucom.2021.06.101}, year = {2022}, date = {2022-01-01}, journal = {Neurocomputing}, volume = {500}, pages = {268--278}, abstract = {Precision agriculture focuses on the development of site-specific harvest considering the variability of each crop area. Vegetation indices allow the study and delineation of different characteristics of each field zone, generally invisible to the naked-eye. This paper introduces a new big data triclustering approach based on evolutionary algorithms. The algorithm shows its capability to discover three-dimensional patterns on the basis of vegetation indices from vine crops. Different vegetation indices have been tested to find different patterns in the crops. The results reported using a vineyard crop located in Portugal depicts four areas with different moisture stress particularities that can lead to changes in the management of the vineyard. 
Furthermore, scalability studies have been performed, showing that the proposed algorithm is suitable for dealing with big datasets.}, keywords = {big data, pattern recognition}, pubstate = {published}, tppubtype = {article} } Precision agriculture focuses on the development of site-specific harvest considering the variability of each crop area. Vegetation indices allow the study and delineation of different characteristics of each field zone, generally invisible to the naked-eye. This paper introduces a new big data triclustering approach based on evolutionary algorithms. The algorithm shows its capability to discover three-dimensional patterns on the basis of vegetation indices from vine crops. Different vegetation indices have been tested to find different patterns in the crops. The results reported using a vineyard crop located in Portugal depicts four areas with different moisture stress particularities that can lead to changes in the management of the vineyard. Furthermore, scalability studies have been performed, showing that the proposed algorithm is suitable for dealing with big datasets. |
J. A. Gallardo-Gómez and F. Divina and A. Troncoso and F. Martínez-Álvarez Explainable Artificial Intelligence for the Electric Vehicle Load Demand Forecasting Problem (Conference) SOCO 17th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing 2022. (Links | BibTeX | Tags: big data, energy, time series) @conference{gallardo2022explainable, title = {Explainable Artificial Intelligence for the Electric Vehicle Load Demand Forecasting Problem}, author = {J. A. Gallardo-Gómez and F. Divina and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-87869-6_65}, year = {2022}, date = {2022-01-01}, booktitle = {SOCO 17th International Conference on Soft Computing Models in Industrial and Environmental Applications}, pages = {413--422}, series = {Advances in Intelligent Systems and Computing}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
F. Delgado-Chaves and P. M. Martínez-García and A. Herrero-Ruiz and F. Gómez-Vela and F. Divina and S. Jimeno-González and F. Cortés-Ledesma Data of transcriptional effects of the merbarone-mediated inhibition of TOP2 (Journal Article) Data in Brief, 44 , pp. 108499, 2022. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{delgado2022data, title = {Data of transcriptional effects of the merbarone-mediated inhibition of TOP2}, author = {F. Delgado-Chaves and P. M. Martínez-García and A. Herrero-Ruiz and F. Gómez-Vela and F. Divina and S. Jimeno-González and F. Cortés-Ledesma}, url = {https://www.sciencedirect.com/science/article/pii/S235234092200693X}, doi = {10.1016/j.dib.2022.108499}, year = {2022}, date = {2022-01-01}, journal = {Data in Brief}, volume = {44}, pages = {108499}, publisher = {Elsevier}, abstract = {Type II DNA topoisomerases relax topological stress by transiently gating DNA passage in a controlled cut-and-reseal mechanism that affects both DNA strands. Therefore, they are essential to overcome topological problems associated with DNA metabolism. Their aberrant activity results in the generation of DNA double-strand breaks, which can seriously compromise cell survival and genome integrity. Here, we profile the transcriptome of human-telomerase-immortalized retinal pigment epithelial 1 (RPE-1) cells when treated with merbarone, a drug that catalytically inhibits type II DNA topoisomerases. We performed RNA-Seq after 4 and 8 h of merbarone treatment and compared transcriptional profiles versus untreated samples. We report raw sequencing data together with lists of gene counts and differentially expressed genes.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Type II DNA topoisomerases relax topological stress by transiently gating DNA passage in a controlled cut-and-reseal mechanism that affects both DNA strands. Therefore, they are essential to overcome topological problems associated with DNA metabolism. 
Their aberrant activity results in the generation of DNA double-strand breaks, which can seriously compromise cell survival and genome integrity. Here, we profile the transcriptome of human-telomerase-immortalized retinal pigment epithelial 1 (RPE-1) cells when treated with merbarone, a drug that catalytically inhibits type II DNA topoisomerases. We performed RNA-Seq after 4 and 8 h of merbarone treatment and compared transcriptional profiles versus untreated samples. We report raw sequencing data together with lists of gene counts and differentially expressed genes. |