Publications
2023 |
O. S. Mazari and A. Sebaa and J. L. Amaro-Mellado and F. Martínez-Álvarez Creating a homogenized earthquake catalog for Algeria and mapping the main seismic parameters using a geographic information system (Journal Article) Journal of African Earth Sciences, 201 , pp. 104895, 2023. (Abstract | Links | BibTeX | Tags: ) @article{MAZARI23, title = {Creating a homogenized earthquake catalog for Algeria and mapping the main seismic parameters using a geographic information system}, author = {O. S. Mazari and A. Sebaa and J. L. Amaro-Mellado and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S1464343X23000687}, doi = {https://doi.org/10.1016/j.jafrearsci.2023.104895}, year = {2023}, date = {2023-03-03}, journal = {Journal of African Earth Sciences}, volume = {201}, pages = {104895}, abstract = {A homogeneous earthquake catalog is an essential instrument to study earthquake occurrence patterns, employing diverse engineering applications. In this paper, we describe a series of compilation and processing steps to compile an updated earthquake catalog for Algeria, a North African country with relatively high seismic activity. The procedure consisted of several steps. First, a range of reliable catalogs were considered; second, the data was integrated and refined; third, magnitudes are homogenized from different kinds of magnitudes into moment magnitude (M_w); declustering is then performed; and, finally, the magnitude-year completeness was estimated. The resulting Algeria catalog is bounded by the geographical limits (19° - 38.5° N and 9.5° W - 12.5° E), and covers the 1960-2020 period. It includes 4021 seismic events, reported up to M_w 7.1. We also calculate a set of seismic parameters, namely M_max and b-value, and mapped them using a geographic information system. Thus, the territory is divided into cells based on different grids to conduct the analysis. 
The results of the seismic parameters mapping are discussed, highlighting significant details. Several cells presented a M_max between 6.0 and 7.1. Regarding the b-value, two regions (Oran and Constantine) presented a high b-value, implying low-stress areas, and three regions (Algiers, Batna, and Chlef) a low b-value (0.65- 0.85), suggesting high-stress areas. Finally, we suggest some recommendations for future seismic hazard assessment studies.}, keywords = {}, pubstate = {published}, tppubtype = {article} } A homogeneous earthquake catalog is an essential instrument to study earthquake occurrence patterns, employing diverse engineering applications. In this paper, we describe a series of compilation and processing steps to compile an updated earthquake catalog for Algeria, a North African country with relatively high seismic activity. The procedure consisted of several steps. First, a range of reliable catalogs were considered; second, the data was integrated and refined; third, magnitudes are homogenized from different kinds of magnitudes into moment magnitude (M_w); declustering is then performed; and, finally, the magnitude-year completeness was estimated. The resulting Algeria catalog is bounded by the geographical limits (19° - 38.5° N and 9.5° W - 12.5° E), and covers the 1960-2020 period. It includes 4021 seismic events, reported up to M_w 7.1. We also calculate a set of seismic parameters, namely M_max and b-value, and mapped them using a geographic information system. Thus, the territory is divided into cells based on different grids to conduct the analysis. The results of the seismic parameters mapping are discussed, highlighting significant details. Several cells presented a M_max between 6.0 and 7.1. Regarding the b-value, two regions (Oran and Constantine) presented a high b-value, implying low-stress areas, and three regions (Algiers, Batna, and Chlef) a low b-value (0.65- 0.85), suggesting high-stress areas. 
Finally, we suggest some recommendations for future seismic hazard assessment studies. |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés PHILNet: A Novel Efficient Approach for Time Series Forecasting using Deep Learning (Journal Article) Information Sciences, in press , 2023. (Abstract | BibTeX | Tags: deep learning, forecasting, time series) @article{JIMENEZ-NAVARRO23b, title = {PHILNet: A Novel Efficient Approach for Time Series Forecasting using Deep Learning}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés}, year = {2023}, date = {2023-03-03}, journal = {Information Sciences}, volume = {in press}, abstract = {Time series is one of the most common data types in the industry nowadays. Forecasting the future of a time series behavior can be useful in planning ahead, saving time, resources, and helping avoid undesired scenarios. To make the forecasting, historical data is utilized due to the causal nature of the time series. Several deep learning algorithms have been presented in this area, where the input is processed through a series of non-linear functions to produce the output. We present a novel strategy to improve the performance of deep learning models in time series forecasting in terms of efficiency while reaching similar effectiveness. This approach separates the model into levels, starting with the easiest and continuing to the most difficult. The simpler levels deal with smoothed versions of the input, whereas the most sophisticated level deals with the raw data. This strategy seeks to mimic the human learning process, in which basic tasks are completed initially, followed by more precise and sophisticated ones. 
Our method achieved promising results, obtaining a 35% improvement in mean squared error and a 2.6 time decrease in training time compared with the best models found in a variety of time series.}, keywords = {deep learning, forecasting, time series}, pubstate = {published}, tppubtype = {article} } Time series is one of the most common data types in the industry nowadays. Forecasting the future of a time series behavior can be useful in planning ahead, saving time, resources, and helping avoid undesired scenarios. To make the forecasting, historical data is utilized due to the causal nature of the time series. Several deep learning algorithms have been presented in this area, where the input is processed through a series of non-linear functions to produce the output. We present a novel strategy to improve the performance of deep learning models in time series forecasting in terms of efficiency while reaching similar effectiveness. This approach separates the model into levels, starting with the easiest and continuing to the most difficult. The simpler levels deal with smoothed versions of the input, whereas the most sophisticated level deals with the raw data. This strategy seeks to mimic the human learning process, in which basic tasks are completed initially, followed by more precise and sophisticated ones. Our method achieved promising results, obtaining a 35% improvement in mean squared error and a 2.6 time decrease in training time compared with the best models found in a variety of time series. |
D. Azzouguer and A. Sebaa and D. Hadjout and F. Martínez-Álvarez Fraud Detection of Electricity Consumption using Robust Exponential and Holt-Winters Smoothing method (Conference) IEEE International Conference on Advanced Systems and Emergent Technologies, 2023. (Abstract | BibTeX | Tags: energy, forecasting, time series) @conference{AZZOUGUER23, title = {Fraud Detection of Electricity Consumption using Robust Exponential and Holt-Winters Smoothing method}, author = {D. Azzouguer and A. Sebaa and D. Hadjout and F. Martínez-Álvarez}, year = {2023}, date = {2023-02-20}, booktitle = {IEEE International Conference on Advanced Systems and Emergent Technologies}, abstract = {Non-technical losses (NTL), especially fraud detection, is very important for electricity distribution enterprises. Fraud detection allows for maximizing the effective economic return for such enterprises. In this paper, we provide an electricity consumption fraud detection approach based on robust exponential and Holt-Winters Smoothing methods. The proposed approach aims to effectively discover the fraudulent behaviors of electricity consumers. To validate the proposed method, we compared our results to two enterprise classifications: Activity Sector and Maximum Power Demand. Experimental validation is presented using a large dataset of real users from the Algerian economic sector with almost 2000 clients and 14 years of monthly electricity consumption. The results of the proposed method show highly efficient and realistic countermeasures to fraud detection and can enhance company profit.}, keywords = {energy, forecasting, time series}, pubstate = {published}, tppubtype = {conference} } Non-technical losses (NTL), especially fraud detection, is very important for electricity distribution enterprises. Fraud detection allows for maximizing the effective economic return for such enterprises. 
In this paper, we provide an electricity consumption fraud detection approach based on robust exponential and Holt-Winters Smoothing methods. The proposed approach aims to effectively discover the fraudulent behaviors of electricity consumers. To validate the proposed method, we compared our results to two enterprise classifications: Activity Sector and Maximum Power Demand. Experimental validation is presented using a large dataset of real users from the Algerian economic sector with almost 2000 clients and 14 years of monthly electricity consumption. The results of the proposed method show highly efficient and realistic countermeasures to fraud detection and can enhance company profit. |
E. T. Habtemariam and K. Kekeba and M. Martínez-Ballesteros and F. Martínez-Álvarez A Bayesian Optimization-Based LSTM Model for Wind Power Forecasting in the Adama District, Ethiopia (Journal Article) Energies, 16 , pp. 2317, 2023. (Abstract | Links | BibTeX | Tags: deep learning, forecasting, time series) @article{EJIGU23, title = {A Bayesian Optimization-Based LSTM Model for Wind Power Forecasting in the Adama District, Ethiopia}, author = {E. T. Habtemariam and K. Kekeba and M. Martínez-Ballesteros and F. Martínez-Álvarez}, url = {https://www.mdpi.com/1996-1073/16/5/2317}, doi = {https://doi.org/10.3390/en16052317}, year = {2023}, date = {2023-02-19}, journal = {Energies}, volume = {16}, pages = {2317}, abstract = {Renewable energies such as solar and wind power have become promising sources of energy to address the increase in greenhouse gases caused by the use of fossil fuels and to resolve current energy crises. Integrating wind energy into a large-scale electric grid presents a significant challenge due to the high intermittency and nonlinear behavior of wind power. Accurate wind power forecasting is essential for safe and efficient integration into the grid system. Many prediction models have been developed to predict the uncertain and nonlinear time series of wind power, but most neglect the use of Bayesian optimization to optimize the hyperparameters while training deep learning algorithms. The efficiency of grid search strategies decreases as the number of hyperparameters increases, and computation time complexity becomes an issue. This paper presents a robust and optimized Long-Short Term Memory network for forecasting wind power generation in the day ahead in the context of Ethiopia's renewable energy sector. The proposal uses Bayesian optimization to find the best hyperparameter combination in a reasonable computation time. 
The results indicate that tuning hyperparameters using this metaheuristic prior to building deep learning models significantly improves the predictive performance of the models. The proposed models were evaluated using MAE, RMSE, and MAPE metrics and outperformed both the baseline models and the optimized Gated Recurrent Unit architecture.}, keywords = {deep learning, forecasting, time series}, pubstate = {published}, tppubtype = {article} } Renewable energies such as solar and wind power have become promising sources of energy to address the increase in greenhouse gases caused by the use of fossil fuels and to resolve current energy crises. Integrating wind energy into a large-scale electric grid presents a significant challenge due to the high intermittency and nonlinear behavior of wind power. Accurate wind power forecasting is essential for safe and efficient integration into the grid system. Many prediction models have been developed to predict the uncertain and nonlinear time series of wind power, but most neglect the use of Bayesian optimization to optimize the hyperparameters while training deep learning algorithms. The efficiency of grid search strategies decreases as the number of hyperparameters increases, and computation time complexity becomes an issue. This paper presents a robust and optimized Long-Short Term Memory network for forecasting wind power generation in the day ahead in the context of Ethiopia's renewable energy sector. The proposal uses Bayesian optimization to find the best hyperparameter combination in a reasonable computation time. The results indicate that tuning hyperparameters using this metaheuristic prior to building deep learning models significantly improves the predictive performance of the models. The proposed models were evaluated using MAE, RMSE, and MAPE metrics and outperformed both the baseline models and the optimized Gated Recurrent Unit architecture. |
A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez A new Apache Spark-based framework for big data streaming forecasting in IoT networks (Journal Article) Journal of Supercomputing, pp. 1-23, 2023. (Abstract | Links | BibTeX | Tags: big data, data stream, forecasting, IoT) @article{FERNANDEZ23, title = {A new Apache Spark-based framework for big data streaming forecasting in IoT networks}, author = {A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/article/10.1007/s11227-023-05100-x}, doi = {https://doi.org/10.1007/s11227-023-05100-x}, year = {2023}, date = {2023-02-02}, journal = {Journal of Supercomputing}, pages = {1-23}, abstract = {Analyzing time-dependent data acquired in a continuous flow is a major challenge for various fields, such as big data and machine learning. Being able to analyze a large volume of data from various sources, such as sensors, networks, and the internet, is essential for improving the efficiency of our society's production processes. Additionally, this vast amount of data is collected dynamically in a continuous stream. The goal of this research is to provide a comprehensive framework for forecasting big data streams from Internet of Things networks and serve as a guide for designing and deploying other third-party solutions. Hence, a new framework for time series forecasting in a big data streaming scenario, using data collected from Internet of Things networks, is presented. This framework comprises of five main modules: Internet of Things network design and deployment, big data streaming architecture, stream data modeling method, big data forecasting method, and a comprehensive real-world application scenario, consisting of a physical Internet of Things network feeding the big data streaming architecture, being the linear regression the algorithm used for illustrative purposes. 
Comparison with other frameworks reveals that this is the first framework that incorporates and integrates all the aforementioned modules.}, keywords = {big data, data stream, forecasting, IoT}, pubstate = {published}, tppubtype = {article} } Analyzing time-dependent data acquired in a continuous flow is a major challenge for various fields, such as big data and machine learning. Being able to analyze a large volume of data from various sources, such as sensors, networks, and the internet, is essential for improving the efficiency of our society's production processes. Additionally, this vast amount of data is collected dynamically in a continuous stream. The goal of this research is to provide a comprehensive framework for forecasting big data streams from Internet of Things networks and serve as a guide for designing and deploying other third-party solutions. Hence, a new framework for time series forecasting in a big data streaming scenario, using data collected from Internet of Things networks, is presented. This framework comprises of five main modules: Internet of Things network design and deployment, big data streaming architecture, stream data modeling method, big data forecasting method, and a comprehensive real-world application scenario, consisting of a physical Internet of Things network feeding the big data streaming architecture, being the linear regression the algorithm used for illustrative purposes. Comparison with other frameworks reveals that this is the first framework that incorporates and integrates all the aforementioned modules. |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso A new approach based on association rules to add explainability to time series forecasting models (Journal Article) Information Fusion, 94 , pp. 169-180, 2023. (Abstract | Links | BibTeX | Tags: association rules, forecasting, time series, XAI) @article{TRONCOSO-GARCIA23, title = {A new approach based on association rules to add explainability to time series forecasting models}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S1566253523000295}, doi = {10.1016/j.inffus.2023.01.021}, year = {2023}, date = {2023-01-22}, journal = {Information Fusion}, volume = {94}, pages = {169-180}, abstract = {Machine learning and deep learning have become the most useful and powerful tools in the last years to mine information from large datasets. Despite the successful application to many research fields, it is widely known that some of these solutions based on artificial intelligence are considered black-box models, meaning that most experts find difficult to explain and interpret the models and why they generate such outputs. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability. Thus, models could be easily understood and further applied. This work proposes a novel method to explain black-box models, by using numeric association rules to explain and interpret multi-step time series forecasting models. Thus, a multi-objective algorithm is used to discover quantitative association rules from the target model. Then, visual explanation techniques are applied to make the rules more interpretable. 
Data from Spanish electricity energy consumption has been used to assess the suitability of the proposal.}, keywords = {association rules, forecasting, time series, XAI}, pubstate = {published}, tppubtype = {article} } Machine learning and deep learning have become the most useful and powerful tools in the last years to mine information from large datasets. Despite the successful application to many research fields, it is widely known that some of these solutions based on artificial intelligence are considered black-box models, meaning that most experts find difficult to explain and interpret the models and why they generate such outputs. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability. Thus, models could be easily understood and further applied. This work proposes a novel method to explain black-box models, by using numeric association rules to explain and interpret multi-step time series forecasting models. Thus, a multi-objective algorithm is used to discover quantitative association rules from the target model. Then, visual explanation techniques are applied to make the rules more interpretable. Data from Spanish electricity energy consumption has been used to assess the suitability of the proposal. |
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés From Simple to Complex: A Sequential Method for Enhancing Time Series Forecasting with Deep Learning (Journal Article) Logic Journal of the IGPL, in press , 2023. (Abstract | BibTeX | Tags: deep learning, forecasting, time series) @article{JIMENEZ-NAVARRO23a, title = {From Simple to Complex: A Sequential Method for Enhancing Time Series Forecasting with Deep Learning}, author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés}, year = {2023}, date = {2023-01-20}, journal = {Logic Journal of the IGPL}, volume = {in press}, abstract = {Time series forecasting is a well-known deep learning application field in which previous data are used to predict the future behavior of the series. Recently, several deep learning approaches have been proposed in which several nonlinear functions are applied to the input to obtain the output. In this paper, we introduce a novel method to improve the performance of deep learning models in time series forecasting. This method divides the model into hierarchies or levels from simpler to more complex ones. Simpler levels handle smoothed versions of the input, whereas the most complex level processes the original time series. This method follows the human learning process where general/simpler tasks are performed first, and afterward, more precise/harder ones are accomplished. Our proposed methodology has been applied to the LSTM architecture, showing remarkable performance in various time series. 
In addition, a comparison is reported including a standard LSTM and novel methods such as DeepAR, Temporal Fusion Transformer (TFT), NBEATS and Echo State Network (ESN).}, keywords = {deep learning, forecasting, time series}, pubstate = {published}, tppubtype = {article} } Time series forecasting is a well-known deep learning application field in which previous data are used to predict the future behavior of the series. Recently, several deep learning approaches have been proposed in which several nonlinear functions are applied to the input to obtain the output. In this paper, we introduce a novel method to improve the performance of deep learning models in time series forecasting. This method divides the model into hierarchies or levels from simpler to more complex ones. Simpler levels handle smoothed versions of the input, whereas the most complex level processes the original time series. This method follows the human learning process where general/simpler tasks are performed first, and afterward, more precise/harder ones are accomplished. Our proposed methodology has been applied to the LSTM architecture, showing remarkable performance in various time series. In addition, a comparison is reported including a standard LSTM and novel methods such as DeepAR, Temporal Fusion Transformer (TFT), NBEATS and Echo State Network (ESN). |
2022 |
M. Á. Molina and M. J. Jiménez-Navarro and R. Arjona and F. Martínez-Álvarez and G. Asencio-Cortés DIAFAN-TL: An instance weighting-based transfer learning algorithm with application to phenology forecasting (Journal Article) Knowledge-Based Systems, 254 , pp. 109644, 2022. (Abstract | Links | BibTeX | Tags: forecasting, time series, transfer learning) @article{MOLINA22, title = {DIAFAN-TL: An instance weighting-based transfer learning algorithm with application to phenology forecasting}, author = {M. Á. Molina and M. J. Jiménez-Navarro and R. Arjona and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://www.sciencedirect.com/science/article/pii/S0950705122008322}, doi = {https://doi.org/10.1016/j.knosys.2022.109644}, year = {2022}, date = {2022-10-22}, journal = {Knowledge-Based Systems}, volume = {254}, pages = {109644}, abstract = {The agricultural sector has been, and still is, the most important economic sector in many countries. Due to advances in technology, the amount and variety of available data have been increasing over the years. However, compared to other economic sectors, there is not always enough quality data for one particular domain (crops, plantations, plots) to obtain acceptable forecasting results with machine learning algorithms. In this context, transfer learning can help extract knowledge from different but related domains with enough data to transfer it to a target domain with scarce data. This process can overcome forecasting accuracy compared to training models uniquely with data from the target domain. In this work, a novel instance weighting-based transfer learning algorithm is proposed and applied to the phenology forecasting problem. A new metric named DIAFAN is proposed to weight samples from different source domains according to their relationship with the target domain, promoting the diversity of the information and avoiding inconsistent samples. 
Additionally, a set of validation schemes is specifically designed to ensure fair comparisons in terms of data volume with other benchmark transfer learning algorithms. The proposed algorithm, DIAFAN-TL, is tested with a proposed dataset of 16 plots of olive groves from different places, including information fusion from satellite images, meteorological stations and human field sampling of crop phenology. DIAFAN-TL achieves a remarkable improvement with respect to 15 other well-known transfer learning algorithms and three nontransfer learning scenarios. Finally, several performance analyses according to the different phenological states, prediction horizons and source domains are also performed.}, keywords = {forecasting, time series, transfer learning}, pubstate = {published}, tppubtype = {article} } The agricultural sector has been, and still is, the most important economic sector in many countries. Due to advances in technology, the amount and variety of available data have been increasing over the years. However, compared to other economic sectors, there is not always enough quality data for one particular domain (crops, plantations, plots) to obtain acceptable forecasting results with machine learning algorithms. In this context, transfer learning can help extract knowledge from different but related domains with enough data to transfer it to a target domain with scarce data. This process can overcome forecasting accuracy compared to training models uniquely with data from the target domain. In this work, a novel instance weighting-based transfer learning algorithm is proposed and applied to the phenology forecasting problem. A new metric named DIAFAN is proposed to weight samples from different source domains according to their relationship with the target domain, promoting the diversity of the information and avoiding inconsistent samples. 
Additionally, a set of validation schemes is specifically designed to ensure fair comparisons in terms of data volume with other benchmark transfer learning algorithms. The proposed algorithm, DIAFAN-TL, is tested with a proposed dataset of 16 plots of olive groves from different places, including information fusion from satellite images, meteorological stations and human field sampling of crop phenology. DIAFAN-TL achieves a remarkable improvement with respect to 15 other well-known transfer learning algorithms and three nontransfer learning scenarios. Finally, several performance analyses according to the different phenological states, prediction horizons and source domains are also performed. |
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso Explainable machine learning for sleep apnea prediction (Conference) KES International Conference on Knowledge Based and Intelligent information and Engineering Systems, 2022. (Abstract | Links | BibTeX | Tags: association rules, deep learning, time series, XAI) @conference{TRONCOSO-GARCIA22, title = {Explainable machine learning for sleep apnea prediction}, author = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S1877050922012406}, doi = {https://doi.org/10.1016/j.procs.2022.09.351}, year = {2022}, date = {2022-09-10}, booktitle = {KES International Conference on Knowledge Based and Intelligent information and Engineering Systems}, pages = {2930-2939}, abstract = {Machine and deep learning has become one of the most useful tools in the last years as a diagnosis-decision-support tool in the health area. However, it is widely known that artificial intelligence models are considered a black box and most experts experience difficulties explaining and interpreting the models and their results. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability so that models can be easily understood and further applied. Obstructive sleep apnea is a common chronic respiratory disease related to sleep. Its diagnosis nowadays is done by processing different data signals, such as electrocardiogram or respiratory rate. The waveform of the respiratory signal is of importance too. Machine learning models could be applied to the signal's analysis. Data from a polysomnography study for automatic sleep apnea detection have been used to evaluate the use of the Local Interpretable Model-Agnostic (LIME) library for explaining the health data models. 
Results obtained help to understand how several features have been used in the model and their influence in the quality of sleep.}, keywords = {association rules, deep learning, time series, XAI}, pubstate = {published}, tppubtype = {conference} } Machine and deep learning has become one of the most useful tools in the last years as a diagnosis-decision-support tool in the health area. However, it is widely known that artificial intelligence models are considered a black box and most experts experience difficulties explaining and interpreting the models and their results. In this context, explainable artificial intelligence is emerging with the aim of providing black-box models with sufficient interpretability so that models can be easily understood and further applied. Obstructive sleep apnea is a common chronic respiratory disease related to sleep. Its diagnosis nowadays is done by processing different data signals, such as electrocardiogram or respiratory rate. The waveform of the respiratory signal is of importance too. Machine learning models could be applied to the signal's analysis. Data from a polysomnography study for automatic sleep apnea detection have been used to evaluate the use of the Local Interpretable Model-Agnostic (LIME) library for explaining the health data models. Results obtained help to understand how several features have been used in the model and their influence in the quality of sleep. |
D. Hadjout and J. F. Torres and A. Troncoso and A. Sebaa and F. Martínez-Álvarez Electricity consumption forecasting based on ensemble deep learning with application to the Algerian market (Journal Article) Energy, 243 , pp. 123060, 2022. (Abstract | Links | BibTeX | Tags: deep learning, energy, time series) @article{HADJOUT22, title = {Electricity consumption forecasting based on ensemble deep learning with application to the Algerian market}, author = {D. Hadjout and J. F. Torres and A. Troncoso and A. Sebaa and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S0360544221033090}, doi = {10.1016/j.energy.2021.123060}, year = {2022}, date = {2022-03-15}, journal = {Energy}, volume = {243}, pages = {123060}, abstract = {The economic sector is one of the most important pillars of countries. Economic activities of industry are intimately linked with the ability to meet their needs for electricity. Therefore, electricity forecasting is a very important task. It allows for better planning and management of energy resources. Several methods have been proposed to forecast energy consumption. In this work, to predict monthly electricity consumption for the economic sector, we develop a novel approach based on ensemble learning. Our approach combines three models that proved successful in the field, namely: Long Short Term Memory and Gated Recurrent Unit neural networks, and Temporal Convolutional Networks. The experiments have been conducted with almost 2000 clients and 14 years of monthly electricity consumption from Bejaia, Algeria. The results show that the proposed ensemble models achieve better performance than both the company's requirements and the prediction of the traditional individual models. 
Finally, statistical tests have been carried out to prove that significance of the ensemble models developed.}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } The economic sector is one of the most important pillars of countries. Economic activities of industry are intimately linked with the ability to meet their needs for electricity. Therefore, electricity forecasting is a very important task. It allows for better planning and management of energy resources. Several methods have been proposed to forecast energy consumption. In this work, to predict monthly electricity consumption for the economic sector, we develop a novel approach based on ensemble learning. Our approach combines three models that proved successful in the field, namely: Long Short Term Memory and Gated Recurrent Unit neural networks, and Temporal Convolutional Networks. The experiments have been conducted with almost 2000 clients and 14 years of monthly electricity consumption from Bejaia, Algeria. The results show that the proposed ensemble models achieve better performance than both the company's requirements and the prediction of the traditional individual models. Finally, statistical tests have been carried out to prove that significance of the ensemble models developed. |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special issue SOCO-CISIS 2019-IGPL (Journal Article) Logic Journal of the IGPL, 30 (2), pp. 211–213, 2022. @article{MARTINEZ22b, title = {Special issue SOCO-CISIS 2019-IGPL}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://doi.org/10.1093/jigpal/jzaa066}, doi = {10.1093/jigpal/jzaa066}, year = {2022}, date = {2022-03-10}, journal = {Logic Journal of the IGPL}, volume = {30}, number = {2}, pages = {211--213}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
J. F. Torres and F. Martínez-Álvarez and A. Troncoso A deep LSTM network for the Spanish electricity consumption forecasting (Journal Article) Neural Computing and Applications, 34 , pp. 10533–10545, 2022. (Abstract | Links | BibTeX | Tags: deep learning, energy) @article{TORRES22b, title = {A deep LSTM network for the Spanish electricity consumption forecasting}, author = {J. F. Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://link.springer.com/article/10.1007/s00521-021-06773-2}, doi = {10.1007/s00521-021-06773-2}, year = {2022}, date = {2022-02-05}, journal = {Neural Computing and Applications}, volume = {34}, pages = {10533--10545}, abstract = {Nowadays, electricity is a basic commodity necessary for the well-being of any modern society. Due to the growth in electricity consumption in recent years, mainly in large cities, electricity forecasting is key to the management of an efficient, sustainable and safe smart grid for the consumer. In this work, a deep neural network is proposed to address the electricity consumption forecasting in the short-term, namely, a long short-term memory (LSTM) network due to its ability to deal with sequential data such as time-series data. First, the optimal values for certain hyper-parameters have been obtained by a random search and a metaheuristic, called coronavirus optimization algorithm (CVOA), based on the propagation of the SARS-Cov-2 virus. Then, the optimal LSTM has been applied to predict the electricity demand with 4-h forecast horizon. Results using Spanish electricity data during nine years and half measured with 10-min frequency are presented and discussed. 
Finally, the performance of the proposed LSTM using random search and the LSTM using CVOA is compared, on the one hand, with that of recently published deep neural networks (such as a deep feed-forward neural network optimized with a grid search) and temporal fusion transformers optimized with a sampling algorithm, and, on the other hand, with traditional machine learning techniques, such as a linear regression, decision trees and tree-based ensemble techniques (gradient-boosted trees and random forest), achieving the smallest prediction error below 1.5%.}, keywords = {deep learning, energy}, pubstate = {published}, tppubtype = {article} } Nowadays, electricity is a basic commodity necessary for the well-being of any modern society. Due to the growth in electricity consumption in recent years, mainly in large cities, electricity forecasting is key to the management of an efficient, sustainable and safe smart grid for the consumer. In this work, a deep neural network is proposed to address the electricity consumption forecasting in the short-term, namely, a long short-term memory (LSTM) network due to its ability to deal with sequential data such as time-series data. First, the optimal values for certain hyper-parameters have been obtained by a random search and a metaheuristic, called coronavirus optimization algorithm (CVOA), based on the propagation of the SARS-Cov-2 virus. Then, the optimal LSTM has been applied to predict the electricity demand with 4-h forecast horizon. Results using Spanish electricity data during nine years and half measured with 10-min frequency are presented and discussed. 
Finally, the performance of the proposed LSTM using random search and the LSTM using CVOA is compared, on the one hand, with that of recently published deep neural networks (such as a deep feed-forward neural network optimized with a grid search) and temporal fusion transformers optimized with a sampling algorithm, and, on the other hand, with traditional machine learning techniques, such as a linear regression, decision trees and tree-based ensemble techniques (gradient-boosted trees and random forest), achieving the smallest prediction error below 1.5%. |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special Issue SOCO 2019: New trends in soft computing and its application in industrial and environmental problems (Journal Article) Neurocomputing, 470 , pp. 278–279, 2022. @article{MARTINEZ22, title = {Special Issue SOCO 2019: New trends in soft computing and its application in industrial and environmental problems}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0925231221001399}, doi = {10.1016/j.neucom.2021.01.071}, year = {2022}, date = {2022-01-22}, journal = {Neurocomputing}, volume = {470}, pages = {278--279}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of Amazon Algorithm (Journal Article) Information, 13 (2), pp. 1–16, 2022. (Abstract | Links | BibTeX | Tags: feature selection, time series) @article{losada2022, title = {Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of Amazon Algorithm}, author = {A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown}, url = {https://www.mdpi.com/2078-2489/13/2/44}, doi = {10.3390/info13020044}, year = {2022}, date = {2022-01-01}, journal = {Information}, volume = {13}, number = {2}, pages = {1--16}, abstract = {Purchase processes on Amazon Marketplace begin at the Buy Box, which represents the buy click process through which numerous sellers compete. This study aimed to estimate empirically the relevant seller characteristics that Amazon could consider featuring in the Buy Box. To that end, 22 product categories from Italy’s Amazon web page were studied over a ten-month period, and the sellers were analyzed through their products featured in the Buy Box. Two different experiments were proposed and the results were analyzed using four classification algorithms (a neural network, random forest, support vector machine, and C5.0 decision trees) and a rule-based classification. The first experiment aimed to characterize sellers unspecifically by predicting their change at the Buy Box. The second one aimed to predict which seller would be featured in it. Both experiments revealed that the customer experience and the dynamics of the sellers’ prices were important features of the Buy Box. Additionally, we proposed a set of default features that Amazon could consider when no information about sellers was available. 
We also proposed the possible existence of a relationship or composition among important features that could be used for sellers to be featured in the Buy Box.}, keywords = {feature selection, time series}, pubstate = {published}, tppubtype = {article} } Purchase processes on Amazon Marketplace begin at the Buy Box, which represents the buy click process through which numerous sellers compete. This study aimed to estimate empirically the relevant seller characteristics that Amazon could consider featuring in the Buy Box. To that end, 22 product categories from Italy’s Amazon web page were studied over a ten-month period, and the sellers were analyzed through their products featured in the Buy Box. Two different experiments were proposed and the results were analyzed using four classification algorithms (a neural network, random forest, support vector machine, and C5.0 decision trees) and a rule-based classification. The first experiment aimed to characterize sellers unspecifically by predicting their change at the Buy Box. The second one aimed to predict which seller would be featured in it. Both experiments revealed that the customer experience and the dynamics of the sellers’ prices were important features of the Buy Box. Additionally, we proposed a set of default features that Amazon could consider when no information about sellers was available. We also proposed the possible existence of a relationship or composition among important features that could be used for sellers to be featured in the Buy Box. |
M. A. Castán-Lascorz and P. Jiménez-Herrera and A. Troncoso and G. Asencio-Cortés A new hybrid method for predicting univariate and multivariate time series based on pattern forecasting (Journal Article) Information Sciences, 586 , pp. 611–627, 2022. (Abstract | Links | BibTeX | Tags: clustering, energy, time series) @article{castan2022, title = {A new hybrid method for predicting univariate and multivariate time series based on pattern forecasting}, author = {M. A. Castán-Lascorz and P. Jiménez-Herrera and A. Troncoso and G. Asencio-Cortés}, url = {https://www.sciencedirect.com/science/article/pii/S0020025521012226}, doi = {10.1016/j.ins.2021.12.001}, year = {2022}, date = {2022-01-01}, journal = {Information Sciences}, volume = {586}, pages = {611--627}, abstract = {Time series forecasting has become indispensable for multiple applications and industrial processes. Currently, a large number of algorithms have been developed to forecast time series, all of which are suitable depending on the characteristics and patterns to be inferred in each case. In this work, a new algorithm is proposed to predict both univariate and multivariate time series based on a combination of clustering, classification and forecasting techniques. The main goal of the proposed algorithm is first to group windows of time series values with similar patterns by applying a clustering process. Then, a specific forecasting model for each pattern is built and training is only conducted with the time windows corresponding to that pattern. The new algorithm has been designed using a flexible framework that allows the model to be generated using any combination of approaches within multiple machine learning techniques. To evaluate the model, several experiments are carried out using different configurations of the clustering, classification and forecasting methods that the model consists of. 
The results are analyzed and compared to classical prediction models, such as autoregressive, integrated, moving average and Holt-Winters models, to very recent forecasting methods, including deep, long short-term memory neural networks, and to well-known methods in the literature, such as k nearest neighbors, classification and regression trees, as well as random forest.}, keywords = {clustering, energy, time series}, pubstate = {published}, tppubtype = {article} } Time series forecasting has become indispensable for multiple applications and industrial processes. Currently, a large number of algorithms have been developed to forecast time series, all of which are suitable depending on the characteristics and patterns to be inferred in each case. In this work, a new algorithm is proposed to predict both univariate and multivariate time series based on a combination of clustering, classification and forecasting techniques. The main goal of the proposed algorithm is first to group windows of time series values with similar patterns by applying a clustering process. Then, a specific forecasting model for each pattern is built and training is only conducted with the time windows corresponding to that pattern. The new algorithm has been designed using a flexible framework that allows the model to be generated using any combination of approaches within multiple machine learning techniques. To evaluate the model, several experiments are carried out using different configurations of the clustering, classification and forecasting methods that the model consists of. The results are analyzed and compared to classical prediction models, such as autoregressive, integrated, moving average and Holt-Winters models, to very recent forecasting methods, including deep, long short-term memory neural networks, and to well-known methods in the literature, such as k nearest neighbors, classification and regression trees, as well as random forest. |
P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso Streaming big time series forecasting based on nearest similar patterns with application to energy consumption (Journal Article) Logic Journal of the IGPL, (in press) , pp. 1–20, 2022. (Abstract | Links | BibTeX | Tags: clustering, data streaming, energy, time series) @article{jimenez2022, title = {Streaming big time series forecasting based on nearest similar patterns with application to energy consumption}, author = {P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso}, url = {https://academic.oup.com/jigpal/advance-article-abstract/doi/10.1093/jigpal/jzac017/6534493?redirectedFrom=fulltext}, doi = {10.1093/jigpal/jzac017}, year = {2022}, date = {2022-01-01}, journal = {Logic Journal of the IGPL}, pages = {1--20}, abstract = {This work presents a novel approach to forecast streaming big time series based on nearest similar patterns. This approach combines a clustering algorithm with a classifier and the nearest neighbors algorithm. It presents two separate stages: offline and online. The offline phase is for training and finding the best models for clustering, classification and the nearest neighbors algorithm. The online phase is to predict big time series in real time. In the offline phase, data are divided into clusters and a forecasting model based on the nearest neighbors is trained for each cluster. In addition, a classifier is trained using the cluster assignments previously generated by the clustering algorithm. In the online phase, the classifier predicts the cluster label of an instance, and the proper nearest neighbors model according to the predicted cluster label is applied to obtain the final prediction using the similar patterns. The algorithm is able to be updated incrementally for online learning from data streams. 
Results are reported using electricity consumption with a granularity of 10 minutes for 4-hour-ahead forecasting and compared with well-known online benchmark learners, showing a remarkable improvement in prediction accuracy.}, keywords = {clustering, data streaming, energy, time series}, pubstate = {inpress}, tppubtype = {article} } This work presents a novel approach to forecast streaming big time series based on nearest similar patterns. This approach combines a clustering algorithm with a classifier and the nearest neighbors algorithm. It presents two separate stages: offline and online. The offline phase is for training and finding the best models for clustering, classification and the nearest neighbors algorithm. The online phase is to predict big time series in real time. In the offline phase, data are divided into clusters and a forecasting model based on the nearest neighbors is trained for each cluster. In addition, a classifier is trained using the cluster assignments previously generated by the clustering algorithm. In the online phase, the classifier predicts the cluster label of an instance, and the proper nearest neighbors model according to the predicted cluster label is applied to obtain the final prediction using the similar patterns. The algorithm is able to be updated incrementally for online learning from data streams. Results are reported using electricity consumption with a granularity of 10 minutes for 4-hour-ahead forecasting and compared with well-known online benchmark learners, showing a remarkable improvement in prediction accuracy. |
J. Roiz-Pagador and A. Chacon-Maldonado and R. Ruiz and G. Asencio-Cortés Earthquake Prediction in California using Feature Selection techniques (Conference) 16th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2021), Advances in Intelligent Systems and Computing 2022. (Links | BibTeX | Tags: feature selection, natural disasters, time series) @conference{roiz2022, title = {Earthquake Prediction in California using Feature Selection techniques}, author = {J. Roiz-Pagador and A. Chacon-Maldonado and R. Ruiz and G. Asencio-Cortés}, url = {https://link.springer.com/chapter/10.1007/978-3-030-87869-6_69}, doi = {10.1007/978-3-030-87869-6_69}, year = {2022}, date = {2022-01-01}, booktitle = {16th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2021)}, series = {Advances in Intelligent Systems and Computing}, keywords = {feature selection, natural disasters, time series}, pubstate = {published}, tppubtype = {conference} } |
G. Velázquez and F. Morales and M. García-Torres and F. Gómez-Vela and F. Divina and J. L. Vázquez Noguera and F. Daumas-Ladouce and C. Ayala and D. Pinto-Roa and P. Gardel-Sotomayor Distribution level Electric current consumption and meteorological data set of the East region of Paraguay (Journal Article) Data in Brief, 40 , pp. 107699, 2022. (Abstract | Links | BibTeX | Tags: energy, time series) @article{velazquez2022distribution, title = {Distribution level Electric current consumption and meteorological data set of the East region of Paraguay}, author = {G. Velázquez and F. Morales and M. García-Torres and F. Gómez-Vela and F. Divina and J. L. {Vázquez Noguera} and F. Daumas-Ladouce and C. Ayala and D. Pinto-Roa and P. Gardel-Sotomayor}, url = {https://www.sciencedirect.com/science/article/pii/S2352340921009744}, doi = {10.1016/j.dib.2021.107699}, year = {2022}, date = {2022-01-01}, journal = {Data in Brief}, volume = {40}, pages = {107699}, publisher = {Elsevier}, abstract = {This paper presents a data set with information on meteorological data and electricity consumption in the department of Alto Paraná, Paraguay. The meteorological data were registered every three hours at the Aeropuerto Guarani, Department of Alto Paraná, which belongs to the Dirección Nacional de Aeronáutica Civil of Paraguay. The final data consists of a total of 22.445 records of temperature, relative humidity, wind speed and atmospheric pressure. On the other hand, the electrical energy consumption data set contains a total of 1.848.947 records, all of them coming from the one hundred and fifteen feeders located throughout the Alto Paraná region of Paraguay. Electrical energy consumption data was provided by Administración Nacional de Electricidad (ANDE). 
The analysis of this data can yield insights regarding the energy consumption in the area.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } This paper presents a data set with information on meteorological data and electricity consumption in the department of Alto Paraná, Paraguay. The meteorological data were registered every three hours at the Aeropuerto Guarani, Department of Alto Paraná, which belongs to the Dirección Nacional de Aeronáutica Civil of Paraguay. The final data consists of a total of 22.445 records of temperature, relative humidity, wind speed and atmospheric pressure. On the other hand, the electrical energy consumption data set contains a total of 1.848.947 records, all of them coming from the one hundred and fifteen feeders located throughout the Alto Paraná region of Paraguay. Electrical energy consumption data was provided by Administración Nacional de Electricidad (ANDE). The analysis of this data can yield insights regarding the energy consumption in the area. |
F. Morales and M. García-Torres and G. Velázquez and F. Daumas-Ladouce and P. Gardel-Sotomayor and F. Gómez-Vela and F. Divina and J. L. Vázquez Noguera and C. Sauer Ayala and D. Pinto-Roa Analysis of Electric Energy Consumption Profiles Using a Machine Learning Approach: A Paraguayan Case Study (Journal Article) Electronics, 11 (2), pp. 267, 2022. (Abstract | Links | BibTeX | Tags: clustering, energy, pattern recognition, time series) @article{morales2022analysis, title = {Analysis of Electric Energy Consumption Profiles Using a Machine Learning Approach: A Paraguayan Case Study}, author = {F. Morales and M. García-Torres and G. Velázquez and F. Daumas-Ladouce and P. Gardel-Sotomayor and F. Gómez-Vela and F. Divina and J. L. {Vázquez Noguera} and C. {Sauer Ayala} and D. Pinto-Roa}, url = {https://www.mdpi.com/2079-9292/11/2/267}, doi = {10.3390/electronics11020267}, year = {2022}, date = {2022-01-01}, journal = {Electronics}, volume = {11}, number = {2}, pages = {267}, publisher = {Multidisciplinary Digital Publishing Institute}, abstract = {Correctly defining and grouping electrical feeders is of great importance for electrical system operators. In this paper, we compare two different clustering techniques, K-means and hierarchical agglomerative clustering, applied to real data from the east region of Paraguay. The raw data were pre-processed, resulting in four data sets, namely, (i) a weekly feeder demand, (ii) a monthly feeder demand, (iii) a statistical feature set extracted from the original data and (iv) a seasonal and daily consumption feature set obtained considering the characteristics of the Paraguayan load curve. Considering the four data sets, two clustering algorithms, two distance metrics and five linkage criteria a total of 36 models with the Silhouette, Davies–Bouldin and Calinski–Harabasz index scores was assessed. 
The K-means algorithms with the seasonal feature data sets showed the best performance considering the Silhouette, Calinski–Harabasz and Davies–Bouldin validation index scores with a configuration of six clusters.}, keywords = {clustering, energy, pattern recognition, time series}, pubstate = {published}, tppubtype = {article} } Correctly defining and grouping electrical feeders is of great importance for electrical system operators. In this paper, we compare two different clustering techniques, K-means and hierarchical agglomerative clustering, applied to real data from the east region of Paraguay. The raw data were pre-processed, resulting in four data sets, namely, (i) a weekly feeder demand, (ii) a monthly feeder demand, (iii) a statistical feature set extracted from the original data and (iv) a seasonal and daily consumption feature set obtained considering the characteristics of the Paraguayan load curve. Considering the four data sets, two clustering algorithms, two distance metrics and five linkage criteria a total of 36 models with the Silhouette, Davies–Bouldin and Calinski–Harabasz index scores was assessed. The K-means algorithms with the seasonal feature data sets showed the best performance considering the Silhouette, Calinski–Harabasz and Davies–Bouldin validation index scores with a configuration of six clusters. |
S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C. E. Schaerer Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty (Journal Article) Entropy, 24 (1), pp. 64, 2022. (Abstract | Links | BibTeX | Tags: feature selection) @article{gomez2022measuring, title = {Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty}, author = {S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C. E. Schaerer}, url = {https://www.mdpi.com/1099-4300/24/1/64}, doi = {10.3390/e24010064}, year = {2022}, date = {2022-01-01}, journal = {Entropy}, volume = {24}, number = {1}, pages = {64}, publisher = {Multidisciplinary Digital Publishing Institute}, abstract = {Interaction between variables is often found in statistical models, and it is usually expressed in the model as an additional term when the variables are numeric. However, when the variables are categorical (also known as nominal or qualitative) or mixed numerical-categorical, defining, detecting, and measuring interactions is not a simple task. In this work, based on an entropy-based correlation measure for n nominal variables (named as Multivariate Symmetrical Uncertainty (MSU)), we propose a formal and broader definition for the interaction of the variables. Two series of experiments are presented. In the first series, we observe that datasets where some record types or combinations of categories are absent, forming patterns of records, which often display interactions among their attributes. In the second series, the interaction/non-interaction behavior of a regression model (entirely built on continuous variables) gets successfully replicated under a discretized version of the dataset. It is shown that there is an interaction-wise correspondence between the continuous and the discretized versions of the dataset. 
Hence, we demonstrate that the proposed definition of interaction enabled by the MSU is a valuable tool for detecting and measuring interactions within linear and non-linear models.}, keywords = {feature selection}, pubstate = {published}, tppubtype = {article} } Interaction between variables is often found in statistical models, and it is usually expressed in the model as an additional term when the variables are numeric. However, when the variables are categorical (also known as nominal or qualitative) or mixed numerical-categorical, defining, detecting, and measuring interactions is not a simple task. In this work, based on an entropy-based correlation measure for n nominal variables (named as Multivariate Symmetrical Uncertainty (MSU)), we propose a formal and broader definition for the interaction of the variables. Two series of experiments are presented. In the first series, we observe that datasets where some record types or combinations of categories are absent, forming patterns of records, which often display interactions among their attributes. In the second series, the interaction/non-interaction behavior of a regression model (entirely built on continuous variables) gets successfully replicated under a discretized version of the dataset. It is shown that there is an interaction-wise correspondence between the continuous and the discretized versions of the dataset. Hence, we demonstrate that the proposed definition of interaction enabled by the MSU is a valuable tool for detecting and measuring interactions within linear and non-linear models. |
C. Segarra-Martín and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez A novel approach to discover numerical association based on the Coronavirus Optimization Algorithm (Conference) SAC 37th Symposium On Applied Computing, 2022. (Abstract | BibTeX | Tags: association rules) @conference{SAC2022, title = {A novel approach to discover numerical association based on the Coronavirus Optimization Algorithm}, author = {C. Segarra-Martín and M. Martínez-Ballesteros and A. Troncoso and F. Martínez-Álvarez}, year = {2022}, date = {2022-01-01}, booktitle = {SAC 37th Symposium On Applied Computing}, abstract = {The disease caused by the SARS-CoV-2 (COVID-19) has affected millions of people around the world since its detection in 2019. This pandemic inspired the development of the Coronavirus Optimization Algorithm (CVOA), a bio-inspired metaheuristic that was originally used to adjust deep learning models for time series forecasting, by means of a binary codification. In this paper, a integer codification for the CVOA individual is introduced and used for optimizing a novel approach for numerical association rules mining. In addition, the CVOA setting parameters have been updated and a vaccination rate based on real data has been incorporated, to make it more efficient. As an application case, the prediction of earthquakes of large magnitude has been addressed. This kind of events are rare and, therefore, they can be characterized by rules with very high interest or lift and low support. Thus, the algorithm has been applied to the extraction of rules meeting specific criteria in an earthquake data set, provided by the National Geographic Institute of Spain. 
The results show CVOA as a promising tool for numerical association rules mining, obtaining rules with useful and meaningful information for predicting the occurrence of large earthquakes.}, keywords = {association rules}, pubstate = {published}, tppubtype = {conference} } The disease caused by the SARS-CoV-2 (COVID-19) has affected millions of people around the world since its detection in 2019. This pandemic inspired the development of the Coronavirus Optimization Algorithm (CVOA), a bio-inspired metaheuristic that was originally used to adjust deep learning models for time series forecasting, by means of a binary codification. In this paper, a integer codification for the CVOA individual is introduced and used for optimizing a novel approach for numerical association rules mining. In addition, the CVOA setting parameters have been updated and a vaccination rate based on real data has been incorporated, to make it more efficient. As an application case, the prediction of earthquakes of large magnitude has been addressed. This kind of events are rare and, therefore, they can be characterized by rules with very high interest or lift and low support. Thus, the algorithm has been applied to the extraction of rules meeting specific criteria in an earthquake data set, provided by the National Geographic Institute of Spain. The results show CVOA as a promising tool for numerical association rules mining, obtaining rules with useful and meaningful information for predicting the occurrence of large earthquakes. |
L. Melgar-García and D. Gutiérrez-Avilés and M. T. Godinho and R. Espada and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero A new big data triclustering approach for extracting three-dimensional patterns in precision agriculture (Journal Article) Neurocomputing, 500 , pp. 268–278, 2022. (Abstract | Links | BibTeX | Tags: big data, clustering, pattern recognition) @article{MELGAR21_NEUCOMb, title = {A new big data triclustering approach for extracting three-dimensional patterns in precision agriculture}, author = {L. Melgar-García and D. Gutiérrez-Avilés and M. T. Godinho and R. Espada and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0925231222006415}, doi = {10.1016/j.neucom.2021.06.101}, year = {2022}, date = {2022-01-01}, journal = {Neurocomputing}, volume = {500}, pages = {268--278}, abstract = {Precision agriculture focuses on the development of site-specific harvest considering the variability of each crop area. Vegetation indices allow the study and delineation of different characteristics of each field zone, generally invisible to the naked-eye. This paper introduces a new big data triclustering approach based on evolutionary algorithms. The algorithm shows its capability to discover three-dimensional patterns on the basis of vegetation indices from vine crops. Different vegetation indices have been tested to find different patterns in the crops. The results reported using a vineyard crop located in Portugal depicts four areas with different moisture stress particularities that can lead to changes in the management of the vineyard. 
Furthermore, scalability studies have been performed, showing that the proposed algorithm is suitable for dealing with big datasets.}, keywords = {big data, clustering, pattern recognition}, pubstate = {published}, tppubtype = {article} } Precision agriculture focuses on the development of site-specific harvest considering the variability of each crop area. Vegetation indices allow the study and delineation of different characteristics of each field zone, generally invisible to the naked-eye. This paper introduces a new big data triclustering approach based on evolutionary algorithms. The algorithm shows its capability to discover three-dimensional patterns on the basis of vegetation indices from vine crops. Different vegetation indices have been tested to find different patterns in the crops. The results reported using a vineyard crop located in Portugal depicts four areas with different moisture stress particularities that can lead to changes in the management of the vineyard. Furthermore, scalability studies have been performed, showing that the proposed algorithm is suitable for dealing with big datasets. |
2021 |
K.-T. T. Bui and J. F. Torres and D. Gutiérrez-Avilés and V. H. Nhu and F. Martínez-Álvarez and D. T. Bui Deformation forecasting of a hydropower dam by hybridizing a Long Short-Term Memory deep learning network with the Coronavirus Optimization Algorithm (Journal Article) Computer-Aided Civil and Infrastructure Engineering, 37 , pp. 1368-1386, 2021. (Abstract | Links | BibTeX | Tags: deep learning, time series) @article{BUI22b, title = {Deformation forecasting of a hydropower dam by hybridizing a Long Short-Term Memory deep learning network with the Coronavirus Optimization Algorithm}, author = {K.-T. T. Bui and J. F. Torres and D. Gutiérrez-Avilés and V. H. Nhu and F. Martínez-Álvarez and D. T. Bui}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/mice.12810}, doi = {https://doi.org/10.1111/mice.12810}, year = {2021}, date = {2021-11-24}, journal = {Computer-Aided Civil and Infrastructure Engineering}, volume = {37}, pages = {1368-1386}, abstract = {The safety operation and management of hydropower dam play a critical role in social-economic development and ensure people's safety in many countries; therefore, modeling and forecasting the hydropower dam's deformations with high accuracy is crucial. This research aims to propose and validate a new model based on deep learning long short-term memory (LSTM) and the coronavirus optimization algorithm (CVOA), named CVOA-LSTM, for forecasting the deformations of the hydropower dam. The second-largest hydropower dam of Vietnam, located in the Hoa Binh province, is focused. Herein, we used the LSTM to establish the deformation model, whereas the CVOA was utilized to optimize the three parameters of the LSTM, the number of hidden layers, the learning rate, and the dropout. 
The efficacy of the proposed CVOA-LSTM model is assessed by comparing its forecasting performance with state-of-the-art benchmarks, sequential minimal optimization for support vector regression, Gaussian process, M5' model tree, multilayer perceptron neural network, reduced error pruning tree, random tree, random forest, and radial basis function neural network. The result shows that the proposed CVOA-LSTM model has high forecasting capability (R2 = 0.874, root mean square error = 0.34, mean absolute error = 0.23) and outperforms the benchmarks. We conclude that CVOA-LSTM is a new tool that can be considered to forecast the hydropower dam's deformations.}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {article} } The safety operation and management of hydropower dam play a critical role in social-economic development and ensure people's safety in many countries; therefore, modeling and forecasting the hydropower dam's deformations with high accuracy is crucial. This research aims to propose and validate a new model based on deep learning long short-term memory (LSTM) and the coronavirus optimization algorithm (CVOA), named CVOA-LSTM, for forecasting the deformations of the hydropower dam. The second-largest hydropower dam of Vietnam, located in the Hoa Binh province, is focused. Herein, we used the LSTM to establish the deformation model, whereas the CVOA was utilized to optimize the three parameters of the LSTM, the number of hidden layers, the learning rate, and the dropout. The efficacy of the proposed CVOA-LSTM model is assessed by comparing its forecasting performance with state-of-the-art benchmarks, sequential minimal optimization for support vector regression, Gaussian process, M5' model tree, multilayer perceptron neural network, reduced error pruning tree, random tree, random forest, and radial basis function neural network. 
The result shows that the proposed CVOA-LSTM model has high forecasting capability (R2 = 0.874, root mean square error = 0.34, mean absolute error = 0.23) and outperforms the benchmarks. We conclude that CVOA-LSTM is a new tool that can be considered to forecast the hydropower dam's deformations. |
R. Scitovski and K. Sabo and F. Martínez-Álvarez and S. Ungar Cluster analysis and applications (Book) Springer, 2021, ISBN: 978-3-030-74551-6. (Abstract | Links | BibTeX | Tags: clustering) @book{SCITOVSKI21, title = {Cluster analysis and applications}, author = {R. Scitovski and K. Sabo and F. Martínez-Álvarez and S. Ungar}, url = {https://www.springer.com/gp/book/9783030745516}, doi = {10.1007/978-3-030-74552-3}, isbn = {978-3-030-74551-6}, year = {2021}, date = {2021-09-26}, publisher = {Springer}, abstract = {With the development of Big Data platforms for managing massive amount of data and wide availability of tools for processing these data, the biggest limitation is the lack of trained experts who are qualified to process and interpret the results. This textbook is intended for graduate students and experts using methods of cluster analysis and applications in various fields. Suitable for an introductory course on cluster analysis or data mining, with an in-depth mathematical treatment that includes discussions on different measures, primitives (points, lines, etc.) and optimization-based clustering methods, Cluster Analysis and Applications also includes coverage of deep learning based clustering methods. With clear explanations of ideas and precise definitions of concepts, accompanied by numerous examples and exercises together with Mathematica programs and modules, Cluster Analysis and Applications may be used by students and researchers in various disciplines, working in data analysis or data science.}, keywords = {clustering}, pubstate = {published}, tppubtype = {book} } With the development of Big Data platforms for managing massive amount of data and wide availability of tools for processing these data, the biggest limitation is the lack of trained experts who are qualified to process and interpret the results. 
This textbook is intended for graduate students and experts using methods of cluster analysis and applications in various fields. Suitable for an introductory course on cluster analysis or data mining, with an in-depth mathematical treatment that includes discussions on different measures, primitives (points, lines, etc.) and optimization-based clustering methods, Cluster Analysis and Applications also includes coverage of deep learning based clustering methods. With clear explanations of ideas and precise definitions of concepts, accompanied by numerous examples and exercises together with Mathematica programs and modules, Cluster Analysis and Applications may be used by students and researchers in various disciplines, working in data analysis or data science. |
J. F. Torres and M. J. Jiménez-Navarro and F. Martínez-Álvarez and A. Troncoso Electricity consumption time series forecasting using Temporal Convolutional Networks (Conference) Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21), Lecture Notes in Artificial Intelligence 2021. (BibTeX | Tags: deep learning, time series) @conference{TORRES21b, title = {Electricity consumption time series forecasting using Temporal Convolutional Networks}, author = {J. F. Torres and M. J. Jiménez-Navarro and F. Martínez-Álvarez and A. Troncoso}, year = {2021}, date = {2021-09-01}, booktitle = {Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21)}, series = {Lecture Notes in Artificial Intelligence}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {conference} } |
A. Melara and J. F. Torres and A. Troncoso and F. Martínez-Álvarez Electricity Generation Forecasting in Concentrating Solar-Thermal Power Plants with Ensemble Learning (Conference) SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications, 1401 , Advances in Intelligent Systems and Computing 2021. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{MELARA21, title = {Electricity Generation Forecasting in Concentrating Solar-Thermal Power Plants with Ensemble Learning}, author = {A. Melara and J. F. Torres and A. Troncoso and F. Martínez-Álvarez}, doi = {https://doi.org/10.1007/978-3-030-87869-6_63}, year = {2021}, date = {2021-09-01}, booktitle = {SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications}, volume = {1401}, pages = {665-674}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
D. Hadjout and J. F. Torres and A. Sebaa and F. Martínez-Álvarez Medium-Term Electricity Consumption Forecasting in Algeria Based on Clustering, Deep Learning and Bayesian Optimization Methods (Conference) SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications, 1401 , Advances in Intelligent Systems and Computing 2021. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{HADJOUT21, title = {Medium-Term Electricity Consumption Forecasting in Algeria Based on Clustering, Deep Learning and Bayesian Optimization Methods}, author = {D. Hadjout and J. F. Torres and A. Sebaa and F. Martínez-Álvarez}, doi = {https://doi.org/10.1007/978-3-030-87869-6_70}, year = {2021}, date = {2021-09-01}, booktitle = {SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications}, volume = {1401}, pages = {739-748}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
M. J. Jiménez-Navarro and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés HLNet: A Novel Hierarchical Deep Neural Network for Time Series Forecasting (Conference) SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications, 1401 , Advances in Intelligent Systems and Computing 2021. (Links | BibTeX | Tags: deep learning, time series) @conference{JIMENEZ-NAVARRO21, title = {HLNet: A Novel Hierarchical Deep Neural Network for Time Series Forecasting}, author = {M. J. Jiménez-Navarro and F. Martínez-Álvarez and A. Troncoso and G. Asencio-Cortés}, doi = {https://doi.org/10.1007/978-3-030-87869-6_68}, year = {2021}, date = {2021-09-01}, booktitle = {SOCO International Conference on Soft Computing Models in Industrial and Environmental Applications}, volume = {1401}, pages = {717-727}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {conference} } |
M. A. Molina and M. J. Jiménez-Navarro and F. Martínez-Álvarez and G. Asencio-Cortés A Model-Based Deep Transfer Learning Algorithm for Phenology Forecasting Using Satellite Imagery (Conference) HAIS, 12886 , Lecture Notes in Computer Science 2021. (Links | BibTeX | Tags: deep learning, time series) @conference{MOLINA21, title = {A Model-Based Deep Transfer Learning Algorithm for Phenology Forecasting Using Satellite Imagery}, author = {M. A. Molina and M. J. Jiménez-Navarro and F. Martínez-Álvarez and G. Asencio-Cortés}, url = {https://link.springer.com/chapter/10.1007/978-3-030-86271-8_43}, doi = {https://doi.org/10.1007/978-3-030-86271-8_43}, year = {2021}, date = {2021-09-01}, booktitle = {HAIS}, volume = {12886}, pages = {511-523}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, time series}, pubstate = {published}, tppubtype = {conference} } |
A. Morales-Esteban and F. Martínez-Álvarez and S. Scitovski and R. Scitovski Mahalanobis clustering for the determination of incidence-magnitude seismic parameters for the Iberian Peninsula and the Republic of Croatia (Journal Article) Computers and Geosciences, 156 , pp. 104873, 2021. (Abstract | Links | BibTeX | Tags: clustering, natural disasters) @article{MORALES21, title = {Mahalanobis clustering for the determination of incidence-magnitude seismic parameters for the Iberian Peninsula and the Republic of Croatia}, author = {A. Morales-Esteban and F. Martínez-Álvarez and S. Scitovski and R. Scitovski}, url = {https://www.sciencedirect.com/science/article/pii/S0098300421001667}, doi = {https://doi.org/10.1016/j.cageo.2021.104873}, year = {2021}, date = {2021-07-09}, journal = {Computers and Geosciences}, volume = {156}, pages = {104873}, abstract = {The aim of this paper is to analyse the seismic activity of the Iberian Peninsula and a wide area of the Republic of Croatia. To do so, two incidence-magnitude seismic parameters have been defined. First, the areas have been divided into several ellipsoidal clusters using Mahalanobis clustering. Four generalised indexes (Mahalanobis Calinski Harabasz, Mahalanobis Davies–Bouldin, Mahalanobis Simplified Silhouette Width Criterion and Mahalanobis Area) have been used to determine the most appropriate number of ellipsoidal clusters, on the basis of which a partition with four and a partition with eleven clusters have been considered. For the wide area of the Republic of Croatia there are fourteen clusters and the five areas that just affect Croatia have been analysed in detail. 
Then, to analyse the seismic activity of the areas, two incidence-magnitude seismic parameters have been defined and calculated: a) 𝛥(4), that represents the minimal number of successive years in which at least one earthquake of magnitude between 4 and 5 has been registered; b) 𝛥(5), that shows the number of years in which at least one earthquake of magnitude larger than 5 occurred. The calculation of 𝛥(4) for the South-west and the South-east of the Iberian Peninsula has provided two years for both. Regarding 𝛥(5), 10 and 12 years have been obtained for the South-west and the South-east of the Iberian Peninsula, respectively. The analysis of Croatia has shown that the Ston–Metković area has the highest seismic activity. The following results have been determined: 5 years for 𝛥(4) and 22 for 𝛥(5). It should be mentioned that these results cannot be used for predicting earthquakes. However, data about the incidences of earthquake events and their magnitudes can certainly serve as useful information in civil engineering.}, keywords = {clustering, natural disasters}, pubstate = {published}, tppubtype = {article} } The aim of this paper is to analyse the seismic activity of the Iberian Peninsula and a wide area of the Republic of Croatia. To do so, two incidence-magnitude seismic parameters have been defined. First, the areas have been divided into several ellipsoidal clusters using Mahalanobis clustering. Four generalised indexes (Mahalanobis Calinski Harabasz, Mahalanobis Davies–Bouldin, Mahalanobis Simplified Silhouette Width Criterion and Mahalanobis Area) have been used to determine the most appropriate number of ellipsoidal clusters, on the basis of which a partition with four and a partition with eleven clusters have been considered. For the wide area of the Republic of Croatia there are fourteen clusters and the five areas that just affect Croatia have been analysed in detail. 
Then, to analyse the seismic activity of the areas, two incidence-magnitude seismic parameters have been defined and calculated: a) 𝛥(4), that represents the minimal number of successive years in which at least one earthquake of magnitude between 4 and 5 has been registered; b) 𝛥(5), that shows the number of years in which at least one earthquake of magnitude larger than 5 occurred. The calculation of 𝛥(4) for the South-west and the South-east of the Iberian Peninsula has provided two years for both. Regarding 𝛥(5), 10 and 12 years have been obtained for the South-west and the South-east of the Iberian Peninsula, respectively. The analysis of Croatia has shown that the Ston–Metković area has the highest seismic activity. The following results have been determined: 5 years for 𝛥(4) and 22 for 𝛥(5). It should be mentioned that these results cannot be used for predicting earthquakes. However, data about the incidences of earthquake events and their magnitudes can certainly serve as useful information in civil engineering. |
J. F. Torres and D. Hadjout and A. Sebaa and F. Martínez-Álvarez and A. Troncoso Deep Learning for Time Series Forecasting: A Survey (Journal Article) Big Data, 9 (1), pp. 3-21, 2021. (Abstract | Links | BibTeX | Tags: big data, deep learning, time series) @article{TORRES21, title = {Deep Learning for Time Series Forecasting: A Survey}, author = {J. F. Torres and D. Hadjout and A. Sebaa and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.liebertpub.com/doi/10.1089/big.2020.0159}, doi = {10.1089/big.2020.0159}, year = {2021}, date = {2021-02-05}, journal = {Big Data}, volume = {9}, number = {1}, pages = {3-21}, abstract = {Deep learning, one of the most remarkable techniques of machine learning, has been a major success in many fields, including image processing, speech recognition, and text understanding. It is powerful engines capable of learning arbitrary mapping functions, not require a scaled or stationary time series as input, support multivariate inputs, and support multi-step outputs. All of these features together make deep learning useful tools when dealing with more complex time series prediction problems involving large amounts of data, and multiple variables with complex relationships. This paper provides an overview of the most common Deep Learning types for time series forecasting, Explain the relationships between deep learning models and classical approaches to time series forecasting. A brief background of the particular challenges presents in time-series data and the most common deep learning techniques that are often used for time series forecasting is provided. Previous studies that applied deep learning to time series are reviewed.}, keywords = {big data, deep learning, time series}, pubstate = {published}, tppubtype = {article} } Deep learning, one of the most remarkable techniques of machine learning, has been a major success in many fields, including image processing, speech recognition, and text understanding. 
It is powerful engines capable of learning arbitrary mapping functions, not require a scaled or stationary time series as input, support multivariate inputs, and support multi-step outputs. All of these features together make deep learning useful tools when dealing with more complex time series prediction problems involving large amounts of data, and multiple variables with complex relationships. This paper provides an overview of the most common Deep Learning types for time series forecasting, Explain the relationships between deep learning models and classical approaches to time series forecasting. A brief background of the particular challenges presents in time-series data and the most common deep learning techniques that are often used for time series forecasting is provided. Previous studies that applied deep learning to time series are reviewed. |
R. Mortazavi and S. Mortazavi and A. Troncoso Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer (Journal Article) Engineering with Computers, 2021. (Abstract | Links | BibTeX | Tags: feature selection) @article{Mortazavi21, title = {Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer}, author = {R. Mortazavi and S. Mortazavi and A. Troncoso}, url = {https://link.springer.com/article/10.1007/s00366-020-01226-1}, doi = {10.1007/s00366-020-01226-1}, year = {2021}, date = {2021-01-01}, journal = {Engineering with Computers}, abstract = {This paper introduces different types of regression trees for viscosity property forecasting in polymer solutions. Although regression trees have been extensively used in other fields, they do not have been explored to predict the viscosity. One key issue in the context of materials science is to determine a priori which characteristics must be included to describe the prediction model due to a large number of molecular descriptors is obtained. To deal with this, we propose a wrapper method to select the features based on regression trees. Thus, we use regression trees to evaluate different subsets of attributes and build a model from the subset of features that achieved the minimum error. In particular, the performance of eight regression tree algorithms, including both linear and non-linear models, is evaluated and compared to other forecasting approaches using a dataset composed of 64 polymers and 2962 molecular descriptors. The results show that regression trees with nearest neighbors based local models in leaves predict with high accuracy. 
Moreover, results have been compared to other forecasting approaches such as multivariate linear regression, neural networks and support vector machines showing remarkable improvements in terms of accuracy.}, keywords = {feature selection}, pubstate = {published}, tppubtype = {article} } This paper introduces different types of regression trees for viscosity property forecasting in polymer solutions. Although regression trees have been extensively used in other fields, they do not have been explored to predict the viscosity. One key issue in the context of materials science is to determine a priori which characteristics must be included to describe the prediction model due to a large number of molecular descriptors is obtained. To deal with this, we propose a wrapper method to select the features based on regression trees. Thus, we use regression trees to evaluate different subsets of attributes and build a model from the subset of features that achieved the minimum error. In particular, the performance of eight regression tree algorithms, including both linear and non-linear models, is evaluated and compared to other forecasting approaches using a dataset composed of 64 polymers and 2962 molecular descriptors. The results show that regression trees with nearest neighbors based local models in leaves predict with high accuracy. Moreover, results have been compared to other forecasting approaches such as multivariate linear regression, neural networks and support vector machines showing remarkable improvements in terms of accuracy. |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso Discovering three-dimensional patterns in real-time from data streams: An online triclustering approach (Journal Article) Information Sciences, 558 , pp. 174-193, 2021. (Abstract | Links | BibTeX | Tags: big data, IoT, pattern recognition) @article{Melgar21_IS, title = {Discovering three-dimensional patterns in real-time from data streams: An online triclustering approach}, author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S0020025521000220}, doi = {10.1016/j.ins.2020.12.089}, year = {2021}, date = {2021-01-01}, journal = {Information Sciences}, volume = {558}, pages = {174-193}, abstract = {Triclustering algorithms group sets of coordinates of 3-dimensional datasets. In this paper, a new triclustering approach for data streams is introduced. It follows a streaming scheme of learning in two steps: offline and online phases. First, the offline phase provides a summary model with the components of the triclusters. Then, the second stage is the online phase to deal with data in streaming. This online phase consists in using the summary model obtained in the offline stage to update the triclusters as fast as possible with genetic operators. Results using three types of synthetic datasets and a real-world environmental sensor dataset are reported. The performance of the proposed triclustering streaming algorithm is compared to a batch triclustering algorithm, showing an accurate performance both in terms of quality and running times. }, keywords = {big data, IoT, pattern recognition}, pubstate = {published}, tppubtype = {article} } Triclustering algorithms group sets of coordinates of 3-dimensional datasets. In this paper, a new triclustering approach for data streams is introduced. It follows a streaming scheme of learning in two steps: offline and online phases. 
First, the offline phase provides a summary model with the components of the triclusters. Then, the second stage is the online phase to deal with data in streaming. This online phase consists in using the summary model obtained in the offline stage to update the triclusters as fast as possible with genetic operators. Results using three types of synthetic datasets and a real-world environmental sensor dataset are reported. The performance of the proposed triclustering streaming algorithm is compared to a batch triclustering algorithm, showing an accurate performance both in terms of quality and running times. |
A. R. Troncoso-García and J. A. Ortega and R. Seepold and N. Martínez-Madrid Non-invasive devices for respiratory sound monitoring (Conference) KES International Conference on Knowledge Based and Intelligent information and Engineering Systems, 2021. (Links | BibTeX | Tags: data streaming, IoT) @conference{TRONCOSO-GARCIA21, title = {Non-invasive devices for respiratory sound monitoring}, author = {A. R. Troncoso-García and J. A. Ortega and R. Seepold and N. Martínez-Madrid}, url = {https://www.sciencedirect.com/science/article/pii/S1877050921018135}, doi = {https://doi.org/10.1016/j.procs.2021.09.076}, year = {2021}, date = {2021-01-01}, booktitle = {KES International Conference on Knowledge Based and Intelligent information and Engineering Systems}, pages = {3040-3048}, keywords = {data streaming, IoT}, pubstate = {published}, tppubtype = {conference} } |
A. J. Pérez-Pulido and G. Asencio-Cortés and A. M. Brokate-Llanos and G. Brea-Calvo and M. R. Rodríguez-Griñolo and A. Garzón and M. J. Muñoz Serial co-expression analysis of host factors from SARS-CoV viruses highly converges with former high-throughput screenings and proposes key regulators (Journal Article) Briefings in Bioinformatics, 22 (2), pp. 1038–1052, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{pulido2021, title = {Serial co-expression analysis of host factors from SARS-CoV viruses highly converges with former high-throughput screenings and proposes key regulators}, author = {A. J. Pérez-Pulido and G. Asencio-Cortés and A. M. Brokate-Llanos and G. Brea-Calvo and M. R. Rodríguez-Griñolo and A. Garzón and M. J. Muñoz}, url = {https://academic.oup.com/bib/article/22/2/1038/6103172}, doi = {10.1093/bib/bbaa419}, year = {2021}, date = {2021-01-01}, journal = {Briefings in Bioinformatics}, volume = {22}, number = {2}, pages = {1038--1052}, abstract = {The current genomics era is bringing an unprecedented growth in the amount of gene expression data, only comparable to the exponential growth of sequences in databases during the last decades. This data allow the design of secondary analyses that take advantage of this information to create new knowledge. One of these feasible analyses is the evaluation of the expression level for a gene through a series of different conditions or cell types. Based on this idea, we have developed Automatic and Serial Analysis of CO-expression, which performs expression profiles for a given gene along hundreds of heterogeneous and normalized transcriptomics experiments and discover other genes that show either a similar or an inverse behavior. It might help to discover co-regulated genes, and common transcriptional regulators in any biological model. 
The present severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic is an opportunity to test this novel approach due to the wealth of data that are being generated, which could be used for validating results. Thus, we have identified 35 host factors in the literature putatively involved in the infectious cycle of SARS-CoV viruses and searched for genes tightly co-expressed with them. We have found 1899 co-expressed genes whose assigned functions are strongly related to viral cycles. Moreover, this set of genes heavily overlaps with those identified by former laboratory.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } The current genomics era is bringing an unprecedented growth in the amount of gene expression data, only comparable to the exponential growth of sequences in databases during the last decades. This data allow the design of secondary analyses that take advantage of this information to create new knowledge. One of these feasible analyses is the evaluation of the expression level for a gene through a series of different conditions or cell types. Based on this idea, we have developed Automatic and Serial Analysis of CO-expression, which performs expression profiles for a given gene along hundreds of heterogeneous and normalized transcriptomics experiments and discover other genes that show either a similar or an inverse behavior. It might help to discover co-regulated genes, and common transcriptional regulators in any biological model. The present severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic is an opportunity to test this novel approach due to the wealth of data that are being generated, which could be used for validating results. Thus, we have identified 35 host factors in the literature putatively involved in the infectious cycle of SARS-CoV viruses and searched for genes tightly co-expressed with them. 
We have found 1899 co-expressed genes whose assigned functions are strongly related to viral cycles. Moreover, this set of genes heavily overlaps with those identified by former laboratory. |
M. A. Molina and G. Asencio-Cortés and J. C. Riquelme and F. Martínez-Álvarez A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets (Conference) 15th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2020), Advances in Intelligent Systems and Computing 2021. (Links | BibTeX | Tags: deep learning, pattern recognition, transfer learning) @conference{molina2021, title = {A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets}, author = {M. A. Molina and G. Asencio-Cortés and J. C. Riquelme and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-57802-2_71}, year = {2021}, date = {2021-01-01}, booktitle = {15th International Conference on Soft Computing Models in Industrial and Environmental Applications (SOCO 2020)}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, pattern recognition, transfer learning}, pubstate = {published}, tppubtype = {conference} } |
S.A. Grillo and J.C. Román and J.D. Mello-Román and J.L. Vázquez Noguera and M. García-Torres and F. Divina and P.E. Sotomayor Adjacent Inputs With Different Labels and Hardness in Supervised Learning (Journal Article) IEEE Access, 9 , pp. 162487–162498, 2021. (Links | BibTeX | Tags: feature selection, pattern recognition) @article{grillo2021adjacent, title = {Adjacent Inputs With Different Labels and Hardness in Supervised Learning}, author = {S.A. Grillo and J.C. Román and J.D. Mello-Román and J.L. Vázquez Noguera and M. García-Torres and F. Divina and P.E. Sotomayor}, doi = {10.1109/ACCESS.2021.3131150}, year = {2021}, date = {2021-01-01}, journal = {IEEE Access}, volume = {9}, pages = {162487--162498}, publisher = {IEEE}, keywords = {feature selection, pattern recognition}, pubstate = {published}, tppubtype = {article} } |
R. Parra and V. Ojeda and J.L. Vázquez Noguera and M. García-Torres and J.C. Mello-Román and C. Villalba and J. Facon and F. Divina and O. Cardozo and V. Castillo A Trust-Based Methodology to Evaluate Deep Learning Models for Automatic Diagnosis of Ocular Toxoplasmosis from Fundus Images (Journal Article) Diagnostics, 11 (11), pp. 1951, 2021. (Links | BibTeX | Tags: bioinformatics, deep learning, pattern recognition) @article{parra2021trust, title = {A Trust-Based Methodology to Evaluate Deep Learning Models for Automatic Diagnosis of Ocular Toxoplasmosis from Fundus Images}, author = {R. Parra and V. Ojeda and J.L. Vázquez Noguera and M. García-Torres and J.C. Mello-Román and C. Villalba and J. Facon and F. Divina and O. Cardozo and V. Castillo}, doi = {10.3390/diagnostics11111951}, year = {2021}, date = {2021-01-01}, journal = {Diagnostics}, volume = {11}, number = {11}, pages = {1951}, publisher = {Multidisciplinary Digital Publishing Institute}, keywords = {bioinformatics, deep learning, pattern recognition}, pubstate = {published}, tppubtype = {article} } |
J. Ayala and M. García-Torres and J.L. Vázquez Noguera and F. Gómez-Vela and F. Divina Technical analysis strategy optimization using a machine learning approach in stock market indices (Journal Article) Knowledge-Based Systems, pp. 107119, 2021. (Links | BibTeX | Tags: deep learning, pattern recognition) @article{ayala2021technical, title = {Technical analysis strategy optimization using a machine learning approach in stock market indices}, author = {J. Ayala and M. García-Torres and J.L. Vázquez Noguera and F. Gómez-Vela and F. Divina}, doi = {10.1016/j.knosys.2021.107119}, year = {2021}, date = {2021-01-01}, journal = {Knowledge-Based Systems}, volume = {225}, pages = {107119}, publisher = {Elsevier}, keywords = {deep learning, pattern recognition}, pubstate = {published}, tppubtype = {article} } |
P.M. Martínez-García and M. García-Torres and F. Divina and J. Terrón-Bautista and I. Delgado-Sainz and F. Gómez-Vela and F. Cortés-Ledesma Genome-wide prediction of topoisomerase II $\beta$ binding by architectural factors and chromatin accessibility (Journal Article) PLoS computational biology, 17 (1), pp. e1007814, 2021. (Links | BibTeX | Tags: bioinformatics) @article{martinez2021genome, title = {Genome-wide prediction of topoisomerase II $\beta$ binding by architectural factors and chromatin accessibility}, author = {P.M. Martínez-García and M. García-Torres and F. Divina and J. Terrón-Bautista and I. Delgado-Sainz and F. Gómez-Vela and F. Cortés-Ledesma}, doi = {10.1371/journal.pcbi.1007814}, year = {2021}, date = {2021-01-01}, journal = {PLoS computational biology}, volume = {17}, number = {1}, pages = {e1007814}, publisher = {Public Library of Science San Francisco, CA USA}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } |
A. Lopez-Fernandez and D. Rodriguez-Baena and F. Gomez-Vela and F. Divina and M. Garcia-Torres A multi-GPU biclustering algorithm for binary datasets (Journal Article) Journal of Parallel and Distributed Computing, 147 , pp. 209–219, 2021. (Links | BibTeX | Tags: bioinformatics, clustering) @article{lopez2021multi, title = {A multi-GPU biclustering algorithm for binary datasets}, author = {A. Lopez-Fernandez and D. Rodriguez-Baena and F. Gomez-Vela and F. Divina and M. Garcia-Torres}, doi = {10.1016/j.jpdc.2020.09.009}, year = {2021}, date = {2021-01-01}, journal = {Journal of Parallel and Distributed Computing}, volume = {147}, pages = {209--219}, publisher = {Elsevier}, keywords = {bioinformatics, clustering}, pubstate = {published}, tppubtype = {article} } |
V.E. Castillo Benítez and I. Castro Matto and J.C. Mello Román and J.L. Vázquez Noguera and M. García-Torres and J. Ayala and D.P. Pinto-Roa and P.E. Gardel-Sotomayor and J. Facon and S.A. Grillo Dataset from fundus images for the study of diabetic retinopathy (Journal Article) Data in Brief, 36 , pp. 107068, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{benitez2021dataset, title = {Dataset from fundus images for the study of diabetic retinopathy}, author = {V.E. Castillo Benítez and I. Castro Matto and J.C. Mello Román and J.L. Vázquez Noguera and M. García-Torres and J. Ayala and D.P. Pinto-Roa and P.E. Gardel-Sotomayor and J. Facon and S.A. Grillo}, url = {https://www.sciencedirect.com/science/article/pii/S2352340921003528}, doi = {10.1016/j.dib.2021.107068}, year = {2021}, date = {2021-01-01}, journal = {Data in Brief}, volume = {36}, pages = {107068}, publisher = {Elsevier}, abstract = {This article presents a database containing 757 color fundus images acquired at the Department of Ophthalmology of the Hospital de Clínicas, Facultad de Ciencias Médicas (FCM), Universidad Nacional de Asunción (UNA), Paraguay. Firstly, the retinal images were acquired with a clinical procedure presented in this paper. The acquisition of the retinographies was made through the Visucam 500 camera of the Zeiss brand. Next, two expert ophthalmologists have classified the dataset. These data can help physicians and researchers in the detection of cases of Non-Proliferative Diabetic Retinopathy (NPDR) and Proliferative Diabetic Retinopathy (PDR), in their different stages. 
The dataset generated will be useful for ophthalmologists and researchers to work on automatic detection algorithms for Diabetic Retinopathy (DR).}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } This article presents a database containing 757 color fundus images acquired at the Department of Ophthalmology of the Hospital de Clínicas, Facultad de Ciencias Médicas (FCM), Universidad Nacional de Asunción (UNA), Paraguay. Firstly, the retinal images were acquired with a clinical procedure presented in this paper. The acquisition of the retinographies was made through the Visucam 500 camera of the Zeiss brand. Next, two expert ophthalmologists have classified the dataset. These data can help physicians and researchers in the detection of cases of Non-Proliferative Diabetic Retinopathy (NPDR) and Proliferative Diabetic Retinopathy (PDR), in their different stages. The dataset generated will be useful for ophthalmologists and researchers to work on automatic detection algorithms for Diabetic Retinopathy (DR). |
H. Ho Shin and C. Sauer Ayala and P. Pérez-Estigarribia and S.A. Grillo and L. Segovia-Cabrera and M. García-Torres and C. Gaona and S. Irala and M.E. Pedrozo and G. Sequera and J.L. Vázquez Noguera and E. De Los Santos A Mathematical Model for COVID-19 with Variable Transmissibility and Hospitalizations: A Case Study in Paraguay (Journal Article) Applied Sciences, 11 (20), pp. 9726, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{shin2021mathematical, title = {A Mathematical Model for COVID-19 with Variable Transmissibility and Hospitalizations: A Case Study in Paraguay}, author = {H. Ho Shin and C. Sauer Ayala and P. Pérez-Estigarribia and S.A. Grillo and L. Segovia-Cabrera and M. García-Torres and C. Gaona and S. Irala and M.E. Pedrozo and G. Sequera and J.L. Vázquez Noguera and E. De Los Santos}, url = {https://www.mdpi.com/2076-3417/11/20/9726}, doi = {10.3390/app11209726}, year = {2021}, date = {2021-01-01}, journal = {Applied Sciences}, volume = {11}, number = {20}, pages = {9726}, publisher = {Multidisciplinary Digital Publishing Institute}, abstract = {Forecasting the dynamics of the number of cases with coronavirus disease 2019 (COVID-19) in a given population is a challenging task due to behavioural changes which occur over short periods. Planning of hospital resources and containment measures in the near term require a scenario analysis and the use of predictive models to gain insight into possible outcomes for each scenario. In this paper, we present the SEIR-H epidemiological model for the spread dynamics in a given population and the impact of COVID-19 in the local health system. It was developed as an extension of the classic SEIR model to account for required hospital resources and behavioural changes of the population in response to containment measures. Time-varying parameters such as transmissibility are estimated using Bayesian methods, based on the database of reported cases with a moving time-window strategy. 
The assessment of the model offers reasonable results with estimated parameters and simulations, reflecting the observed dynamics in Paraguay. The proposed model can be used to simulate future scenarios and possible effects of containment strategies, to guide the public institution response based on the available resources in the local health system.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Forecasting the dynamics of the number of cases with coronavirus disease 2019 (COVID-19) in a given population is a challenging task due to behavioural changes which occur over short periods. Planning of hospital resources and containment measures in the near term require a scenario analysis and the use of predictive models to gain insight into possible outcomes for each scenario. In this paper, we present the SEIR-H epidemiological model for the spread dynamics in a given population and the impact of COVID-19 in the local health system. It was developed as an extension of the classic SEIR model to account for required hospital resources and behavioural changes of the population in response to containment measures. Time-varying parameters such as transmissibility are estimated using Bayesian methods, based on the database of reported cases with a moving time-window strategy. The assessment of the model offers reasonable results with estimated parameters and simulations, reflecting the observed dynamics in Paraguay. The proposed model can be used to simulate future scenarios and possible effects of containment strategies, to guide the public institution response based on the available resources in the local health system. |
F. Divina and F. Gómez-Vela and M. García-Torres Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast (Journal Article) Applied Sciences, 11 (3), pp. 1261, 2021. (Links | BibTeX | Tags: energy) @article{divina2021advanced, title = {Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast}, author = {F. Divina and F. Gómez-Vela and M. García-Torres}, url = {https://www.mdpi.com/2076-3417/11/3/1261/htm}, doi = {10.3390/app11031261}, year = {2021}, date = {2021-01-01}, journal = {Applied Sciences}, volume = {11}, number = {3}, pages = {1261}, publisher = {Multidisciplinary Digital Publishing Institute}, keywords = {energy}, pubstate = {published}, tppubtype = {article} } |
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-Acceleration of the Solar System from Gaia astrometry (Journal Article) Astronomy & Astrophysics, 649 , pp. A9, 2021. (Links | BibTeX | Tags: astrostatistics) @article{klioner2021gaia, title = {Gaia Early Data Release 3-Acceleration of the Solar System from Gaia astrometry}, author = {Gaia Collaboration and M. García-Torres}, url = {https://www.aanda.org/articles/aa/full_html/2021/05/aa39734-20/aa39734-20.html}, doi = {10.1051/0004-6361/202039734}, year = {2021}, date = {2021-01-01}, journal = {Astronomy \& Astrophysics}, volume = {649}, pages = {A9}, publisher = {EDP sciences}, keywords = {astrostatistics}, pubstate = {published}, tppubtype = {article} } |
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-The Galactic anticentre (Journal Article) Astronomy & Astrophysics, 649 , pp. A8, 2021. (Links | BibTeX | Tags: astrostatistics) @article{antoja2021gaia, title = {Gaia Early Data Release 3-The Galactic anticentre}, author = {Gaia Collaboration and M. García-Torres}, url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39714-20/aa39714-20.html}, doi = {10.1051/0004-6361/202039714}, year = {2021}, date = {2021-01-01}, journal = {Astronomy \& Astrophysics}, volume = {649}, pages = {A8}, publisher = {EDP sciences}, keywords = {astrostatistics}, pubstate = {published}, tppubtype = {article} } |
A. GA. Brown and A. Vallenari and T. Prusti and JHJ. De Bruijne and C. Babusiaux and M. Biermann and OL. Creevey and DW. Evans and L. Eyer and A. Hutton and M. García-Torres and others Gaia Early Data Release 3-Summary of the contents and survey properties (Journal Article) Astronomy & Astrophysics, 649 , pp. A1, 2021. (Links | BibTeX | Tags: astrostatistics) @article{brown2021gaia, title = {Gaia Early Data Release 3-Summary of the contents and survey properties}, author = {A. GA. Brown and A. Vallenari and T. Prusti and JHJ. De Bruijne and C. Babusiaux and M. Biermann and OL. Creevey and DW. Evans and L. Eyer and A. Hutton and M. García-Torres and others}, url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39657-20/aa39657-20.html}, doi = {10.1051/0004-6361/202039657}, year = {2021}, date = {2021-01-01}, journal = {Astronomy \& Astrophysics}, volume = {649}, pages = {A1}, publisher = {EDP sciences}, keywords = {astrostatistics}, pubstate = {published}, tppubtype = {article} } |
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-Structure and properties of the Magellanic Clouds (Journal Article) Astronomy & Astrophysics, 649 , pp. A7, 2021. (Links | BibTeX | Tags: astrostatistics) @article{luri2021gaia, title = {Gaia Early Data Release 3-Structure and properties of the Magellanic Clouds}, author = {Gaia Collaboration and M. García-Torres}, url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39588-20/aa39588-20.html}, doi = {10.1051/0004-6361/202039588}, year = {2021}, date = {2021-01-01}, journal = {Astronomy \& Astrophysics}, volume = {649}, pages = {A7}, publisher = {EDP sciences}, keywords = {astrostatistics}, pubstate = {published}, tppubtype = {article} } |
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-The Gaia Catalogue of Nearby Stars (Journal Article) Astronomy & Astrophysics, 649 , pp. A6, 2021. (Links | BibTeX | Tags: astrostatistics) @article{smart2021gaia, title = {Gaia Early Data Release 3-The Gaia Catalogue of Nearby Stars}, author = {Gaia Collaboration and M. García-Torres}, url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39498-20/aa39498-20.html}, doi = {10.1051/0004-6361/202039498}, year = {2021}, date = {2021-01-01}, journal = {Astronomy \& Astrophysics}, volume = {649}, pages = {A6}, publisher = {EDP sciences}, keywords = {astrostatistics}, pubstate = {published}, tppubtype = {article} } |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso Nearest neighbours-based forecasting for electricity demand time series in streaming (Conference) Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21), Lecture Notes in Artificial Intelligence 2021. (Abstract | BibTeX | Tags: IoT, time series) @conference{CAEPIA21_Laura, title = {Nearest neighbours-based forecasting for electricity demand time series in streaming}, author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso }, year = {2021}, date = {2021-01-01}, booktitle = {Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21)}, series = {Lecture Notes in Artificial Intelligence}, abstract = {This paper presents a forecasting algorithm for time series in streaming. The methodology has two well-differentiated stages: the algorithm searches for the nearest neighbors to generate an initial prediction model in the batch phase. Then, an online phase is carried out when the time series arrives in streaming. In particular, the nearest neighbor of the streaming data from the training set is computed and the nearest neighbors, previously computed in the batch phase, of this nearest neighbor are used to obtain the predictions. Results using the electricity consumption time series are reported, showing a remarkable performance of the proposed algorithm in terms of forecasting errors when compared to a nearest neighbors-based benchmark algorithm. The running times for the predictions are also remarkable.}, keywords = {IoT, time series}, pubstate = {published}, tppubtype = {conference} } This paper presents a forecasting algorithm for time series in streaming. The methodology has two well-differentiated stages: the algorithm searches for the nearest neighbors to generate an initial prediction model in the batch phase. Then, an online phase is carried out when the time series arrives in streaming. 
In particular, the nearest neighbor of the streaming data from the training set is computed and the nearest neighbors, previously computed in the batch phase, of this nearest neighbor are used to obtain the predictions. Results using the electricity consumption time series are reported, showing a remarkable performance of the proposed algorithm in terms of forecasting errors when compared to a nearest neighbors-based benchmark algorithm. The running times for the predictions are also remarkable. |