Publications
2021
J. F. Torres, D. Hadjout, A. Sebaa, F. Martínez-Álvarez and A. Troncoso. Deep Learning for Time Series Forecasting: A Survey (Journal Article). Big Data, 9(1), pp. 3-21, 2021. doi: 10.1089/big.2020.0159. Tags: big data, deep learning, time series.
Abstract: Deep learning, one of the most remarkable techniques of machine learning, has been a major success in many fields, including image processing, speech recognition, and text understanding. Deep learning models are powerful engines capable of learning arbitrary mapping functions: they do not require a scaled or stationary time series as input, they support multivariate inputs, and they support multi-step outputs. All of these features together make deep learning a useful tool when dealing with more complex time series prediction problems involving large amounts of data and multiple variables with complex relationships. This paper provides an overview of the most common types of deep learning for time series forecasting and explains the relationships between deep learning models and classical approaches to time series forecasting. A brief background on the particular challenges present in time-series data and the most common deep learning techniques used for time series forecasting is provided, and previous studies that applied deep learning to time series are reviewed.
R. Mortazavi, S. Mortazavi and A. Troncoso. Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer (Journal Article). Engineering with Computers, in press, 2021. doi: 10.1007/s00366-020-01226-1. Tags: feature selection.
Abstract: This paper introduces different types of regression trees for viscosity property forecasting in polymer solutions. Although regression trees have been extensively used in other fields, they have not been explored to predict viscosity. One key issue in the context of materials science is to determine a priori which characteristics must be included in the prediction model, since a large number of molecular descriptors is obtained. To deal with this, we propose a wrapper method to select the features based on regression trees. Thus, we use regression trees to evaluate different subsets of attributes and build a model from the subset of features that achieved the minimum error. In particular, the performance of eight regression tree algorithms, including both linear and non-linear models, is evaluated and compared to other forecasting approaches using a dataset composed of 64 polymers and 2962 molecular descriptors. The results show that regression trees with nearest-neighbor-based local models in the leaves predict with high accuracy. Moreover, results have been compared to other forecasting approaches such as multivariate linear regression, neural networks and support vector machines, showing remarkable improvements in terms of accuracy.
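The following is a minimal illustrative sketch (not the authors' implementation) of the wrapper idea described in the abstract above: greedy forward selection in which a regression tree scores each candidate descriptor subset by cross-validated error, keeping the subset with the minimum error. It assumes scikit-learn; the data, tree depth and stopping rule are placeholder choices.

```python
# Wrapper-based feature selection sketch: a regression tree evaluates candidate
# descriptor subsets; greedy forward selection keeps the lowest-error subset.
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
X = rng.normal(size=(64, 50))                 # e.g. 64 polymers x 50 descriptors (synthetic)
y = X[:, 0] * 2.0 - X[:, 3] + rng.normal(scale=0.1, size=64)

def cv_error(feature_idx):
    """Mean cross-validated squared error of a tree built on the given subset."""
    model = DecisionTreeRegressor(max_depth=4, random_state=0)
    scores = cross_val_score(model, X[:, feature_idx], y,
                             scoring="neg_mean_squared_error", cv=5)
    return -scores.mean()

selected, remaining, best_err = [], list(range(X.shape[1])), np.inf
while remaining:
    # Try adding each remaining descriptor and keep the one with minimum error.
    errs = {f: cv_error(selected + [f]) for f in remaining}
    f_best, err = min(errs.items(), key=lambda kv: kv[1])
    if err >= best_err:                       # stop when no candidate improves the error
        break
    selected.append(f_best)
    remaining.remove(f_best)
    best_err = err

print("selected descriptors:", selected, "cv error:", round(best_err, 4))
```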
L. Melgar-García, D. Gutiérrez-Avilés, C. Rubio-Escudero and A. Troncoso. Discovering three-dimensional patterns in real-time from data streams: An online triclustering approach (Journal Article). Information Sciences, in press, 2021. Tags: big data, IoT, pattern recognition.
Abstract: Triclustering algorithms group sets of coordinates of 3-dimensional datasets. In this paper, a new triclustering approach for data streams is introduced. It follows a streaming scheme of learning in two steps: offline and online phases. First, the offline phase provides a summary model with the components of the triclusters. Then, the second stage is the online phase to deal with data in streaming. This online phase consists in using the summary model obtained in the offline stage to update the triclusters as fast as possible with genetic operators. Results using three types of synthetic datasets and a real-world environmental sensor dataset are reported. The performance of the proposed triclustering streaming algorithm is compared to a batch triclustering algorithm, showing an accurate performance both in terms of quality and running times.
F. Martínez-Álvarez, A. Troncoso, H. Quintián and E. Corchado. Special Issue SOCO 2019: New trends in soft computing and its application in industrial and environmental problems (Journal Article). Neurocomputing, in press, 2021. Tags: big data, deep learning.
2020
P. Jiménez-Herrera, L. Melgar-García, G. Asencio-Cortés and A. Troncoso. A New Forecasting Algorithm Based on Neighbors for Streaming Electricity Time Series (Conference). HAIS 15th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science, pp. 522-533, 2020. URL: https://link.springer.com/chapter/10.1007/978-3-030-61705-9_43. Tags: big data, energy, IoT, time series.
Y. Lin, I. Koprinska, M. Rana and A. Troncoso. Solar Power Forecasting Based on Pattern Sequence Similarity and Meta-learning (Conference). ICANN 29th International Conference on Artificial Neural Networks, Lecture Notes in Computer Science, pp. 271-283, 2020. URL: https://link.springer.com/chapter/10.1007/978-3-030-61609-0_22. Tags: energy, time series.
L. Melgar-García, M. T. Godinho, R. Espada, D. Gutiérrez-Avilés, I. S. Brito, F. Martínez-Álvarez, A. Troncoso and C. Rubio-Escudero. Discovering Spatio-Temporal Patterns in Precision Agriculture Based on Triclustering (Conference). SOCO 15th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing, pp. 226-236, 2020. URL: https://link.springer.com/chapter/10.1007/978-3-030-57802-2_22. Tags: IoT, pattern recognition.
O. Mitxelena-Hoyos, J. L. Amaro-Mellado and F. Martínez-Álvarez. Use of IT in Project-Based Learning Applied to the Subject Surveying in Civil Engineering (Conference). ICEUTE 11th International Conference on European Transnational Education, Advances in Intelligent Systems and Computing, vol. 1266, pp. 428-437, 2020. doi: 10.1007/978-3-030-57799-5_44. Tags: education.
Abstract: This work describes the design and implementation of the subject of surveying under the Project-based Learning method. The modernization of teaching and learning and the new demands that society places on our graduates force us to move towards a new style of higher education. In the case of topography, which is a transversal science closely related to the various skills of the degree, the learning conveyed by a project provides verisimilitude and depth to the knowledge acquired. Given previous experiences, students are expected to achieve better marks and performance.
F. Divina, J. F. Torres, M. García-Torres, F. Martínez-Álvarez and A. Troncoso. Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting (Journal Article). Applied Sciences, 10(16), pp. 5487, 2020. doi: 10.3390/app10165487. Tags: big data, deep learning, energy, time series.
Abstract: The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on large time series. In order to assess the performance of our approach, we have applied it to a large dataset related to the electric energy consumption registered in Spain over almost 10 years. Experimental results confirm the validity of our proposal, since it outperforms all other forecasting techniques to which it has been compared.
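Below is a toy sketch of the neuroevolution idea summarized above, not the paper's distributed implementation: a small genetic algorithm searches network hyper-parameters, with scikit-learn's MLPRegressor standing in for the deep model and a synthetic series standing in for the Spanish consumption data. The population size, rates and genome layout are illustrative assumptions.

```python
# Genetic search over forecasting-network hyper-parameters (toy scale).
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(1)
t = np.arange(600)
series = np.sin(2 * np.pi * t / 24) + 0.1 * rng.normal(size=t.size)   # synthetic hourly-like series
lags = 24
X = np.array([series[i:i + lags] for i in range(series.size - lags)])
y = series[lags:]
X_tr, X_val, y_tr, y_val = train_test_split(X, y, shuffle=False, test_size=0.2)

def random_genome():
    """One candidate hyper-parameter configuration."""
    return {"layers": int(rng.integers(1, 4)),
            "units": int(rng.integers(8, 65)),
            "lr": float(10 ** rng.uniform(-4, -2))}

def fitness(g):
    """Validation error of a network trained with the genome's hyper-parameters."""
    model = MLPRegressor(hidden_layer_sizes=(g["units"],) * g["layers"],
                         learning_rate_init=g["lr"], max_iter=200, random_state=0)
    model.fit(X_tr, y_tr)
    return mean_squared_error(y_val, model.predict(X_val))

population = [random_genome() for _ in range(8)]
for generation in range(3):
    parents = sorted(population, key=fitness)[:4]      # selection: keep the best half
    children = []
    for _ in range(4):                                 # uniform crossover + mutation
        a, b = rng.choice(parents, 2, replace=False)
        child = {k: (a if rng.random() < 0.5 else b)[k] for k in a}
        if rng.random() < 0.3:
            child["units"] = int(rng.integers(8, 65))
        children.append(child)
    population = parents + children

print("best hyper-parameters found:", min(population, key=fitness))
```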
F. Martínez-Álvarez, G. Asencio-Cortés, J. F. Torres, D. Gutiérrez-Avilés, L. Melgar-García, R. Pérez-Chacón, C. Rubio-Escudero, A. Troncoso and J. C. Riquelme. Coronavirus Optimization Algorithm: A bioinspired metaheuristic based on the COVID-19 propagation model (Journal Article). Big Data, 8(4), pp. 308-322, 2020. doi: 10.1089/big.2020.0051. Tags: big data, deep learning, energy, time series.
Abstract: This work proposes a novel bioinspired metaheuristic, simulating how the coronavirus spreads and infects healthy people. From a primary infected individual (patient zero), the coronavirus rapidly infects new victims, creating large populations of infected people who will either die or spread infection. Relevant terms such as reinfection probability, super-spreading rate, social distancing measures or traveling rate are introduced into the model in order to simulate the coronavirus activity as accurately as possible. The infected population initially grows exponentially over time, but taking into consideration social isolation measures, the mortality rate and the number of recoveries, the infected population gradually decreases. The Coronavirus Optimization Algorithm has two major advantages when compared to other similar strategies. Firstly, the input parameters are already set according to the disease statistics, preventing researchers from initializing them with arbitrary values. Secondly, the approach has the ability to end after several iterations, without setting this value either. Furthermore, a parallel multi-virus version is proposed, where several coronavirus strains evolve over time and explore wider search space areas in fewer iterations. Finally, the metaheuristic has been combined with deep learning models in order to find optimal hyperparameters during the training phase. As an application case, the problem of electricity load time series forecasting has been addressed, showing quite remarkable performance.
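As a rough, heavily simplified sketch of the propagation metaphor described above (not the published algorithm, which sets its parameters from real disease statistics and also models reinfection, super-spreading, social distancing and traveling rates), the snippet below lets an "infected" population of candidate solutions spread, die or recover until nobody is left infected; the objective function and all rates are illustrative.

```python
# Simplified infection-spreading search loop inspired by the abstract above.
import numpy as np

rng = np.random.default_rng(7)

def objective(x):
    return float(np.sum(x ** 2))           # toy objective to minimize

dim = 5
spreading_rate = 2                          # new candidates per spreading individual
die_prob, recover_prob = 0.05, 0.30         # illustrative, not the paper's disease statistics

patient_zero = rng.uniform(-5, 5, size=dim)
infected = [patient_zero]
best = (objective(patient_zero), patient_zero)

iteration = 0
while infected and iteration < 30:          # the run also ends on its own once nobody is infected
    iteration += 1
    new_infected = []
    for person in infected:
        r = rng.random()
        if r < die_prob:                    # dies: removed without spreading
            continue
        if r < die_prob + recover_prob:     # recovers: stops spreading
            continue
        for _ in range(spreading_rate):     # spreads: infects nearby candidate solutions
            candidate = person + rng.normal(scale=0.5, size=dim)
            if objective(candidate) < best[0]:
                best = (objective(candidate), candidate)
            new_infected.append(candidate)
    infected = new_infected

print("iterations run:", iteration, "best objective found:", round(best[0], 4))
```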
R. Pérez-Chacón, G. Asencio-Cortés, F. Martínez-Álvarez and A. Troncoso. Big data time series forecasting based on pattern sequence similarity and its application to the electricity demand (Journal Article). Information Sciences, 540, pp. 160-174, 2020. doi: 10.1016/j.ins.2020.06.014. Tags: big data, energy, time series.
Abstract: This work proposes a novel algorithm to forecast big data time series. Based on the well-established Pattern Sequence Forecasting algorithm, this new approach has two major contributions to the literature. First, the improvement of the aforementioned algorithm with respect to the accuracy of predictions, and second, its transformation into the big data context, having reached meaningful results in terms of scalability. The algorithm uses the Apache Spark distributed computation framework and it is a ready-to-use application with few parameters to adjust. Physical and cloud clusters have been used to carry out the experimentation, which consisted in applying the algorithm to real-world data from Uruguay electricity demand.
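To make the starting point concrete, here is a minimal single-machine sketch of the core Pattern Sequence Forecasting idea on which the paper builds (the paper's contribution, an improved Spark-distributed version, is not reproduced here): days are labeled with K-means, and the forecast for the next day averages the days that historically followed the same label sequence. The data, number of clusters and window length are illustrative.

```python
# Core PSF idea: cluster days into labels, match the last label sequence in
# history, and average the days that followed each match.
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(3)
# Toy hourly demand: 200 days x 24 hours with a weekly bump plus noise.
days = np.array([np.sin(2 * np.pi * np.arange(24) / 24) * (1.0 + 0.3 * (d % 7 == 5))
                 + 0.05 * rng.normal(size=24) for d in range(200)])

k, window = 4, 3
labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(days)

# Pattern to match: the cluster labels of the last `window` days.
pattern = labels[-window:]

# Every historical day i whose preceding `window` days share that label sequence
# contributes to the forecast; the forecast is the average of those days.
matches = [i for i in range(window, len(days) - 1)
           if np.array_equal(labels[i - window:i], pattern)]
forecast = days[matches].mean(axis=0) if matches else days[-1]   # persistence fallback

print("matched sequences:", len(matches),
      "forecast (first 4 hours):", np.round(forecast[:4], 3))
```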
M. Nazeriye, A. Haeri and F. Martínez-Álvarez. Analysis of the Impact of Residential Property and Equipment on Building Energy Efficiency and Consumption - A Data Mining Approach (Journal Article). Applied Sciences, 10(10), pp. 3589, 2020. doi: 10.3390/app10103589. Tags: energy, time series.
Abstract: Human living could become very difficult due to a lack of energy. The household sector plays a significant role in energy consumption. Trying to optimize and achieve efficient energy consumption can lead to large-scale energy savings. The aim of this paper is to identify the equipment and property affecting energy efficiency and consumption in residential homes. For this purpose, a hybrid data-mining approach based on K-means algorithms and decision trees is presented. To analyze the approach, data is modeled once using the approach and then without it. A data set of residential homes of England and Wales is arranged in low, medium and high consumption clusters. The C5.0 algorithm is run on each cluster to extract factors affecting energy efficiency. The comparison of the modeling results, and also their accuracy, proves that the approach employed has the ability to extract the findings with greater accuracy and detail than in other cases. The installation of boilers, using cavity walls, and installing insulation could improve energy efficiency. Old homes and the usage of Economy 7 electricity have an unfavorable effect on energy efficiency, but the approach shows that each cluster behaved differently in these factors related to energy efficiency and has unique results.
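A rough sketch of the hybrid scheme described above, under stated assumptions: homes are clustered by consumption with K-means and a decision tree is then fitted per cluster to surface efficiency-related factors. The paper uses C5.0, which is not available in scikit-learn, so DecisionTreeClassifier stands in for it; the data and feature names are synthetic placeholders.

```python
# Cluster-then-classify sketch: K-means consumption groups, one tree per group.
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(5)
n = 600
homes = pd.DataFrame({
    "annual_kwh": rng.gamma(shape=4, scale=900, size=n),
    "has_cavity_wall": rng.integers(0, 2, size=n),
    "has_insulation": rng.integers(0, 2, size=n),
    "home_age_years": rng.integers(1, 120, size=n),
})
# Toy efficiency label loosely tied to the property features.
homes["efficient"] = ((homes.has_insulation + homes.has_cavity_wall
                       - (homes.home_age_years > 80).astype(int)) > 0).astype(int)

# Step 1: arrange homes into low / medium / high consumption clusters.
homes["cluster"] = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(
    homes[["annual_kwh"]])

# Step 2: one tree per cluster to extract the factors driving efficiency there.
features = ["has_cavity_wall", "has_insulation", "home_age_years"]
for c, group in homes.groupby("cluster"):
    tree = DecisionTreeClassifier(max_depth=3, random_state=0)
    tree.fit(group[features], group["efficient"])
    ranking = sorted(zip(features, tree.feature_importances_), key=lambda kv: -kv[1])
    print(f"cluster {c}: most important factors -> {ranking}")
```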
A. M. Fernández, D. Gutiérrez-Avilés, A. Troncoso and F. Martínez-Álvarez. Automated Deployment of a Spark Cluster with Machine Learning Algorithm Integration (Journal Article). Big Data Research, 19-20, pp. 100135, 2020. doi: 10.1016/j.bdr.2020.100135. Tags: big data, time series.
Abstract: The vast amount of data stored nowadays has turned big data analytics into a very trendy research field. The Spark distributed computing platform has emerged as a dominant and widely used paradigm for cluster deployment and big data analytics. However, getting started is still a task that may take much time when done manually, due to the requisites that all nodes must fulfill. This work introduces LadonSpark, an open-source and non-commercial solution to configure and deploy a Spark cluster automatically. It has been specially designed for easy and efficient management of a Spark cluster with a friendly graphical user interface to automate the deployment of a cluster and to start up the distributed file system of Hadoop quickly. Moreover, LadonSpark includes the functionality of integrating any algorithm into the system. That is, the user only needs to provide the executable file and the number of required inputs for proper parametrization. Source code developed in Scala, R, Python, or Java can be supported on LadonSpark. Besides, clustering, regression, classification, and association rule algorithms are already integrated so that users can test its usability from its initial installation.
G. Santamaría-Bonfil, M. B. Ibáñez, M. Pérez-Ramírez, G. Arroyo-Figueroa and F. Martínez-Álvarez. Learning analytics for student modeling in virtual reality training systems: Lineworkers case (Journal Article). Computers and Education, 151, pp. 103871, 2020. doi: 10.1016/j.compedu.2020.103871. Tags: education.
Abstract: Live-line maintenance is a high risk activity. Hence, lineworkers require effective and safe training. Virtual Reality Training Systems (VRTS) provide an affordable and safe alternative for training in such high risk environments. However, their effectiveness relies mainly on having meaningful activities for supporting learning and on their ability to detect untrained students. This study builds a student model based on Learning Analytics (LA), using data collected from 1399 students that used a VRTS for the maintenance training of lineworkers in 329 courses carried out from 2008 to 2016. By employing several classifiers, the model allows discriminating between trained and untrained students in different maneuvers using three minimum evaluation proficiency scores. Using the best classifier, a Feature Importance Analysis is carried out to understand the impact of the variables regarding the trainees’ final performances. The model also involves the exploration of the trainees’ trace data through a visualization tool to pose non-observable behavioral variables related to displayed errors. The results show that the model can discriminate between trained and untrained students, the Random Forest algorithm standing out. The feature importance analysis revealed that the most relevant features regarding the trainees’ final performance were profile and course variables along with specific maneuver steps. Finally, using the visual tool, and with human expert aid, several error patterns in trace data associated with misconceptions and confusion were identified. In the light of these, LA enables disassembling the data jigsaw quandary from VRTS to enhance the human-in-the-loop evaluation.
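As a minimal sketch of the classification step mentioned above (not the study's pipeline or its variables), the snippet below fits a Random Forest to discriminate trained from untrained trainees and ranks feature importances; the data and feature names are invented for illustration.

```python
# Random Forest trained/untrained classifier with feature-importance ranking.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

rng = np.random.default_rng(2)
n = 1000
X = pd.DataFrame({
    "course_hours": rng.uniform(5, 60, size=n),
    "years_experience": rng.integers(0, 25, size=n),
    "maneuver_errors": rng.poisson(3, size=n),
    "steps_completed": rng.integers(0, 40, size=n),
})
# Toy "trained" label derived from two of the synthetic features.
y = ((X.steps_completed > 20) & (X.maneuver_errors < 4)).astype(int)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
clf = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_tr, y_tr)
print("accuracy:", round(accuracy_score(y_te, clf.predict(X_te)), 3))
for name, imp in sorted(zip(X.columns, clf.feature_importances_), key=lambda kv: -kv[1]):
    print(f"{name}: {imp:.3f}")
```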
K. Asim, E. Elawadi, F. Martínez-Álvarez, I. A. Niaz, S. R. M. Sayed and T. Iqbal. Seismicity Analysis and Machine Learning Models for Short-Term Low Magnitude (Journal Article). Soil Dynamics and Earthquake Engineering, 130, article 105932, 2020. doi: 10.1016/j.soildyn.2019.105932. Tags: natural disasters, time series.
F. Moleshi, A. Haeri and F. Martínez-Álvarez. A novel hybrid GA–PSO framework for mining quantitative association rules (Journal Article). Soft Computing, 24(6), pp. 4645-4666, 2020. doi: 10.1007/s00500-019-04226-6. Tags: association rules.
Abstract: Discovering association rules is a useful and common technique for data mining in which dependencies among datasets are shown. Discovering the rules from continuous numeric datasets is one of the common challenges in data mining. Furthermore, another restriction imposed by algorithms in this area is the need to determine the minimum threshold for the criteria of support and confidence. By drawing on two heuristic optimization techniques, namely the genetic algorithm (GA) and the particle swarm optimization (PSO) algorithm, a hybrid algorithm for extracting quantitative association rules was developed in this research. Accurate and interpretable rules result from the integration of the multi-objective GA with the multi-objective PSO algorithm, which redresses the balance between the exploitation and exploration tasks. The useful and appropriate rules and the most suitable numerical intervals are discovered by proposing a multi-criteria method in which there is no need to discretize numerical values or to determine threshold values of minimum support and confidence. Different criteria are used to determine appropriate rules. In this algorithm, the selected rules are extracted based on confidence, interestingness and comprehensibility. The results gained over five real-world datasets evidence the effectiveness of the proposed method. By hybridizing the GA and the PSO algorithm, the proposed approach has achieved considerable improvements compared with the basic algorithms in terms of the number of rules extracted from the dataset, the confidence measure and the support percentage.
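For context, the short sketch below shows how a candidate quantitative rule over numeric intervals can be scored by support and confidence, two of the criteria the abstract mentions; the GA–PSO search that proposes and evolves such rules is deliberately omitted, and the data and intervals are illustrative.

```python
# Scoring a quantitative association rule defined over numeric intervals.
import numpy as np

rng = np.random.default_rng(4)
data = np.column_stack([rng.uniform(0, 10, 500),        # attribute A
                        rng.uniform(0, 10, 500)])       # attribute B
data[:, 1] += 0.5 * data[:, 0]                          # weak dependency B ~ A

def rule_metrics(data, antecedent, consequent):
    """antecedent/consequent: (column, low, high) interval conditions."""
    def covers(cond):
        col, lo, hi = cond
        return (data[:, col] >= lo) & (data[:, col] <= hi)
    a, c = covers(antecedent), covers(consequent)
    support = np.mean(a & c)
    confidence = support / np.mean(a) if a.any() else 0.0
    return support, confidence

# Rule: IF A in [6, 10] THEN B in [4, 15]
sup, conf = rule_metrics(data, (0, 6, 10), (1, 4, 15))
print(f"support={sup:.3f} confidence={conf:.3f}")
```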
L. Melgar-García, D. Gutiérrez-Avilés, C. Rubio-Escudero and A. Troncoso. High-content screening images streaming analysis using the STriGen methodology (Conference). SAC 35th Annual ACM Symposium on Applied Computing, pp. 537-539, 2020. doi: 10.1145/3341105.3374071. Tags: bioinformatics.
F. Martínez-Álvarez, A. Troncoso, H. Quintián and E. Corchado. Special issue: HAIS16-IGPL (Journal Article). Logic Journal of the IGPL, 28(1), pp. 1-3, 2020. doi: 10.1093/jigpal/jzz066. Tags: big data, deep learning, pattern recognition.
Abstract: Following, Fournier-Viger et al. propose to integrate the concept of correlation in high-utility itemset mining to find profitable itemsets that are highly correlated, using the all-confidence and bond measures. An efficient algorithm named FCHM (fast correlated high-utility itemset miner) is proposed to efficiently discover correlated high-utility itemsets. Two versions of the algorithm are proposed, named FCHM-all-confidence and FCHM-bond, based on the all-confidence and bond measures, respectively. An experimental evaluation was done using four real-life benchmark data sets from the high-utility itemset mining literature: mushroom, retail, kosarak and foodmart. Results show that FCHM is efficient and can prune a huge amount of weakly correlated high-utility itemsets.
D. T. Bui, N.-D. Hoang, F. Martínez-Álvarez, P.-T. T. Ngo, P. V. Hoa, T. D. Pham, P. Samui and R. Costache. A novel deep learning neural network approach for predicting flash flood susceptibility: A case study at a high frequency tropical storm area (Journal Article). Science of the Total Environment, 701, article 134413, 2020. doi: 10.1016/j.scitotenv.2019.134413. Tags: natural disasters, time series.
D. Guijo-Rubio, A. M. Durán-Rosal, P. A. Gutiérrez, A. Troncoso and C. Hervás-Martínez. Time series clustering based on segment typologies extraction (Journal Article). IEEE Transactions on Cybernetics, 2020. doi: 10.1109/TCYB.2019.2962584. Tags: time series.
Abstract: Time-series clustering is the process of grouping time series with respect to their similarity or characteristics. Previous approaches usually combine a specific distance measure for time series and a standard clustering method. However, these approaches do not take the similarity of the different subsequences of each time series into account, which can be used to better compare the time-series objects of the dataset. In this article, we propose a novel technique of time-series clustering consisting of two clustering stages. In a first step, a least-squares polynomial segmentation procedure is applied to each time series, which is based on a growing window technique that returns different-length segments. Then, all of the segments are projected into the same dimensional space, based on the coefficients of the model that approximates the segment and a set of statistical features. After mapping, a first hierarchical clustering phase is applied to all mapped segments, returning groups of segments for each time series. These clusters are used to represent all time series in the same dimensional space, after defining another specific mapping process. In a second and final clustering stage, all the time-series objects are grouped. We consider internal clustering quality to automatically adjust the main parameter of the algorithm, which is an error threshold for the segmentation. The results obtained on 84 datasets from the UCR Time Series Classification Archive have been compared against three state-of-the-art methods, showing that the performance of this methodology is very promising, especially on larger datasets.
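The following is a simplified sketch of the two-stage procedure outlined above (not the authors' exact method, which also tunes the error threshold via internal clustering quality): growing-window least-squares segmentation, mapping of segments to a common feature space, hierarchical clustering of the segments, and a second clustering of the series based on their segment-group profiles. Thresholds, features and data are illustrative.

```python
# Two-stage time-series clustering via segment extraction (simplified).
import numpy as np
from sklearn.cluster import AgglomerativeClustering

rng = np.random.default_rng(11)
series_list = [np.cumsum(rng.normal(size=300)) for _ in range(10)]   # synthetic series

def segment(series, degree=2, max_error=2.0):
    """Growing-window segmentation: extend the window until the least-squares
    polynomial fit error exceeds the threshold, then start a new segment."""
    segments, start, end = [], 0, degree + 2
    while end <= len(series):
        x = np.arange(start, end)
        coeffs = np.polyfit(x, series[start:end], degree)
        err = np.mean((np.polyval(coeffs, x) - series[start:end]) ** 2)
        if err > max_error:
            segments.append((start, end))
            start, end = end, end + degree + 2
        else:
            end += 1
    if start < len(series):
        segments.append((start, len(series)))
    return segments

def segment_features(series, seg, degree=2):
    """Map a segment to a fixed-length vector: polynomial coefficients plus
    simple statistics (mean, standard deviation, length)."""
    s, e = seg
    x = np.arange(s, e)
    coeffs = np.polyfit(x, series[s:e], min(degree, e - s - 1))
    coeffs = np.pad(coeffs, (degree + 1 - coeffs.size, 0))
    return np.concatenate([coeffs, [series[s:e].mean(), series[s:e].std(), e - s]])

# Stage 1: cluster all segments from all series in a shared feature space.
feats, owners = [], []
for idx, s in enumerate(series_list):
    for seg in segment(s):
        feats.append(segment_features(s, seg))
        owners.append(idx)
feats, owners = np.array(feats), np.array(owners)
seg_labels = AgglomerativeClustering(n_clusters=4).fit_predict(feats)

# Stage 2: represent each series by its distribution over segment groups,
# then cluster the series themselves.
profiles = np.array([[np.mean(seg_labels[owners == i] == c) for c in range(4)]
                     for i in range(len(series_list))])
series_labels = AgglomerativeClustering(n_clusters=3).fit_predict(profiles)
print("series cluster labels:", series_labels)
```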
F. Martínez-Álvarez and D. T. Bui. Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management (Editorial) (Journal Article). Remote Sensing, 12(2), pp. 301, 2020, ISSN: 2072-4292. doi: 10.3390/rs12020301. Tags: big data, natural disasters.
Abstract: This editorial summarizes the performance of the special issue entitled Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management, which was published in MDPI's Remote Sensing journal. The special issue took place in the years 2018 and 2019 and accepted a total of nine papers from authors of thirteen different countries. So far, these papers have received 116 citations. Earthquakes, landslides, floods, wildfire and soil salinity were the topics analyzed. New methods were introduced, with applications of the utmost relevance.
Ó. Trull, J. C. García-Díaz and A. Troncoso. Initialization methods for multiple seasonal Holt–Winters forecasting models (Journal Article). Mathematics, 8(2), pp. 268, 2020. doi: 10.3390/math8020268. Tags: energy, time series.
C. Moreno-Carmona, J. M. Feria-Domínguez and A. Troncoso. Applying the Open Government Principles to the University’s Strategic Planning: A Sound Practice (Journal Article). Sustainability, 12(5), pp. 1826, 2020. doi: 10.3390/su12051826. Tags: education.
Ó. Trull, J. C. García-Díaz and A. Troncoso. Stability of Multiple Seasonal Holt-Winters Models Applied to Hourly Electricity Demand in Spain (Journal Article). Applied Sciences, 10(7), pp. 2630, 2020. doi: 10.3390/app10072630. Tags: energy, time series.
C. Lezcano, J. L. Vázquez-Noguera, D. P. Pinto-Roa, M. García-Torres, C. Gaona and P. E. Gardel-Sotomayor. A multi-objective approach for designing optimized operation sequence on binary image processing (Journal Article). Heliyon, 6(4), pp. e03670, 2020. Tags: pattern recognition.
Abstract: In binary image segmentation, the choice of the order of the operation sequence may yield suboptimal results. In this work, we propose to tackle the associated optimization problem via a multi-objective approach. Given the original image, in combination with a list of morphological, logical and stacking operations, the goal is to obtain the ideal output at the lowest computational cost. We compared the performance of two Multi-objective Evolutionary Algorithms (MOEAs): the Non-dominated Sorting Genetic Algorithm (NSGA-II) and the Strength Pareto Evolutionary Algorithm 2 (SPEA2). NSGA-II has better results in most cases, but the difference does not reach statistical significance. The results show that the similarity measure and the computational cost are conflicting objective functions, while the number of available operations and the type of input images impact the quality of the Pareto set.
F. M. Delgado-Chaves, F. Gómez-Vela, F. Divina, M. García-Torres and D. S. Rodríguez-Baena. Computational Analysis of the Global Effects of Ly6E in the Immune Response to Coronavirus Infection Using Gene Networks (Journal Article). Genes, 11(7), pp. 831-864, 2020. Tags: bioinformatics.
Abstract: Gene networks have arisen as a promising tool in the comprehensive modeling and analysis of complex diseases. Particularly in viral infections, the understanding of the host-pathogen mechanisms, and the immune response to these, is considered a major goal for the rational design of appropriate therapies. For this reason, the use of gene networks may well encourage therapy-associated research in the context of the coronavirus pandemic, orchestrating experimental scrutiny and reducing costs. In this work, gene co-expression networks were reconstructed from RNA-Seq expression data with the aim of analyzing the time-resolved effects of gene Ly6E in the immune response against the coronavirus responsible for murine hepatitis (MHV). Through the integration of differential expression analyses and the exploration of the reconstructed networks, significant differences in the immune response to the virus were observed in Ly6E∆HSC mice compared to wild type animals. Results show that Ly6E ablation at hematopoietic stem cells (HSCs) leads to a progressively impaired immune response in both liver and spleen. Specifically, depletion of the normal leukocyte mediated immunity and chemokine signaling is observed in the liver of Ly6E∆HSC mice. On the other hand, the immune response in the spleen, which seemed to be mediated by an intense chromatin activity in the normal situation, is replaced by ECM remodeling in Ly6E∆HSC mice. These findings, which require further experimental characterization, could be extrapolated to other coronaviruses and motivate the efforts towards novel antiviral approaches.
F. Daumas-Ladouce and M. García-Torres and J. Luis Vázquez-Noguera and D. P. Pinto-Roa and H. Legal-Alaya Multi-Objective Pareto Histogram Equalization (Journal Article) Electronic Notes in Theoretical Computer Science, 349 , pp. 3-23, 2020. (Abstract | BibTeX | Tags: pattern recognition) @article{Daumas-Ladouce20, title = {Multi-Objective Pareto Histogram Equalization}, author = {F. Daumas-Ladouce and M. García-Torres and J. Luis Vázquez-Noguera and D. P. Pinto-Roa and H. Legal-Alaya}, year = {2020}, date = {2020-01-01}, journal = {Electronic Notes in Theoretical Computer Science}, volume = {349}, pages = {3-23}, abstract = {Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative methods is the weighted sum of metrics used to enhance the contrast of an image. These types of techniques, besides returning just a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on a Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in an a posteriori selection context. The goal of this method is three-fold: (1) to improve the contrast, (2) without losing important details, and (3) avoiding excessive distortion. MOPHE is a pure multi-objective optimization algorithm; consequently, a set of trade-off optimal solutions is generated, providing alternative solutions to the decision-maker and allowing the selection of one or more resulting images, depending on the application needs. Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than those obtained by representative state-of-the-art algorithms in terms of both visual quality and metric scores.}, keywords = {pattern recognition}, pubstate = {published}, tppubtype = {article} } Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative methods is the weighted sum of metrics used to enhance the contrast of an image. These types of techniques, besides returning just a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on a Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in an a posteriori selection context. The goal of this method is three-fold: (1) to improve the contrast, (2) without losing important details, and (3) avoiding excessive distortion. 
MOPHE is a pure multi-objective optimization algorithm; consequently, a set of trade-off optimal solutions is generated, providing alternative solutions to the decision-maker and allowing the selection of one or more resulting images, depending on the application needs. Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than those obtained by representative state-of-the-art algorithms in terms of both visual quality and metric scores. |
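MOPHE itself is a MOPSO-based algorithm and is not reproduced here; the snippet below only illustrates why contrast enhancement and distortion can conflict, by scoring a plain histogram equalization of a synthetic low-contrast image with two illustrative objectives: RMS contrast (to maximize) and absolute mean-brightness error (to minimize). Both metrics and the image are assumptions made for the example, not the ones used in the paper.

```python
# Sketch (not the MOPHE algorithm): apply plain histogram equalization to a
# synthetic low-contrast image and score the result with two potentially
# conflicting objectives: contrast gain and a simple brightness-distortion proxy.
import numpy as np

def equalize(img):
    """Classic histogram equalization for an 8-bit grayscale image."""
    hist = np.bincount(img.ravel(), minlength=256)
    cdf = hist.cumsum()
    cdf_min = cdf[cdf > 0][0]
    lut = np.clip(np.round((cdf - cdf_min) / (img.size - cdf_min) * 255), 0, 255)
    return lut.astype(np.uint8)[img]

def rms_contrast(img):
    """Standard deviation of the intensities (RMS contrast)."""
    return float(img.astype(np.float64).std())

def brightness_error(original, enhanced):
    """Absolute difference of the mean intensities (distortion proxy)."""
    return abs(float(original.mean()) - float(enhanced.mean()))

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    low_contrast = rng.integers(100, 156, size=(128, 128)).astype(np.uint8)
    enhanced = equalize(low_contrast)
    print("contrast before/after:", rms_contrast(low_contrast), rms_contrast(enhanced))
    print("mean-brightness error:", brightness_error(low_contrast, enhanced))
```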
D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo Identifying livestock behavior patterns based on accelerometer dataset (Journal Article) Journal of Computational Science, 41 , pp. 101076, 2020. (Abstract | Links | BibTeX | Tags: pattern recognition) @article{Rodriguez-Baena20, title = {Identifying livestock behavior patterns based on accelerometer dataset}, author = {D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo}, url = {https://doi.org/10.1016/j.jocs.2020.101076}, doi = {10.1016/j.jocs.2020.101076}, year = {2020}, date = {2020-01-01}, journal = {Journal of Computational Science}, volume = {41}, pages = {101076}, abstract = {In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would make it possible to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally, this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not be the best approach, since not all the animals can be effectively monitored all the time. Moreover, the traditional approach relies heavily on human experience, which cannot always be taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located on the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves. Results confirm that the proposed method can help breeders in detecting activity and inactivity periods in large livestock farming.}, keywords = {pattern recognition}, pubstate = {published}, tppubtype = {article} } In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would make it possible to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally, this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not be the best approach, since not all the animals can be effectively monitored all the time. Moreover, the traditional approach relies heavily on human experience, which cannot always be taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located on the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves. Results confirm that the proposed method can help breeders in detecting activity and inactivity periods in large livestock farming. |
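The paper's exact detection procedure is not reproduced here; the following is a generic sketch of the underlying idea, assuming tri-axial accelerometer samples: compute the acceleration magnitude, take a rolling standard deviation, and threshold it into activity and inactivity periods. The window size, threshold and synthetic data are illustrative choices, not values from the study.

```python
# Generic sketch (not the paper's exact method): label activity/inactivity
# periods from tri-axial accelerometer samples by thresholding the rolling
# standard deviation of the acceleration magnitude.
import numpy as np

def activity_labels(acc_xyz, window=25, threshold=0.15):
    """acc_xyz: (n_samples, 3) array in g units. Returns a boolean array,
    True where the animal is considered active."""
    magnitude = np.linalg.norm(acc_xyz, axis=1)
    # Rolling standard deviation via a simple sliding window
    pad = window // 2
    padded = np.pad(magnitude, pad, mode="edge")
    windows = np.lib.stride_tricks.sliding_window_view(padded, window)
    rolling_std = windows.std(axis=1)[: len(magnitude)]
    return rolling_std > threshold

if __name__ == "__main__":
    rng = np.random.default_rng(1)
    rest = rng.normal([0, 0, 1.0], 0.02, size=(200, 3))    # mostly gravity
    moving = rng.normal([0, 0, 1.0], 0.4, size=(200, 3))   # high variability
    labels = activity_labels(np.vstack([rest, moving]))
    print("fraction active in first half:", labels[:200].mean())
    print("fraction active in second half:", labels[200:].mean())
```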
T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions (Journal Article) Genes, 11 (9), pp. 985, 2020. (Abstract | BibTeX | Tags: bioinformatics) @article{Vanhaeren20, title = {A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions}, author = {T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García}, year = {2020}, date = {2020-01-01}, journal = {Genes}, volume = {11}, number = {9}, pages = {985}, abstract = {The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. 
Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin |
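As a toy version of the comparison described above, the snippet below evaluates several scikit-learn classifiers, including gradient boosting, with cross-validation on synthetic tabular features standing in for 1D chromatin signals. It reproduces the evaluation pattern only, not the paper's data, feature engineering or tuned hyper-parameters.

```python
# Toy version of the model comparison: several classifiers, including gradient
# boosting, evaluated with 5-fold cross-validation on synthetic tabular data.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=1000, n_features=30, n_informative=10,
                           random_state=0)

models = {
    "decision tree": DecisionTreeClassifier(random_state=0),
    "random forest": RandomForestClassifier(n_estimators=200, random_state=0),
    "gradient boosting": GradientBoostingClassifier(random_state=0),
    "SVM": SVC(),
    "MLP": MLPClassifier(max_iter=1000, random_state=0),
}

for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5, scoring="accuracy")
    print(f"{name:>18}: {scores.mean():.3f} +/- {scores.std():.3f}")
```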
A. López-Fernández and D. Rodriguez-Baena and F. Gómez-Vela and F. Divina and M. García-Torres A multi-GPU biclustering algorithm for binary datasets (Journal Article) Journal of Parallel and Distributed Computing, 147 , pp. 209-219, 2020. (Abstract | BibTeX | Tags: big data) @article{Lopez20, title = {A multi-GPU biclustering algorithm for binary datasets}, author = {A. López-Fernández and D. Rodriguez-Baena and F. Gómez-Vela and F. Divina and M. García-Torres}, year = {2020}, date = {2020-01-01}, journal = {Journal of Parallel and Distributed Computing}, volume = {147}, pages = {209-219}, abstract = {Graphics Processing Unit (GPU) technology and the CUDA architecture are among the most widely used options for adapting machine learning techniques to the huge amounts of complex data that are currently generated. Biclustering techniques are useful for discovering local patterns in datasets. Those that have been implemented to use GPU resources in parallel have improved their computational performance. However, this fact does not guarantee that they can successfully process large datasets. There are some important issues that must be taken into account, such as the data transfers between CPU and GPU memory or the balanced distribution of the workload among the GPU resources. In this paper, a GPU version of one of the fastest biclustering solutions, BiBit, is presented. This implementation, named gBiBit, has been designed to take full advantage of the computational resources offered by GPU devices. Whether using a single GPU device or its multi-GPU mode, gBiBit is able to process large binary datasets. The experimental results have shown that gBiBit improves the computational performance of BiBit, as well as that of a parallel CPU version and an early GPU version, called ParBiBit and CUBiBit, respectively. gBiBit source code is available at https://github.com/aureliolfdez/gbibit.}, keywords = {big data}, pubstate = {published}, tppubtype = {article} } Graphics Processing Unit (GPU) technology and the CUDA architecture are among the most widely used options for adapting machine learning techniques to the huge amounts of complex data that are currently generated. Biclustering techniques are useful for discovering local patterns in datasets. Those that have been implemented to use GPU resources in parallel have improved their computational performance. However, this fact does not guarantee that they can successfully process large datasets. There are some important issues that must be taken into account, such as the data transfers between CPU and GPU memory or the balanced distribution of the workload among the GPU resources. In this paper, a GPU version of one of the fastest biclustering solutions, BiBit, is presented. This implementation, named gBiBit, has been designed to take full advantage of the computational resources offered by GPU devices. Whether using a single GPU device or its multi-GPU mode, gBiBit is able to process large binary datasets. The experimental results have shown that gBiBit improves the computational performance of BiBit, as well as that of a parallel CPU version and an early GPU version, called ParBiBit and CUBiBit, respectively. gBiBit source code is available at https://github.com/aureliolfdez/gbibit. |
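gBiBit is a CUDA implementation and its GPU code is not shown here; the snippet below is a CPU-only, pure-Python approximation of the bit-pattern idea behind BiBit (each binary row packed into an integer, pairwise bitwise AND used to seed candidate biclusters), intended only to make the encoding concrete. Parameters and data are illustrative.

```python
# CPU-only sketch of the bit-pattern idea behind BiBit (not the gBiBit GPU
# code): each row of a binary matrix is packed into an integer; the bitwise
# AND of two rows seeds a candidate bicluster, and every other row whose
# pattern contains that seed is added to it.
from itertools import combinations

def rows_to_bits(matrix):
    """Pack each binary row into a Python int (bit j set iff column j is 1)."""
    return [sum(bit << j for j, bit in enumerate(row)) for row in matrix]

def bibit_like(matrix, min_cols=2, min_rows=2):
    patterns = rows_to_bits(matrix)
    seen, biclusters = set(), []
    for i, j in combinations(range(len(patterns)), 2):
        seed = patterns[i] & patterns[j]
        if seed == 0 or seed in seen or bin(seed).count("1") < min_cols:
            continue
        seen.add(seed)
        rows = [r for r, p in enumerate(patterns) if p & seed == seed]
        if len(rows) >= min_rows:
            cols = [c for c in range(len(matrix[0])) if seed >> c & 1]
            biclusters.append((rows, cols))
    return biclusters

if __name__ == "__main__":
    data = [[1, 1, 0, 1],
            [1, 1, 0, 0],
            [0, 1, 1, 1],
            [1, 1, 0, 1]]
    for rows, cols in bibit_like(data):
        print("rows", rows, "cols", cols)
```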
2019 |
C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system (Journal Article) IEEE Access, 7 , pp. 120840-120856, 2019. (Links | BibTeX | Tags: energy, time series) @article{GOMEZ19, title = {A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system}, author = {C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme}, url = {https://ieeexplore.ieee.org/document/8807120}, doi = {https://doi.org/10.1109/ACCESS.2019.2936478}, year = {2019}, date = {2019-08-01}, journal = {IEEE Access}, volume = {7}, pages = {120840-120856}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
F. Martínez-Álvarez and A. Morales-Esteban Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial) (Journal Article) Computers and Geosciences, 129 , pp. 38-39, 2019. (Links | BibTeX | Tags: big data, natural disasters, time series) @article{MARTINEZ19, title = {Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial)}, author = {F. Martínez-Álvarez and A. Morales-Esteban}, url = {https://www.sciencedirect.com/science/article/pii/S009830041930411X?dgcid=rss_sd_all}, doi = {https://doi.org/10.1016/j.cageo.2019.04.012}, year = {2019}, date = {2019-08-01}, journal = {Computers and Geosciences}, volume = {129}, pages = {38-39}, keywords = {big data, natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
C. Rubio-Escudero and F. Martínez-Álvarez and E. Atencia and A. Troncoso Deployment of an internal quality assurance system at Pablo de Olavide University of Seville: improving students skills (Conference) ICEUTE 10th International Conference on European Transnational Education, 951 , Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: education) @conference{RUBIO19, title = {Deployment of an internal quality assurance system at Pablo de Olavide University of Seville: improving students skills}, author = {C. Rubio-Escudero and F. Martínez-Álvarez and E. Atencia and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_35}, doi = {https://doi.org/10.1007/978-3-030-20005-3_3}, year = {2019}, date = {2019-05-16}, booktitle = {ICEUTE 10th International Conference on European Transnational Education}, volume = {951}, pages = {340-348}, series = {Advances in Intelligent Systems and Computing}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
J. L. Amaro-Mellado and D. Antón and M. Pérez-Suárez and F. Martínez-Álvarez Game-based Student Response System applied to a multidisciplinary teaching context (Conference) ICEUTE 10th International Conference on European Transnational Education, 951 , Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: education) @conference{AMARO19, title = {Game-based Student Response System applied to a multidisciplinary teaching context}, author = {J. L. Amaro-Mellado and D. Antón and M. Pérez-Suárez and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_34}, doi = {https://doi.org/10.1007/978-3-030-20005-3_34}, year = {2019}, date = {2019-05-16}, booktitle = {ICEUTE 10th International Conference on European Transnational Education}, volume = {951}, pages = {329-339}, series = {Advances in Intelligent Systems and Computing}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting (Conference) IWANN 15th International Work-Conference on Artificial Neural Networks, 11506 , Lecture Notes in Computer Science 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{TORRES19-2, title = {Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting}, author = {J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20521-8_22}, doi = {https://doi.org/10.1007/978-3-030-20521-8_22}, year = {2019}, date = {2019-05-16}, booktitle = {IWANN 15th International Work-Conference on Artificial Neural Networks}, volume = {11506}, pages = {259-269}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez Big data solar power forecasting based on deep learning and multiple data sources (Journal Article) Expert Systems, 36 , pp. id12394, 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @article{TORRES19-1, title = {Big data solar power forecasting based on deep learning and multiple data sources}, author = {J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/exsy.12394}, doi = {https://doi.org/10.1111/exsy.12394}, year = {2019}, date = {2019-03-01}, journal = {Expert Systems}, volume = {36}, pages = {id12394}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } |
F. Martinez-Alvarez and A. Schmutz and G. Asencio-Cortes and J. Jacques A Novel Hybrid Algorithm to Forecast Functional Time Series Based on Pattern Sequence Similarity with Application to Electricity Demand (Journal Article) Energies, 12 (94), pp. 1-18, 2019, ISSN: 1996-1073. (Abstract | Links | BibTeX | Tags: energy, time series) @article{en12010094b, title = {A Novel Hybrid Algorithm to Forecast Functional Time Series Based on Pattern Sequence Similarity with Application to Electricity Demand}, author = {F. Martinez-Alvarez and A. Schmutz and G. Asencio-Cortes and J. Jacques}, url = {http://www.mdpi.com/1996-1073/12/1/94}, doi = {10.3390/en12010094}, issn = {1996-1073}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {94}, pages = {1-18}, abstract = {The forecasting of future values is a very challenging task. In almost all scientific disciplines, the analysis of time series provides useful information and even economic benefits. In this context, this paper proposes a novel hybrid algorithm to forecast functional time series with arbitrary prediction horizons. It integrates a well-known clustering functional data algorithm into a forecasting strategy based on pattern sequence similarity, which was originally developed for discrete time series. The new approach assumes that some patterns are repeated over time, and it attempts to discover them and evaluate their immediate future. Hence, the algorithm first applies a clustering functional time series algorithm, i.e., it assigns labels to every data unit (it may represent either one hour, or one day, or any arbitrary length). As a result, the time series is transformed into a sequence of labels. Later, it retrieves the sequence of labels occurring just after the sample that we want to be forecasted. This sequence is searched for within the historical data, and every time it is found, the sample immediately after is stored. Once the searching process is terminated, the output is generated by weighting all stored data. The performance of the approach has been tested on real-world datasets related to electricity demand and compared to other existing methods, reporting very promising results. Finally, a statistical significance test has been carried out to confirm the suitability of the election of the compared methods. In conclusion, a novel algorithm to forecast functional time series is proposed with very satisfactory results when assessed in the context of electricity demand.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } The forecasting of future values is a very challenging task. In almost all scientific disciplines, the analysis of time series provides useful information and even economic benefits. In this context, this paper proposes a novel hybrid algorithm to forecast functional time series with arbitrary prediction horizons. It integrates a well-known clustering functional data algorithm into a forecasting strategy based on pattern sequence similarity, which was originally developed for discrete time series. The new approach assumes that some patterns are repeated over time, and it attempts to discover them and evaluate their immediate future. Hence, the algorithm first applies a clustering functional time series algorithm, i.e., it assigns labels to every data unit (it may represent either one hour, or one day, or any arbitrary length). As a result, the time series is transformed into a sequence of labels. 
Later, it retrieves the sequence of labels occurring just after the sample that we want to forecast. This sequence is searched for within the historical data, and every time it is found, the sample immediately after is stored. Once the searching process is terminated, the output is generated by weighting all stored data. The performance of the approach has been tested on real-world datasets related to electricity demand and compared to other existing methods, reporting very promising results. Finally, a statistical significance test has been carried out to confirm the suitability of the selection of the compared methods. In conclusion, a novel algorithm to forecast functional time series is proposed with very satisfactory results when assessed in the context of electricity demand. |
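The abstract above describes the pattern-sequence strategy in words; the sketch below schematizes it on an ordinary discrete daily series, replacing the paper's functional-data clustering with plain k-means: cluster days into labels, locate the most recent label window in the history, and average the days that followed each match. The window length, number of clusters and synthetic data are assumptions for the example.

```python
# Schematic of a pattern-sequence forecast on a discrete daily series (the
# paper's functional-data clustering is replaced here by plain k-means).
import numpy as np
from sklearn.cluster import KMeans

def psf_like_forecast(days, n_clusters=3, window=2):
    """days: (n_days, n_points_per_day). Predict the next day's profile."""
    labels = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit_predict(days)
    query = tuple(labels[-window:])                 # labels of the most recent days
    matches = [t + window                           # index of the day that followed
               for t in range(len(labels) - window)
               if tuple(labels[t:t + window]) == query]
    if not matches:                                 # no repeated pattern found
        return days[-1]                             # fall back to persistence
    return days[matches].mean(axis=0)               # average the successor days

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    base = np.array([[1, 2, 3, 2], [5, 6, 7, 6], [9, 8, 9, 8]], dtype=float)
    days = base[[0, 1, 2, 0, 1, 2, 0, 1]] + rng.normal(0, 0.1, (8, 4))
    print(psf_like_forecast(days))   # should resemble the third daily profile
```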
Y. Lin and I. Koprinska and M. Rana and A. Troncoso Pattern Sequence Neural Network for Solar Power Forecasting (Conference) ICONIP 26th International Conference on Neural Information Processing, 2019. (BibTeX | Tags: energy, time series) @conference{ICONIP19, title = {Pattern Sequence Neural Network for Solar Power Forecasting}, author = {Y. Lin and I. Koprinska and M. Rana and A. Troncoso}, year = {2019}, date = {2019-01-01}, booktitle = {ICONIP 26th International Conference on Neural Information Processing}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {conference} } |
Ó. Trull and J. C. García-Díaz and A. Troncoso Application of Discrete-Interval Moving Seasonalities to Spanish Electricity Demand Forecasting during Easter (Journal Article) Energies, 12 (6), pp. 1083, 2019. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energies2019, title = {Application of Discrete-Interval Moving Seasonalities to Spanish Electricity Demand Forecasting during Easter}, author = {Ó. Trull and J. C. García-Díaz and A. Troncoso }, url = {https://www.mdpi.com/1996-1073/12/6/1083}, doi = {10.3390/en12061083}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {6}, pages = {1083}, abstract = {Forecasting electricity demand through time series is a tool used by transmission system operators to establish future operating conditions. The accuracy of these forecasts is essential for the precise development of activity. However, the accuracy of the forecasts is enormously subject to the calendar effect. The multiple seasonal Holt–Winters models are widely used due to the great precision and simplicity that they offer. Usually, these models relate this calendar effect to external variables that contribute to modification of their forecasts a posteriori. In this work, a new point of view is presented, where the calendar effect constitutes a built-in part of the Holt–Winters model. In particular, the proposed model incorporates discrete-interval moving seasonalities. Moreover, a clear example of the application of this methodology to situations that are difficult to treat, such as the days of Easter, is presented. The results show that the proposed model performs well, outperforming the regular Holt–Winters model and other methods such as artificial neural networks and Exponential Smoothing State Space Model with Box-Cox Transformation, ARMA Errors, Trend and Seasonal Components (TBATS) methods.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Forecasting electricity demand through time series is a tool used by transmission system operators to establish future operating conditions. The accuracy of these forecasts is essential for the precise development of activity. However, the accuracy of the forecasts is enormously subject to the calendar effect. The multiple seasonal Holt–Winters models are widely used due to the great precision and simplicity that they offer. Usually, these models relate this calendar effect to external variables that contribute to modification of their forecasts a posteriori. In this work, a new point of view is presented, where the calendar effect constitutes a built-in part of the Holt–Winters model. In particular, the proposed model incorporates discrete-interval moving seasonalities. Moreover, a clear example of the application of this methodology to situations that are difficult to treat, such as the days of Easter, is presented. The results show that the proposed model performs well, outperforming the regular Holt–Winters model and other methods such as artificial neural networks and Exponential Smoothing State Space Model with Box-Cox Transformation, ARMA Errors, Trend and Seasonal Components (TBATS) methods. |
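The discrete-interval moving seasonalities proposed in the paper are an extension of the Holt-Winters family and are not available in standard libraries, so they are not reproduced here; the snippet below only fits the ordinary additive Holt-Winters baseline on a synthetic hourly demand series with statsmodels, as a point of departure for such extensions.

```python
# Baseline only: an ordinary additive Holt-Winters fit on a synthetic hourly
# demand series. The paper's discrete-interval moving seasonalities (e.g. for
# Easter) extend this model and are not implemented here.
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

rng = np.random.default_rng(0)
hours = pd.date_range("2019-01-01", periods=24 * 28, freq="h")
daily_cycle = 10 + 3 * np.sin(2 * np.pi * hours.hour / 24)
demand = pd.Series(daily_cycle + rng.normal(0, 0.3, len(hours)), index=hours)

model = ExponentialSmoothing(demand, trend="add", seasonal="add",
                             seasonal_periods=24).fit()
forecast = model.forecast(24)          # next day, hour by hour
print(forecast.head())
```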
A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Real-Time Big Data Analytics in Smart Cities from LoRa-based IoT Networks (Conference) SOCO 14th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: big data, IoT) @conference{SOCO2019, title = {Real-Time Big Data Analytics in Smart Cities from LoRa-based IoT Networks}, author = {A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20055-8_9}, year = {2019}, date = {2019-01-01}, booktitle = {SOCO 14th International Conference on Soft Computing Models in Industrial and Environmental Applications}, series = {Advances in Intelligent Systems and Computing}, keywords = {big data, IoT}, pubstate = {published}, tppubtype = {conference} } |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special issue on Hybrid Artificial Intelligence Systems from HAIS 2016 Conference (Journal Article) Neurocomputing, 353 , pp. 1-2, 2019. @article{NEUCOM2019b, title = {Special issue on Hybrid Artificial Intelligence Systems from HAIS 2016 Conference}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://www.sciencedirect.com/science/article/pii/S0925231219303297?via%3Dihub}, doi = {10.1016/j.neucom.2019.02.059}, year = {2019}, date = {2019-01-01}, journal = {Neurocomputing}, volume = {353}, pages = {1-2}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
F. M Delgado-Chaves and F. Gómez-Vela and M. García-Torres and F. Divina and J. Luis Vázquez Noguera Computational Inference of Gene Co-Expression Networks for the identification of Lung Carcinoma Biomarkers: An Ensemble Approach (Journal Article) Genes, 10 (12), pp. 962, 2019. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{Genes2019, title = {Computational Inference of Gene Co-Expression Networks for the identification of Lung Carcinoma Biomarkers: An Ensemble Approach}, author = {F. M Delgado-Chaves and F. Gómez-Vela and M. García-Torres and F. Divina and J. Luis Vázquez Noguera}, url = {https://www.mdpi.com/2073-4425/10/12/962}, doi = {https://doi.org/10.3390/genes10120962}, year = {2019}, date = {2019-01-01}, journal = {Genes}, volume = {10}, number = {12}, pages = {962}, abstract = {Gene Networks (GNs) have emerged as a useful tool in recent years for the analysis of different diseases in the field of biomedicine. In particular, GNs have been widely applied for the study and analysis of different types of cancer. In this context, lung carcinoma is among the most common cancer types and its short life expectancy is partly due to late diagnosis. For this reason, lung cancer biomarkers that can be easily measured are highly demanded in biomedical research. In this work, we present an application of gene co-expression networks in the modelling of lung cancer gene regulatory networks, which ultimately led to the discovery of new biomarkers. For this, a robust GN inference was performed from microarray data concomitantly using three different co-expression measures. Results identified a major cluster of genes involved in SRP-dependent co-translational protein targeting to membrane, as well as a set of 28 genes that were exclusively found in networks generated from cancer samples. Amongst potential biomarkers, genes NCKAP1L and DMD are highlighted due to their implications in a considerable portion of lung and bronchus primary carcinomas. These findings demonstrate the potential of GN reconstruction in the rational prediction of biomarkers.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Gene Networks (GNs) have emerged as a useful tool in recent years for the analysis of different diseases in the field of biomedicine. In particular, GNs have been widely applied for the study and analysis of different types of cancer. In this context, lung carcinoma is among the most common cancer types and its short life expectancy is partly due to late diagnosis. For this reason, lung cancer biomarkers that can be easily measured are highly demanded in biomedical research. In this work, we present an application of gene co-expression networks in the modelling of lung cancer gene regulatory networks, which ultimately led to the discovery of new biomarkers. For this, a robust GN inference was performed from microarray data concomitantly using three different co-expression measures. Results identified a major cluster of genes involved in SRP-dependent co-translational protein targeting to membrane, as well as a set of 28 genes that were exclusively found in networks generated from cancer samples. Amongst potential biomarkers, genes NCKAP1L and DMD are highlighted due to their implications in a considerable portion of lung and bronchus primary carcinomas. These findings demonstrate the potential of GN reconstruction in the rational prediction of biomarkers. |
F. Gómez-Vela and F. M Delgado-Chaves and D.S. Rodríguez-Baena and M. García-Torres and F. Divina Ensemble and Greedy Approach for the Reconstruction of Large Gene Co-Expression Networks (Journal Article) Entropy, 21 (12), pp. 1139, 2019. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{Entropy2019, title = {Ensemble and Greedy Approach for the Reconstruction of Large Gene Co-Expression Networks}, author = {F. Gómez-Vela and F. M Delgado-Chaves and D.S. Rodríguez-Baena and M. García-Torres and F. Divina}, url = {https://www.mdpi.com/1099-4300/21/12/1139}, doi = {https://doi.org/10.3390/e21121139}, year = {2019}, date = {2019-01-01}, journal = {Entropy}, volume = {21}, number = {12}, pages = {1139}, abstract = {Gene networks have become a powerful tool in the comprehensive analysis of gene expression. Due to the increasing amount of available data, computational methods for networks generation must deal with the so-called curse of dimensionality in the quest for the reliability of the obtained results. In this context, ensemble strategies have significantly improved the precision of results by combining different measures or methods. On the other hand, structure optimization techniques are also important in the reduction of the size of the networks, not only improving their topology but also keeping a positive prediction ratio. In this work, we present Ensemble and Greedy networks (EnGNet), a novel two-step method for gene networks inference. First, EnGNet uses an ensemble strategy for co-expression networks generation. Second, a greedy algorithm optimizes both the size and the topological features of the network. Not only do achieved results show that this method is able to obtain reliable networks, but also that it significantly improves topological features. Moreover, the usefulness of the method is proven by an application to a human dataset on post-traumatic stress disorder, revealing an innate immunity-mediated response to this pathology. These results are indicative of the method’s potential in the field of biomarkers discovery and characterization.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Gene networks have become a powerful tool in the comprehensive analysis of gene expression. Due to the increasing amount of available data, computational methods for networks generation must deal with the so-called curse of dimensionality in the quest for the reliability of the obtained results. In this context, ensemble strategies have significantly improved the precision of results by combining different measures or methods. On the other hand, structure optimization techniques are also important in the reduction of the size of the networks, not only improving their topology but also keeping a positive prediction ratio. In this work, we present Ensemble and Greedy networks (EnGNet), a novel two-step method for gene networks inference. First, EnGNet uses an ensemble strategy for co-expression networks generation. Second, a greedy algorithm optimizes both the size and the topological features of the network. Not only do achieved results show that this method is able to obtain reliable networks, but also that it significantly improves topological features. Moreover, the usefulness of the method is proven by an application to a human dataset on post-traumatic stress disorder, revealing an innate immunity-mediated response to this pathology. These results are indicative of the method’s potential in the field of biomarkers discovery and characterization. |
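EnGNet is a two-step method; the snippet below illustrates only a simplified version of the first, ensemble step: three co-expression measures are thresholded and combined by majority vote into a single adjacency matrix. The specific measures (Pearson, Spearman, Kendall), the threshold and the synthetic expression data are assumptions for the illustration, and the greedy topology optimization step is omitted.

```python
# Simplified illustration of an ensemble co-expression step: three measures
# are thresholded and combined by majority vote into one adjacency matrix.
import numpy as np
from scipy import stats

def ensemble_coexpression(expr, threshold=0.7):
    """expr: (n_genes, n_samples) array. Returns a boolean adjacency matrix."""
    n_genes = expr.shape[0]
    pearson = np.abs(np.corrcoef(expr))
    rho, _ = stats.spearmanr(expr, axis=1)           # (n_genes, n_genes) matrix
    spearman = np.abs(rho)
    kendall = np.zeros((n_genes, n_genes))
    for i in range(n_genes):
        for j in range(i + 1, n_genes):
            tau, _ = stats.kendalltau(expr[i], expr[j])
            kendall[i, j] = kendall[j, i] = abs(tau)
    votes = ((pearson > threshold).astype(int)
             + (spearman > threshold).astype(int)
             + (kendall > threshold).astype(int))
    adjacency = votes >= 2                            # edge kept on majority vote
    np.fill_diagonal(adjacency, False)
    return adjacency

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    g1 = rng.normal(size=20)
    expr = np.vstack([g1, g1 + rng.normal(0, 0.1, 20), rng.normal(size=(3, 20))])
    print(ensemble_coexpression(expr).astype(int))    # genes 0 and 1 should be linked
```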
G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem (Journal Article) Information Sciences, 494 , pp. 1–20, 2019. (Abstract | Links | BibTeX | Tags: feature selection) @article{IS-2019, title = {A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem}, author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina}, url = {https://www.sciencedirect.com/science/article/pii/S0020025519303603}, doi = {https://doi.org/10.1016/j.ins.2019.04.046}, year = {2019}, date = {2019-01-01}, journal = {Information Sciences}, volume = {494}, pages = {1--20}, abstract = {In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method.}, keywords = {feature selection}, pubstate = {published}, tppubtype = {article} } In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method. |
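The exact MSU estimator and its bias-correction expression are given in the paper and are not reproduced here; the sketch below only shows the information-theoretic quantities it builds on: discrete entropy, total correlation, and a symmetrical-uncertainty-style normalization of total correlation by the marginal entropies (which reduces to the classical SU = 2·I(X;Y)/(H(X)+H(Y)) for two variables). The normalization shown is one natural choice and may differ in detail from the paper's definition.

```python
# Sketch of the quantities behind a multivariate symmetrical uncertainty:
# discrete entropy, total correlation TC = sum_i H(X_i) - H(X_1,...,X_n),
# and a normalization of TC by the marginal entropies. The exact MSU
# estimator and its bias correction from the paper are not reproduced here.
import numpy as np
from collections import Counter

def entropy(columns):
    """Joint Shannon entropy (bits) of one or more discrete columns."""
    joint = list(zip(*columns))
    counts = np.array(list(Counter(joint).values()), dtype=float)
    p = counts / counts.sum()
    return float(-(p * np.log2(p)).sum())

def total_correlation(columns):
    return sum(entropy([c]) for c in columns) - entropy(columns)

def msu_like(columns):
    """Total correlation normalized to [0, 1] by the marginal entropies
    (requires at least two variables)."""
    marginals = sum(entropy([c]) for c in columns)
    n = len(columns)
    if marginals == 0:
        return 0.0
    return n / (n - 1) * total_correlation(columns) / marginals

if __name__ == "__main__":
    x = [0, 0, 1, 1, 0, 1, 0, 1]
    y = x[:]                      # fully redundant with x
    z = [0, 1, 0, 1, 1, 0, 1, 0]  # weakly related to x
    print("MSU-like(x, y):", msu_like([x, y]))   # close to 1
    print("MSU-like(x, z):", msu_like([x, z]))   # much smaller
```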
F. Divina and M. García-Torres and F. Gómez-Vela and J.L. Vázquez Noguera A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings (Journal Article) Energies, 12 (10), pp. 1934, 2019. (Abstract | Links | BibTeX | Tags: energy, time series) @article{Energies2019b, title = {A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings}, author = {F. Divina and M. García-Torres and F. Gómez-Vela and J.L. Vázquez Noguera}, url = {https://www.mdpi.com/1996-1073/12/10/1934}, doi = {https://doi.org/10.3390/en12101934}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {10}, pages = {1934}, abstract = {Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could make it possible to improve the energy efficiency of such buildings, as well as to help spot problems related to energy waste. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies. To do this, we used the data regarding the electric consumption registered by thirteen buildings located on a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task.}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could make it possible to improve the energy efficiency of such buildings, as well as to help spot problems related to energy waste. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies. To do this, we used the data regarding the electric consumption registered by thirteen buildings located on a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task. |
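The study compares several families of forecasting methods on real building data; the snippet below illustrates only the general evaluation pattern on synthetic hourly consumption: lagged features, a chronological train/test split, and a machine-learning regressor compared against a naive persistence baseline using MAPE. The data, the single regressor and the error metric are illustrative choices, not the paper's setup.

```python
# Illustration of the evaluation pattern only: lagged features from an hourly
# consumption series, chronological split, ML regressor vs. persistence.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
hours = np.arange(24 * 60)
consumption = 50 + 20 * np.sin(2 * np.pi * hours / 24) + rng.normal(0, 2, len(hours))

n_lags = 24
X = np.column_stack([consumption[i:len(consumption) - n_lags + i] for i in range(n_lags)])
y = consumption[n_lags:]

split = int(0.8 * len(y))                      # chronological split, no shuffling
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

def mape(true, pred):
    return float(np.mean(np.abs((true - pred) / true)) * 100)

model = RandomForestRegressor(n_estimators=200, random_state=0).fit(X_train, y_train)
print("random forest MAPE:", round(mape(y_test, model.predict(X_test)), 2))
print("persistence  MAPE:", round(mape(y_test, X_test[:, -1]), 2))   # previous-hour value
```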
E.L. Mangas and A. Rubio and R. Álvarez-Marín and G. Labrador-Herrera and J. Pachón and M. Eugenia Pachón-Ibáñez and F. Divina and A.J. Pérez-Pulido Pangenome of Acinetobacter baumannii uncovers two groups of genomes, one of them with genes involved in CRISPR/Cas defence systems associated with the absence of plasmids and exclusive genes for biofilm formation (Journal Article) Microbial Genomics, pp. mgen000309, 2019. (Abstract | Links | BibTeX | Tags: bioinformatics) @article{MG2019, title = {Pangenome of Acinetobacter baumannii uncovers two groups of genomes, one of them with genes involved in CRISPR/Cas defence systems associated with the absence of plasmids and exclusive genes for biofilm formation}, author = {E.L. Mangas and A. Rubio and R. Álvarez-Marín and G. Labrador-Herrera and J. Pachón and M. Eugenia Pachón-Ibáñez and F. Divina and A.J. Pérez-Pulido}, url = {https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000309}, doi = {https://doi.org/10.1099/mgen.0.000309}, year = {2019}, date = {2019-01-01}, journal = {Microbial Genomics}, pages = {mgen000309}, abstract = {Acinetobacter baumannii is an opportunistic bacterium that causes hospital-acquired infections with a high mortality and morbidity, since there are strains resistant to virtually any kind of antibiotic. The chase to find novel strategies to fight against this microbe can be favoured by knowledge of the complete catalogue of genes of the species, and their relationship with the specific characteristics of different isolates. In this work, we performed a genomics analysis of almost 2500 strains. Two different groups of genomes were found based on the number of shared genes. One of these groups rarely has plasmids, and bears clustered regularly interspaced short palindromic repeat (CRISPR) sequences, in addition to CRISPR-associated genes (cas genes) or restriction-modification system genes. This fact strongly supports the lack of plasmids. Furthermore, the scarce plasmids in this group also bear CRISPR sequences, and specifically contain genes involved in prokaryotic toxin–antitoxin systems that could either act as the still little known CRISPR type IV system or be the precursors of other novel CRISPR/Cas systems. In addition, a limited set of strains present a new cas9-like gene, which may complement the other cas genes in inhibiting the entrance of new plasmids into the bacteria. Finally, this group has exclusive genes involved in biofilm formation, which would connect CRISPR systems to the biogenesis of these bacterial resistance structures.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Acinetobacter baumannii is an opportunistic bacterium that causes hospital-acquired infections with a high mortality and morbidity, since there are strains resistant to virtually any kind of antibiotic. The chase to find novel strategies to fight against this microbe can be favoured by knowledge of the complete catalogue of genes of the species, and their relationship with the specific characteristics of different isolates. In this work, we performed a genomics analysis of almost 2500 strains. Two different groups of genomes were found based on the number of shared genes. One of these groups rarely has plasmids, and bears clustered regularly interspaced short palindromic repeat (CRISPR) sequences, in addition to CRISPR-associated genes (cas genes) or restriction-modification system genes. This fact strongly supports the lack of plasmids. 
Furthermore, the scarce plasmids in this group also bear CRISPR sequences, and specifically contain genes involved in prokaryotic toxin–antitoxin systems that could either act as the still little known CRISPR type IV system or be the precursors of other novel CRISPR/Cas systems. In addition, a limited set of strains present a new cas9-like gene, which may complement the other cas genes in inhibiting the entrance of new plasmids into the bacteria. Finally, this group has exclusive genes involved in biofilm formation, which would connect CRISPR systems to the biogenesis of these bacterial resistance structures. |
V.E. Jiménez Chaves and M. García-Torres and J. Luis Vázquez Noguera and C.D. Cabrera Oviedo and A.P. Riego Esteche and F. Divina and M. Marrufo-Vázquez Analysis of Teacher Training in Mathematics in Paraguay’s Elementary Education System Using Machine Learning Techniques (Conference) International Joint Conference: 12th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2019) and 10th International Conference on EUropean Transnational Education (ICEUTE 2019), 2019. (Links | BibTeX | Tags: education) @conference{Chaves2019, title = {Analysis of Teacher Training in Mathematics in Paraguay’s Elementary Education System Using Machine Learning Techniques}, author = {V.E. Jiménez Chaves and M. García-Torres and J. Luis Vázquez Noguera and C.D. Cabrera Oviedo and A.P. Riego Esteche and F. Divina and M. Marrufo-Vázquez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_29}, year = {2019}, date = {2019-01-01}, booktitle = {International Joint Conference: 12th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2019) and 10th International Conference on EUropean Transnational Education (ICEUTE 2019)}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
M. García-Torres and D. Becerra-Alonso and F. A Gómez-Vela and F. Divina and I. López Cobo and F. Martínez-Álvarez Analysis of Student Achievement Scores: A Machine Learning Approach (Conference) ICEUTE 10th International Conference on EUropean Transnational Education, Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: education) @conference{Garcia2019, title = {Analysis of Student Achievement Scores: A Machine Learning Approach}, author = {M. García-Torres and D. Becerra-Alonso and F. A Gómez-Vela and F. Divina and I. López Cobo and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_28}, year = {2019}, date = {2019-01-01}, booktitle = {ICEUTE 10th International Conference on EUropean Transnational Education}, pages = {275-284}, series = {Advances in Intelligent Systems and Computing}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
M. S. Tehrany and S. Jones and F. Shabani and F. Martínez-Álvarez and D. T. Bui A Novel Ensemble Modelling Approach for the Spatial Prediction of Tropical Forest Fire Susceptibility Using Logitboost Machine Learning Classifier and Multi-source Geospatial Data (Journal Article) Theoretical and Applied Climatology, 137 , pp. 637-653, 2019. (Links | BibTeX | Tags: natural disasters, time series) @article{TEHRANY19, title = {A Novel Ensemble Modelling Approach for the Spatial Prediction of Tropical Forest Fire Susceptibility Using Logitboost Machine Learning Classifier and Multi-source Geospatial Data}, author = {M. S. Tehrany and S. Jones and F. Shabani and F. Martínez-Álvarez and D. T. Bui}, url = {https://link.springer.com/article/10.1007/s00704-018-2628-9}, doi = {https://doi.org/10.1007/s00704-018-2628-9}, year = {2019}, date = {2019-01-01}, journal = {Theoretical and Applied Climatology}, volume = {137}, pages = {637-653}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
A. Galicia and R. Talavera-Llames and A. Troncoso and I. Koprinska and F. Martínez-Álvarez Multi-step forecasting for big data time series based on ensemble learning (Journal Article) Knowledge Based-Systems, 163 , pp. 830-841, 2019. (Links | BibTeX | Tags: big data, time series) @article{GALICIA19, title = {Multi-step forecasting for big data time series based on ensemble learning}, author = {A. Galicia and R. Talavera-Llames and A. Troncoso and I. Koprinska and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705118304957}, doi = {https://doi.org/10.1016/j.knosys.2018.10.009}, year = {2019}, date = {2019-01-01}, journal = {Knowledge Based-Systems}, volume = {163}, pages = {830-841}, keywords = {big data, time series}, pubstate = {published}, tppubtype = {article} } |