Publications
2021 |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso Discovering three-dimensional patterns in real-time from data streams: An online triclustering approach (Journal Article) Information Sciences, 558 , pp. 174-193, 2021. (Abstract | Links | BibTeX | Tags: big data, IoT, pattern recognition)
@article{Melgar21_IS,
  title = {Discovering three-dimensional patterns in real-time from data streams: An online triclustering approach},
  author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso},
  url = {https://www.sciencedirect.com/science/article/pii/S0020025521000220},
  doi = {10.1016/j.ins.2020.12.089},
  year = {2021},
  date = {2021-01-01},
  journal = {Information Sciences},
  volume = {558},
  pages = {174--193},
  abstract = {Triclustering algorithms group sets of coordinates of 3-dimensional datasets. In this paper, a new triclustering approach for data streams is introduced. It follows a streaming scheme of learning in two steps: offline and online phases. First, the offline phase provides a summary model with the components of the triclusters. Then, the second stage is the online phase to deal with data in streaming. This online phase consists in using the summary model obtained in the offline stage to update the triclusters as fast as possible with genetic operators. Results using three types of synthetic datasets and a real-world environmental sensor dataset are reported. The performance of the proposed triclustering streaming algorithm is compared to a batch triclustering algorithm, showing an accurate performance both in terms of quality and running times.},
  keywords = {big data, IoT, pattern recognition},
  pubstate = {published},
  tppubtype = {article}
}
Triclustering algorithms group sets of coordinates of 3-dimensional datasets. In this paper, a new triclustering approach for data streams is introduced. It follows a streaming scheme of learning in two steps: offline and online phases.
First, the offline phase provides a summary model with the components of the triclusters. Then, the second stage is the online phase to deal with data in streaming. This online phase consists in using the summary model obtained in the offline stage to update the triclusters as fast as possible with genetic operators. Results using three types of synthetic datasets and a real-world environmental sensor dataset are reported. The performance of the proposed triclustering streaming algorithm is compared to a batch triclustering algorithm, showing an accurate performance both in terms of quality and running times. |
A. R. Troncoso-García and J. A. Ortega and R. Seepold and N. Martínez-Madrid Non-invasive devices for respiratory sound monitoring (Conference) KES International Conference on Knowledge Based and Intelligent information and Engineering Systems, 2021.
@conference{TRONCOSO-GARCIA21,
  title = {Non-invasive devices for respiratory sound monitoring},
  author = {A. R. Troncoso-García and J. A. Ortega and R. Seepold and N. Martínez-Madrid},
  url = {https://www.sciencedirect.com/science/article/pii/S1877050921018135},
  doi = {10.1016/j.procs.2021.09.076},
  year = {2021},
  date = {2021-01-01},
  booktitle = {KES International Conference on Knowledge Based and Intelligent information and Engineering Systems},
  pages = {3040--3048},
  keywords = {IoT},
  pubstate = {published},
  tppubtype = {conference}
}
|
A. J. Pérez-Pulido and G. Asencio-Cortés and A. M. Brokate-Llanos and G. Brea-Calvo and M. R. Rodríguez-Griñolo and A. Garzón and M. J. Muñoz Serial co-expression analysis of host factors from SARS-CoV viruses highly converges with former high-throughput screenings and proposes key regulators (Journal Article) Briefings in Bioinformatics, 22 (2), pp. 1038–1052, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics)
@article{pulido2021,
  title = {Serial co-expression analysis of host factors from {SARS-CoV} viruses highly converges with former high-throughput screenings and proposes key regulators},
  author = {A. J. Pérez-Pulido and G. Asencio-Cortés and A. M. Brokate-Llanos and G. Brea-Calvo and M. R. Rodríguez-Griñolo and A. Garzón and M. J. Muñoz},
  url = {https://academic.oup.com/bib/article/22/2/1038/6103172},
  doi = {10.1093/bib/bbaa419},
  year = {2021},
  date = {2021-01-01},
  journal = {Briefings in Bioinformatics},
  volume = {22},
  number = {2},
  pages = {1038--1052},
  abstract = {The current genomics era is bringing an unprecedented growth in the amount of gene expression data, only comparable to the exponential growth of sequences in databases during the last decades. This data allow the design of secondary analyses that take advantage of this information to create new knowledge. One of these feasible analyses is the evaluation of the expression level for a gene through a series of different conditions or cell types. Based on this idea, we have developed Automatic and Serial Analysis of CO-expression, which performs expression profiles for a given gene along hundreds of heterogeneous and normalized transcriptomics experiments and discover other genes that show either a similar or an inverse behavior. It might help to discover co-regulated genes, and common transcriptional regulators in any biological model. The present severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic is an opportunity to test this novel approach due to the wealth of data that are being generated, which could be used for validating results. Thus, we have identified 35 host factors in the literature putatively involved in the infectious cycle of SARS-CoV viruses and searched for genes tightly co-expressed with them. We have found 1899 co-expressed genes whose assigned functions are strongly related to viral cycles. Moreover, this set of genes heavily overlaps with those identified by former laboratory.},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
The current genomics era is bringing an unprecedented growth in the amount of gene expression data, only comparable to the exponential growth of sequences in databases during the last decades. This data allow the design of secondary analyses that take advantage of this information to create new knowledge. One of these feasible analyses is the evaluation of the expression level for a gene through a series of different conditions or cell types. Based on this idea, we have developed Automatic and Serial Analysis of CO-expression, which performs expression profiles for a given gene along hundreds of heterogeneous and normalized transcriptomics experiments and discover other genes that show either a similar or an inverse behavior. It might help to discover co-regulated genes, and common transcriptional regulators in any biological model. The present severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) pandemic is an opportunity to test this novel approach due to the wealth of data that are being generated, which could be used for validating results. Thus, we have identified 35 host factors in the literature putatively involved in the infectious cycle of SARS-CoV viruses and searched for genes tightly co-expressed with them.
We have found 1899 co-expressed genes whose assigned functions are strongly related to viral cycles. Moreover, this set of genes heavily overlaps with those identified by former laboratory. |
S.A. Grillo and J.C. Román and J.D. Mello-Román and J.L. Vázquez Noguera and M. García-Torres and F. Divina and P.E. Sotomayor Adjacent Inputs With Different Labels and Hardness in Supervised Learning (Journal Article) IEEE Access, pp. 162487–162498, 2021. (Links | BibTeX | Tags: feature selection, pattern recognition)
@article{grillo2021adjacent,
  title = {Adjacent Inputs With Different Labels and Hardness in Supervised Learning},
  author = {S. A. Grillo and J. C. Román and J. D. Mello-Román and J. L. {Vázquez Noguera} and M. García-Torres and F. Divina and P. E. Sotomayor},
  doi = {10.1109/ACCESS.2021.3131150},
  year = {2021},
  date = {2021-01-01},
  journal = {IEEE Access},
  volume = {9},
  pages = {162487--162498},
  publisher = {IEEE},
  keywords = {feature selection, pattern recognition},
  pubstate = {published},
  tppubtype = {article}
}
|
R. Parra and V. Ojeda and J.L. Vázquez Noguera and M. García-Torres and J.C. Mello-Román and C. Villalba and J. Facon and F. Divina and O. Cardozo and V. Castillo A Trust-Based Methodology to Evaluate Deep Learning Models for Automatic Diagnosis of Ocular Toxoplasmosis from Fundus Images (Journal Article) Diagnostics, 11 (11), pp. 1951, 2021. (Links | BibTeX | Tags: bioinformatics, deep learning, pattern recognition)
@article{parra2021trust,
  title = {A Trust-Based Methodology to Evaluate Deep Learning Models for Automatic Diagnosis of Ocular Toxoplasmosis from Fundus Images},
  author = {R. Parra and V. Ojeda and J. L. {Vázquez Noguera} and M. García-Torres and J. C. Mello-Román and C. Villalba and J. Facon and F. Divina and O. Cardozo and V. Castillo},
  doi = {10.3390/diagnostics11111951},
  year = {2021},
  date = {2021-01-01},
  journal = {Diagnostics},
  volume = {11},
  number = {11},
  pages = {1951},
  publisher = {Multidisciplinary Digital Publishing Institute},
  keywords = {bioinformatics, deep learning, pattern recognition},
  pubstate = {published},
  tppubtype = {article}
}
|
J. Ayala and M. García-Torres and J.L. Vázquez Noguera and F. Gómez-Vela and F. Divina Technical analysis strategy optimization using a machine learning approach in stock market indices (Journal Article) Knowledge-Based Systems, pp. 107119, 2021. (Links | BibTeX | Tags: deep learning, pattern recognition)
@article{ayala2021technical,
  title = {Technical analysis strategy optimization using a machine learning approach in stock market indices},
  author = {J. Ayala and M. García-Torres and J. L. {Vázquez Noguera} and F. Gómez-Vela and F. Divina},
  doi = {10.1016/j.knosys.2021.107119},
  year = {2021},
  date = {2021-01-01},
  journal = {Knowledge-Based Systems},
  volume = {225},
  pages = {107119},
  publisher = {Elsevier},
  keywords = {deep learning, pattern recognition},
  pubstate = {published},
  tppubtype = {article}
}
|
P.M. Martínez-García and M. García-Torres and F. Divina and J. Terrón-Bautista and I. Delgado-Sainz and F. Gómez-Vela and F. Cortés-Ledesma Genome-wide prediction of topoisomerase II $beta$ binding by architectural factors and chromatin accessibility (Journal Article) PLoS computational biology, 17 (1), pp. e1007814, 2021. (Links | BibTeX | Tags: bioinformatics)
@article{martinez2021genome,
  title = {Genome-wide prediction of topoisomerase {II} {$\beta$} binding by architectural factors and chromatin accessibility},
  author = {P. M. Martínez-García and M. García-Torres and F. Divina and J. Terrón-Bautista and I. Delgado-Sainz and F. Gómez-Vela and F. Cortés-Ledesma},
  doi = {10.1371/journal.pcbi.1007814},
  year = {2021},
  date = {2021-01-01},
  journal = {PLoS computational biology},
  volume = {17},
  number = {1},
  pages = {e1007814},
  publisher = {Public Library of Science},
  address = {San Francisco, CA, USA},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
|
A. Lopez-Fernandez and D. Rodriguez-Baena and F. Gomez-Vela and F. Divina and M. Garcia-Torres A multi-GPU biclustering algorithm for binary datasets (Journal Article) Journal of Parallel and Distributed Computing, 147 , pp. 209–219, 2021. (Links | BibTeX | Tags: bioinformatics, pattern recognition)
@article{lopez2021multi,
  title = {A multi-{GPU} biclustering algorithm for binary datasets},
  author = {A. Lopez-Fernandez and D. Rodriguez-Baena and F. Gomez-Vela and F. Divina and M. Garcia-Torres},
  doi = {10.1016/j.jpdc.2020.09.009},
  year = {2021},
  date = {2021-01-01},
  journal = {Journal of Parallel and Distributed Computing},
  volume = {147},
  pages = {209--219},
  publisher = {Elsevier},
  keywords = {bioinformatics, pattern recognition},
  pubstate = {published},
  tppubtype = {article}
}
|
V.E. Castillo Benítez and I. Castro Matto and J.C. Mello Román and J.L. Vázquez Noguera and M. García-Torres and J. Ayala and D.P. Pinto-Roa and P.E. Gardel-Sotomayor and J. Facon and S.A. Grillo Dataset from fundus images for the study of diabetic retinopathy (Journal Article) Data in Brief, 36 , pp. 107068, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics)
@article{benitez2021dataset,
  title = {Dataset from fundus images for the study of diabetic retinopathy},
  author = {V. E. {Castillo Benítez} and I. {Castro Matto} and J. C. {Mello Román} and J. L. {Vázquez Noguera} and M. García-Torres and J. Ayala and D. P. Pinto-Roa and P. E. Gardel-Sotomayor and J. Facon and S. A. Grillo},
  url = {https://www.sciencedirect.com/science/article/pii/S2352340921003528},
  doi = {10.1016/j.dib.2021.107068},
  year = {2021},
  date = {2021-01-01},
  journal = {Data in Brief},
  volume = {36},
  pages = {107068},
  publisher = {Elsevier},
  abstract = {This article presents a database containing 757 color fundus images acquired at the Department of Ophthalmology of the Hospital de Clínicas, Facultad de Ciencias Médicas (FCM), Universidad Nacional de Asunción (UNA), Paraguay. Firstly, the retinal images were acquired with a clinical procedure presented in this paper. The acquisition of the retinographies was made through the Visucam 500 camera of the Zeiss brand. Next, two expert ophthalmologists have classified the dataset. These data can help physicians and researchers in the detection of cases of Non-Proliferative Diabetic Retinopathy (NPDR) and Proliferative Diabetic Retinopathy (PDR), in their different stages. The dataset generated will be useful for ophthalmologists and researchers to work on automatic detection algorithms for Diabetic Retinopathy (DR).},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
This article presents a database containing 757 color fundus images acquired at the Department of Ophthalmology of the Hospital de Clínicas, Facultad de Ciencias Médicas (FCM), Universidad Nacional de Asunción (UNA), Paraguay. Firstly, the retinal images were acquired with a clinical procedure presented in this paper. The acquisition of the retinographies was made through the Visucam 500 camera of the Zeiss brand. Next, two expert ophthalmologists have classified the dataset. These data can help physicians and researchers in the detection of cases of Non-Proliferative Diabetic Retinopathy (NPDR) and Proliferative Diabetic Retinopathy (PDR), in their different stages. The dataset generated will be useful for ophthalmologists and researchers to work on automatic detection algorithms for Diabetic Retinopathy (DR). |
H. Ho Shin and C. Sauer Ayala and P. Pérez-Estigarribia and S.A. Grillo and L. Segovia-Cabrera and M. García-Torres and C. Gaona and S. Irala and M.E. Pedrozo and G. Sequera and J.L. Vázquez Noguera and E. De Los Santos A Mathematical Model for COVID-19 with Variable Transmissibility and Hospitalizations: A Case Study in Paraguay (Journal Article) Applied Sciences, 11 (20), pp. 9726, 2021. (Abstract | Links | BibTeX | Tags: bioinformatics)
@article{shin2021mathematical,
  title = {A Mathematical Model for {COVID-19} with Variable Transmissibility and Hospitalizations: A Case Study in {Paraguay}},
  author = {H. Ho Shin and C. {Sauer Ayala} and P. Pérez-Estigarribia and S. A. Grillo and L. Segovia-Cabrera and M. García-Torres and C. Gaona and S. Irala and M. E. Pedrozo and G. Sequera and J. L. {Vázquez Noguera} and E. {De Los Santos}},
  url = {https://www.mdpi.com/2076-3417/11/20/9726},
  doi = {10.3390/app11209726},
  year = {2021},
  date = {2021-01-01},
  journal = {Applied Sciences},
  volume = {11},
  number = {20},
  pages = {9726},
  publisher = {Multidisciplinary Digital Publishing Institute},
  abstract = {Forecasting the dynamics of the number of cases with coronavirus disease 2019 (COVID-19) in a given population is a challenging task due to behavioural changes which occur over short periods. Planning of hospital resources and containment measures in the near term require a scenario analysis and the use of predictive models to gain insight into possible outcomes for each scenario. In this paper, we present the SEIR-H epidemiological model for the spread dynamics in a given population and the impact of COVID-19 in the local health system. It was developed as an extension of the classic SEIR model to account for required hospital resources and behavioural changes of the population in response to containment measures. Time-varying parameters such as transmissibility are estimated using Bayesian methods, based on the database of reported cases with a moving time-window strategy. The assessment of the model offers reasonable results with estimated parameters and simulations, reflecting the observed dynamics in Paraguay. The proposed model can be used to simulate future scenarios and possible effects of containment strategies, to guide the public institution response based on the available resources in the local health system.},
  keywords = {bioinformatics},
  pubstate = {published},
  tppubtype = {article}
}
Forecasting the dynamics of the number of cases with coronavirus disease 2019 (COVID-19) in a given population is a challenging task due to behavioural changes which occur over short periods. Planning of hospital resources and containment measures in the near term require a scenario analysis and the use of predictive models to gain insight into possible outcomes for each scenario. In this paper, we present the SEIR-H epidemiological model for the spread dynamics in a given population and the impact of COVID-19 in the local health system. It was developed as an extension of the classic SEIR model to account for required hospital resources and behavioural changes of the population in response to containment measures. Time-varying parameters such as transmissibility are estimated using Bayesian methods, based on the database of reported cases with a moving time-window strategy. The assessment of the model offers reasonable results with estimated parameters and simulations, reflecting the observed dynamics in Paraguay. The proposed model can be used to simulate future scenarios and possible effects of containment strategies, to guide the public institution response based on the available resources in the local health system. |
F. Divina and F. Gómez-Vela and M. García-Torres Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast (Journal Article) Applied Sciences, 11 (3), pp. 1261, 2021. (Links | BibTeX | Tags: energy)
@article{divina2021advanced,
  title = {Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast},
  author = {F. Divina and F. Gómez-Vela and M. García-Torres},
  url = {https://www.mdpi.com/2076-3417/11/3/1261/htm},
  doi = {10.3390/app11031261},
  year = {2021},
  date = {2021-01-01},
  journal = {Applied Sciences},
  volume = {11},
  number = {3},
  pages = {1261},
  publisher = {Multidisciplinary Digital Publishing Institute},
  keywords = {energy},
  pubstate = {published},
  tppubtype = {article}
}
|
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-Acceleration of the Solar System from Gaia astrometry (Journal Article) Astronomy & Astrophysics, 649 , pp. A9, 2021. (Links | BibTeX | Tags: astrostatistics)
@article{klioner2021gaia,
  title = {{Gaia Early Data Release} 3-Acceleration of the Solar System from {Gaia} astrometry},
  author = {{Gaia Collaboration} and M. García-Torres},
  url = {https://www.aanda.org/articles/aa/full_html/2021/05/aa39734-20/aa39734-20.html},
  doi = {10.1051/0004-6361/202039734},
  year = {2021},
  date = {2021-01-01},
  journal = {Astronomy \& Astrophysics},
  volume = {649},
  pages = {A9},
  publisher = {EDP sciences},
  keywords = {astrostatistics},
  pubstate = {published},
  tppubtype = {article}
}
|
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-The Galactic anticentre (Journal Article) Astronomy & Astrophysics, 649 , pp. A8, 2021. (Links | BibTeX | Tags: astrostatistics)
@article{antoja2021gaia,
  title = {{Gaia Early Data Release} 3-The {Galactic} anticentre},
  author = {{Gaia Collaboration} and M. García-Torres},
  url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39714-20/aa39714-20.html},
  doi = {10.1051/0004-6361/202039714},
  year = {2021},
  date = {2021-01-01},
  journal = {Astronomy \& Astrophysics},
  volume = {649},
  pages = {A8},
  publisher = {EDP sciences},
  keywords = {astrostatistics},
  pubstate = {published},
  tppubtype = {article}
}
|
A. GA. Brown and A. Vallenari and T. Prusti and JHJ. De Bruijne and C. Babusiaux and M. Biermann and OL. Creevey and DW. Evans and L. Eyer and A. Hutton and M. García-Torres and others Gaia Early Data Release 3-Summary of the contents and survey properties (Journal Article) Astronomy & Astrophysics, 649 , pp. A1, 2021. (Links | BibTeX | Tags: astrostatistics)
@article{brown2021gaia,
  title = {{Gaia Early Data Release} 3-Summary of the contents and survey properties},
  author = {A. G. A. Brown and A. Vallenari and T. Prusti and J. H. J. {De Bruijne} and C. Babusiaux and M. Biermann and O. L. Creevey and D. W. Evans and L. Eyer and A. Hutton and M. García-Torres and others},
  url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39657-20/aa39657-20.html},
  doi = {10.1051/0004-6361/202039657},
  year = {2021},
  date = {2021-01-01},
  journal = {Astronomy \& Astrophysics},
  volume = {649},
  pages = {A1},
  publisher = {EDP sciences},
  keywords = {astrostatistics},
  pubstate = {published},
  tppubtype = {article}
}
|
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-Structure and properties of the Magellanic Clouds (Journal Article) Astronomy & Astrophysics, 649 , pp. A7, 2021. (Links | BibTeX | Tags: astrostatistics)
@article{luri2021gaia,
  title = {{Gaia Early Data Release} 3-Structure and properties of the {Magellanic Clouds}},
  author = {{Gaia Collaboration} and M. García-Torres},
  url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39588-20/aa39588-20.html},
  doi = {10.1051/0004-6361/202039588},
  year = {2021},
  date = {2021-01-01},
  journal = {Astronomy \& Astrophysics},
  volume = {649},
  pages = {A7},
  publisher = {EDP sciences},
  keywords = {astrostatistics},
  pubstate = {published},
  tppubtype = {article}
}
|
Gaia Collaboration and M. García-Torres Gaia Early Data Release 3-The Gaia Catalogue of Nearby Stars (Journal Article) Astronomy & Astrophysics, 649 , pp. A6, 2021. (Links | BibTeX | Tags: astrostatistics)
@article{smart2021gaia,
  title = {{Gaia Early Data Release} 3-The {Gaia Catalogue of Nearby Stars}},
  author = {{Gaia Collaboration} and M. García-Torres},
  url = {https://www.aanda.org/articles/aa/abs/2021/05/aa39498-20/aa39498-20.html},
  doi = {10.1051/0004-6361/202039498},
  year = {2021},
  date = {2021-01-01},
  journal = {Astronomy \& Astrophysics},
  volume = {649},
  pages = {A6},
  publisher = {EDP sciences},
  keywords = {astrostatistics},
  pubstate = {published},
  tppubtype = {article}
}
|
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso Nearest neighbours-based forecasting for electricity demand time series in streaming (Conference) Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21), Lecture Notes in Artificial Intelligence 2021. (Abstract | BibTeX | Tags: IoT, time series)
@conference{CAEPIA21_Laura,
  title = {Nearest neighbours-based forecasting for electricity demand time series in streaming},
  author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso },
  year = {2021},
  date = {2021-01-01},
  booktitle = {Conference of the Spanish Association for Artificial Intelligence (CAEPIA'21)},
  series = {Lecture Notes in Artificial Intelligence},
  abstract = {This paper presents a forecasting algorithm for time series in streaming. The methodology has two well-differentiated stages: the algorithm searches for the nearest neighbors to generate an initial prediction model in the batch phase. Then, an online phase is carried out when the time series arrives in streaming. In particular, the nearest neighbor of the streaming data from the training set is computed and the nearest neighbors, previously computed in the batch phase, of this nearest neighbor are used to obtain the predictions. Results using the electricity consumption time series are reported, showing a remarkable performance of the proposed algorithm in terms of forecasting errors when compared to a nearest neighbors-based benchmark algorithm. The running times for the predictions are also remarkable.},
  keywords = {IoT, time series},
  pubstate = {published},
  tppubtype = {conference}
}
This paper presents a forecasting algorithm for time series in streaming. The methodology has two well-differentiated stages: the algorithm searches for the nearest neighbors to generate an initial prediction model in the batch phase. Then, an online phase is carried out when the time series arrives in streaming.
In particular, the nearest neighbor of the streaming data from the training set is computed and the nearest neighbors, previously computed in the batch phase, of this nearest neighbor are used to obtain the predictions. Results using the electricity consumption time series are reported, showing a remarkable performance of the proposed algorithm in terms of forecasting errors when compared to a nearest neighbors-based benchmark algorithm. The running times for the predictions are also remarkable. |
F. Pietrapiana and J. M. Feria-Dominguez and A. Troncoso Applying wrapper-based variable selection techniques to predict MFIs profitability: evidence from Peru (Journal Article) Journal of Development Effectiveness, 2021. (Abstract | Links | BibTeX | Tags: feature selection)
@article{JDE_Feria,
  title = {Applying wrapper-based variable selection techniques to predict {MFIs} profitability: evidence from {Peru}},
  author = {F. Pietrapiana and J. M. Feria-Dominguez and A. Troncoso},
  doi = {10.1080/19439342.2021.1884119},
  year = {2021},
  date = {2021-01-01},
  journal = {Journal of Development Effectiveness},
  abstract = {In this paper, we analyse the main factors explaining the profitability (ROA) of Microfinance Institutions (MFIs) in Peru from 2011 to 2107. We apply three wrapper techniques to a sample of 168 Peruvians MFIs and 69 attributes obtained from MIX Market database. After running the algorithms M5ʹ, k nearest neighbours (KNN) and Random Forest, we find that the M5ʹ algorithm provides the best fit for predicting ROA. Particularly, the key variable of the regression tree is the percentage of expenses over assets and, depending on its value, it is followed by net income after taxes and before donations, or profit margins.},
  keywords = {feature selection},
  pubstate = {published},
  tppubtype = {article}
}
In this paper, we analyse the main factors explaining the profitability (ROA) of Microfinance Institutions (MFIs) in Peru from 2011 to 2107. We apply three wrapper techniques to a sample of 168 Peruvians MFIs and 69 attributes obtained from MIX Market database. After running the algorithms M5ʹ, k nearest neighbours (KNN) and Random Forest, we find that the M5ʹ algorithm provides the best fit for predicting ROA. Particularly, the key variable of the regression tree is the percentage of expenses over assets and, depending on its value, it is followed by net income after taxes and before donations, or profit margins. |
2020 |
P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso A New Forecasting Algorithm Based on Neighbors for Streaming Electricity Time Series (Conference) HAIS 15th International Conference on Hybrid Artificial Intelligence Systems, Lecture Notes in Computer Science 2020. (Links | BibTeX | Tags: big data, energy, IoT, time series)
@conference{HAIS2020,
  title = {A New Forecasting Algorithm Based on Neighbors for Streaming Electricity Time Series},
  author = {P. Jiménez-Herrera and L. Melgar-García and G. Asencio-Cortés and A. Troncoso},
  url = {https://link.springer.com/chapter/10.1007/978-3-030-61705-9_43},
  doi = {10.1007/978-3-030-61705-9_43},
  year = {2020},
  date = {2020-11-04},
  booktitle = {HAIS 15th International Conference on Hybrid Artificial Intelligence Systems},
  pages = {522--533},
  series = {Lecture Notes in Computer Science},
  keywords = {big data, energy, IoT, time series},
  pubstate = {published},
  tppubtype = {conference}
}
|
Y. Lin and I. Koprinska and M. Rana and A. Troncoso Solar Power Forecasting Based on Pattern Sequence Similarity and Meta-learning (Conference) ICANN 29th International Conference on Artificial Neural Networks, Lecture Notes in Computer Science 2020. (Links | BibTeX | Tags: energy, time series)
@conference{ICANN20,
  title = {Solar Power Forecasting Based on Pattern Sequence Similarity and Meta-learning},
  author = {Y. Lin and I. Koprinska and M. Rana and A. Troncoso},
  url = {https://link.springer.com/chapter/10.1007/978-3-030-61609-0_22},
  doi = {10.1007/978-3-030-61609-0_22},
  year = {2020},
  date = {2020-10-14},
  booktitle = {ICANN 29th International Conference on Artificial Neural Networks},
  pages = {271--283},
  series = {Lecture Notes in Computer Science},
  keywords = {energy, time series},
  pubstate = {published},
  tppubtype = {conference}
}
|
L. Melgar-García and M. T. Godinho and R. Espada and D. Gutiérrez-Avilés and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero Discovering Spatio-Temporal Patterns in Precision Agriculture Based on Triclustering (Conference) SOCO 15th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing 2020. (Links | BibTeX | Tags: IoT, pattern recognition)
@conference{SOCO20,
  title = {Discovering Spatio-Temporal Patterns in Precision Agriculture Based on Triclustering},
  author = {L. Melgar-García and M. T. Godinho and R. Espada and D. Gutiérrez-Avilés and I. S. Brito and F. Martínez-Álvarez and A. Troncoso and C. Rubio-Escudero},
  url = {https://link.springer.com/chapter/10.1007/978-3-030-57802-2_22},
  doi = {10.1007/978-3-030-57802-2_22},
  year = {2020},
  date = {2020-08-29},
  booktitle = {SOCO 15th International Conference on Soft Computing Models in Industrial and Environmental Applications},
  pages = {226--236},
  series = {Advances in Intelligent Systems and Computing},
  keywords = {IoT, pattern recognition},
  pubstate = {published},
  tppubtype = {conference}
}
|
O. Mitxelena-Hoyos and J. L. Amaro-Mellado and F. Martínez-Álvarez Use of IT in Project-Based Learning Applied to the Subject Surveying in Civil Engineering (Conference) ICEUTE 11th International Conference on European Transnational Education, 1266 , Advances in Intelligent Systems and Computing 2020. (Abstract | Links | BibTeX | Tags: education)
@conference{MITXELENA20,
  title = {Use of {IT} in Project-Based Learning Applied to the Subject Surveying in Civil Engineering},
  author = {O. Mitxelena-Hoyos and J. L. Amaro-Mellado and F. Martínez-Álvarez},
  url = {https://link.springer.com/chapter/10.1007%2F978-3-030-57799-5_44},
  doi = {10.1007/978-3-030-57799-5_44},
  year = {2020},
  date = {2020-08-15},
  booktitle = {ICEUTE 11th International Conference on European Transnational Education},
  volume = {1266},
  pages = {428--437},
  series = {Advances in Intelligent Systems and Computing},
  abstract = {This work describes the design and implementation of the subject of surveying under the Project-based Learning method. The modernization of teaching-learning and the new requirements of society to our graduates, force us to move towards a new style of higher education. In the case of topography, which is a transversal science closely related to the various skills of the degree, the learning conveyed by a project provides verisimilitude and depth of the knowledge acquired. Given the previous experiences, it is expected that better marks and performance are reached by the students.},
  keywords = {education},
  pubstate = {published},
  tppubtype = {conference}
}
This work describes the design and implementation of the subject of surveying under the Project-based Learning method. The modernization of teaching-learning and the new requirements of society to our graduates, force us to move towards a new style of higher education.
In the case of topography, which is a transversal science closely related to the various skills of the degree, the learning conveyed by a project provides verisimilitude and depth of the knowledge acquired. Given the previous experiences, it is expected that better marks and performance are reached by the students. |
F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting (Journal Article) Applied Sciences, 10 (16), pp. 5487, 2020. (Abstract | Links | BibTeX | Tags: big data, deep learning, energy, time series) @article{DIVINA2020, title = {Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting}, author = {F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.mdpi.com/2076-3417/10/16/5487}, doi = {10.3390/app10165487}, year = {2020}, date = {2020-07-30}, journal = {Applied Sciences}, volume = {10}, number = {16}, pages = {5487}, abstract = {The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. 
Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared.}, keywords = {big data, deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared. |
F. Martínez-Álvarez and G. Asencio-Cortés and J. F. Torres and D. Gutiérrez-Avilés and L. Melgar-García and R. Pérez-Chacón and C. Rubio-Escudero and A. Troncoso and J. C. Riquelme Coronavirus Optimization Algorithm: A bioinspired metaheuristic based on the COVID-19 propagation model (Journal Article) Big Data, 8 (4), pp. 308-322, 2020. (Abstract | Links | BibTeX | Tags: big data, deep learning, energy, time series) @article{MARTINEZ-ALVAREZ20, title = {Coronavirus Optimization Algorithm: A bioinspired metaheuristic based on the COVID-19 propagation model}, author = {F. Martínez-Álvarez and G. Asencio-Cortés and J. F. Torres and D. Gutiérrez-Avilés and L. Melgar-García and R. Pérez-Chacón and C. Rubio-Escudero and A. Troncoso and J. C. Riquelme}, url = {https://www.liebertpub.com/doi/full/10.1089/big.2020.0051}, doi = {10.1089/big.2020.0051}, year = {2020}, date = {2020-07-22}, journal = {Big Data}, volume = {8}, number = {4}, pages = {308--322}, abstract = {This work proposes a novel bioinspired metaheuristic, simulating how the coronavirus spreads and infects healthy people. From a primary infected individual (patient zero), the coronavirus rapidly infects new victims, creating large populations of infected people who will either die or spread infection. Relevant terms such as reinfection probability, super-spreading rate, social distancing measures or traveling rate are introduced into the model in order to simulate the coronavirus activity as accurately as possible. The infected population initially grows exponentially over time, but taking into consideration social isolation measures, the mortality rate and number of recoveries, the infected population gradually decreases. The Coronavirus Optimization Algorithm has two major advantages when compared to other similar strategies. Firstly, the input parameters are already set according to the disease statistics, preventing researchers from initializing them with arbitrary values. 
Secondly, the approach has the ability to end after several iterations, without setting this value either. Furthermore, a parallel multi-virus version is proposed, where several coronavirus strains evolve over time and explore wider search space areas in less iterations. Finally, the metaheuristic has been combined with deep learning models, in order to find optimal hyperparameters during the training phase. As application case, the problem of electricity load time series forecasting has been addressed, showing quite remarkable performance.}, keywords = {big data, deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } This work proposes a novel bioinspired metaheuristic, simulating how the coronavirus spreads and infects healthy people. From a primary infected individual (patient zero), the coronavirus rapidly infects new victims, creating large populations of infected people who will either die or spread infection. Relevant terms such as reinfection probability, super-spreading rate, social distancing measures or traveling rate are introduced into the model in order to simulate the coronavirus activity as accurately as possible. The infected population initially grows exponentially over time, but taking into consideration social isolation measures, the mortality rate and number of recoveries, the infected population gradually decreases. The Coronavirus Optimization Algorithm has two major advantages when compared to other similar strategies. Firstly, the input parameters are already set according to the disease statistics, preventing researchers from initializing them with arbitrary values. Secondly, the approach has the ability to end after several iterations, without setting this value either. Furthermore, a parallel multi-virus version is proposed, where several coronavirus strains evolve over time and explore wider search space areas in less iterations. 
Finally, the metaheuristic has been combined with deep learning models, in order to find optimal hyperparameters during the training phase. As application case, the problem of electricity load time series forecasting has been addressed, showing quite remarkable performance. |
R. Pérez-Chacón and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso Big data time series forecasting based on pattern sequence similarity and its application to the electricity demand (Journal Article) Information Sciences, 540 , pp. 160-174, 2020. (Abstract | Links | BibTeX | Tags: big data, energy, time series) @article{PEREZ20, title = {Big data time series forecasting based on pattern sequence similarity and its application to the electricity demand}, author = {R. Pérez-Chacón and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.sciencedirect.com/science/article/pii/S0020025520306010}, doi = {10.1016/j.ins.2020.06.014}, year = {2020}, date = {2020-06-06}, journal = {Information Sciences}, volume = {540}, pages = {160--174}, abstract = {This work proposes a novel algorithm to forecast big data time series. Based on the well-established Pattern Sequence Forecasting algorithm, this new approach has two major contributions to the literature. First, the improvement of the aforementioned algorithm with respect to the accuracy of predictions, and second, its transformation into the big data context, having reached meaningful results in terms of scalability. The algorithm uses the Apache Spark distributed computation framework and it is a ready-to-use application with few parameters to adjust. Physical and cloud clusters have been used to carry out the experimentation, which consisted in applying the algorithm to real-world data from Uruguay electricity demand.}, keywords = {big data, energy, time series}, pubstate = {published}, tppubtype = {article} } This work proposes a novel algorithm to forecast big data time series. Based on the well-established Pattern Sequence Forecasting algorithm, this new approach has two major contributions to the literature. 
First, the improvement of the aforementioned algorithm with respect to the accuracy of predictions, and second, its transformation into the big data context, having reached meaningful results in terms of scalability. The algorithm uses the Apache Spark distributed computation framework and it is a ready-to-use application with few parameters to adjust. Physical and cloud clusters have been used to carry out the experimentation, which consisted in applying the algorithm to real-world data from Uruguay electricity demand. |
M. Nazeriye and A. Haeri and F. Martínez-Álvarez Analysis of the Impact of Residential Property and Equipment on Building Energy Efficiency and Consumption - A Data Mining Approach (Journal Article) Applied Sciences, 10 (10), pp. 3589, 2020. (Abstract | Links | BibTeX | Tags: energy, time series) @article{NAZERIYE20, title = {Analysis of the Impact of Residential Property and Equipment on Building Energy Efficiency and Consumption - A Data Mining Approach}, author = {M. Nazeriye and A. Haeri and F. Martínez-Álvarez}, url = {https://www.mdpi.com/2076-3417/10/10/3589/}, doi = {10.3390/app10103589}, year = {2020}, date = {2020-05-22}, journal = {Applied Sciences}, volume = {10}, number = {10}, pages = {3589}, abstract = {Human living could become very difficult due to a lack of energy. The household sector plays a significant role in energy consumption. Trying to optimize and achieve efficient energy consumption can lead to large-scale energy savings. The aim of this paper is to identify the equipment and property affecting energy efficiency and consumption in residential homes. For this purpose, a hybrid data-mining approach based on K-means algorithms and decision trees is presented. To analyze the approach, data is modeled once using the approach and then without it. A data set of residential homes of England and Wales is arranged in low, medium and high consumption clusters. The C5.0 algorithm is run on each cluster to extract factors affecting energy efficiency. The comparison of the modeling results, and also their accuracy, prove that the approach employed has the ability to extract the findings with greater accuracy and detail than in other cases. The installation of boilers, using cavity walls, and installing insulation could improve energy efficiency. 
Old homes and the usage of economy 7 electricity have an unfavorable effect on energy efficiency, but the approach shows that each cluster behaved differently in these factors related to energy efficiency and has unique results}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } Human living could become very difficult due to a lack of energy. The household sector plays a significant role in energy consumption. Trying to optimize and achieve efficient energy consumption can lead to large-scale energy savings. The aim of this paper is to identify the equipment and property affecting energy efficiency and consumption in residential homes. For this purpose, a hybrid data-mining approach based on K-means algorithms and decision trees is presented. To analyze the approach, data is modeled once using the approach and then without it. A data set of residential homes of England and Wales is arranged in low, medium and high consumption clusters. The C5.0 algorithm is run on each cluster to extract factors affecting energy efficiency. The comparison of the modeling results, and also their accuracy, prove that the approach employed has the ability to extract the findings with greater accuracy and detail than in other cases. The installation of boilers, using cavity walls, and installing insulation could improve energy efficiency. Old homes and the usage of economy 7 electricity have an unfavorable effect on energy efficiency, but the approach shows that each cluster behaved differently in these factors related to energy efficiency and has unique results |
A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Automated Deployment of a Spark Cluster with Machine Learning Algorithm Integration (Journal Article) Big Data Research, 19-20 , pp. 100135, 2020. (Abstract | Links | BibTeX | Tags: big data, time series) @article{FERNANDEZ20, author = {A. M. Fernández and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, title = {Automated Deployment of a Spark Cluster with Machine Learning Algorithm Integration}, journal = {Big Data Research}, volume = {19-20}, pages = {100135}, year = {2020}, date = {2020-05-12}, doi = {10.1016/j.bdr.2020.100135}, url = {https://www.sciencedirect.com/science/article/pii/S2214579620300034}, abstract = {The vast amount of data stored nowadays has turned big data analytics into a very trendy research field. The Spark distributed computing platform has emerged as a dominant and widely used paradigm for cluster deployment and big data analytics. However, to get started up is still a task that may take much time when manually done, due to the requisites that all nodes must fulfill. This work introduces LadonSpark, an open-source and non-commercial solution to configure and deploy a Spark cluster automatically. It has been specially designed for easy and efficient management of a Spark cluster with a friendly graphical user interface to automate the deployment of a cluster and to start up the distributed file system of Hadoop quickly. Moreover, LadonSpark includes the functionality of integrating any algorithm into the system. That is, the user only needs to provide the executable file and the number of required inputs for proper parametrization. Source codes developed in Scala, R, Python, or Java can be supported on LadonSpark. 
Besides, clustering, regression, classification, and association rules algorithms are already integrated so that users can test its usability from its initial installation.}, keywords = {big data, time series}, pubstate = {published}, tppubtype = {article} } The vast amount of data stored nowadays has turned big data analytics into a very trendy research field. The Spark distributed computing platform has emerged as a dominant and widely used paradigm for cluster deployment and big data analytics. However, to get started up is still a task that may take much time when manually done, due to the requisites that all nodes must fulfill. This work introduces LadonSpark, an open-source and non-commercial solution to configure and deploy a Spark cluster automatically. It has been specially designed for easy and efficient management of a Spark cluster with a friendly graphical user interface to automate the deployment of a cluster and to start up the distributed file system of Hadoop quickly. Moreover, LadonSpark includes the functionality of integrating any algorithm into the system. That is, the user only needs to provide the executable file and the number of required inputs for proper parametrization. Source codes developed in Scala, R, Python, or Java can be supported on LadonSpark. Besides, clustering, regression, classification, and association rules algorithms are already integrated so that users can test its usability from its initial installation. |
G. Santamaría-Bonfil and M. B. Ibáñez and M. Pérez-Ramírez and G. Arroyo-Figueroa and F. Martínez-Álvarez Learning analytics for student modeling in virtual reality training systems: Lineworkers case (Journal Article) Computers and Education, 151 , pp. 103871, 2020. (Abstract | Links | BibTeX | Tags: education) @article{SANTAMARIA20, title = {Learning analytics for student modeling in virtual reality training systems: Lineworkers case}, author = {G. Santamaría-Bonfil and M. B. Ibáñez and M. Pérez-Ramírez and G. Arroyo-Figueroa and F. Martínez-Álvarez}, url = {https://www.sciencedirect.com/science/article/pii/S0360131520300701}, doi = {10.1016/j.compedu.2020.103871}, year = {2020}, date = {2020-03-10}, journal = {Computers and Education}, volume = {151}, pages = {103871}, abstract = {Live-line maintenance is a high risk activity. Hence, lineworkers require effective and safe training. Virtual Reality Training Systems (VRTS) provide an affordable and safe alternative for training in such high risk environments. However, their effectiveness relies mainly on having meaningful activities for supporting learning and on their ability to detect untrained students. This study builds a student model based on Learning Analytics (LA), using data collected from 1399 students that used a VRTS for the maintenance training of lineworkers in 329 courses carried out from 2008 to 2016. By employing several classifiers, the model allows discriminating between trained and untrained students in different maneuvers using three minimum evaluation proficiency scores. Using the best classifier, a Feature Importance Analysis is carried out to understand the impact of the variables regarding the trainees’ final performances. The model also involves the exploration of the trainees’ trace data through a visualization tool to pose non-observable behavioral variables related to displayed errors. 
The results show that the model can discriminate between trained and untrained students, the Random Forest algorithm standing out. The feature importance analysis revealed that the most relevant features regarding the trainees’ final performance were profile and course variables along with specific maneuver steps. Finally, using the visual tool, and with human expert aid, several error patterns in trace data associated with misconceptions and confusion were identified. In the light of these, LA enables disassembling the data jigsaw quandary from VRTS to enhance the human-in-the-loop evaluation.}, keywords = {education}, pubstate = {published}, tppubtype = {article} } Live-line maintenance is a high risk activity. Hence, lineworkers require effective and safe training. Virtual Reality Training Systems (VRTS) provide an affordable and safe alternative for training in such high risk environments. However, their effectiveness relies mainly on having meaningful activities for supporting learning and on their ability to detect untrained students. This study builds a student model based on Learning Analytics (LA), using data collected from 1399 students that used a VRTS for the maintenance training of lineworkers in 329 courses carried out from 2008 to 2016. By employing several classifiers, the model allows discriminating between trained and untrained students in different maneuvers using three minimum evaluation proficiency scores. Using the best classifier, a Feature Importance Analysis is carried out to understand the impact of the variables regarding the trainees’ final performances. The model also involves the exploration of the trainees’ trace data through a visualization tool to pose non-observable behavioral variables related to displayed errors. The results show that the model can discriminate between trained and untrained students, the Random Forest algorithm standing out. 
The feature importance analysis revealed that the most relevant features regarding the trainees’ final performance were profile and course variables along with specific maneuver steps. Finally, using the visual tool, and with human expert aid, several error patterns in trace data associated with misconceptions and confusion were identified. In the light of these, LA enables disassembling the data jigsaw quandary from VRTS to enhance the human-in-the-loop evaluation. |
K. Asim and E. Elawadi and F. Martínez-Álvarez and I. A. Niaz and S. R. M. Sayed and T. Iqbal Seismicity Analysis and Machine Learning Models for Short-Term Low Magnitude (Journal Article) Soil Dynamics and Earthquake Engineering, 130 , pp. 105932, 2020. (Links | BibTeX | Tags: natural disasters, time series) @article{ASIM20d, title = {Seismicity Analysis and Machine Learning Models for Short-Term Low Magnitude}, author = {K. Asim and E. Elawadi and F. Martínez-Álvarez and I. A. Niaz and S. R. M. Sayed and T. Iqbal}, url = {https://www.sciencedirect.com/science/article/pii/S0267726119302192}, doi = {10.1016/j.soildyn.2019.105932}, year = {2020}, date = {2020-03-01}, journal = {Soil Dynamics and Earthquake Engineering}, volume = {130}, pages = {105932}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
F. Moleshi and A. Haeri and F. Martínez-Álvarez A novel hybrid GA–PSO framework for mining quantitative association rules (Journal Article) Soft Computing, 24 (6), pp. 4645-4666, 2020. (Abstract | Links | BibTeX | Tags: association rules) @article{MOLESHI20, title = {A novel hybrid GA–PSO framework for mining quantitative association rules}, author = {F. Moleshi and A. Haeri and F. Martínez-Álvarez}, url = {https://link.springer.com/article/10.1007/s00500-019-04226-6}, doi = {10.1007/s00500-019-04226-6}, year = {2020}, date = {2020-03-01}, journal = {Soft Computing}, volume = {24}, number = {6}, pages = {4645--4666}, abstract = {Discovering association rules is a useful and common technique for data mining in which dependencies among datasets are shown. Discovering the rules from continuous numeric datasets is one of the common challenges in data mining. Furthermore, another restriction imposed by algorithms in this area is the need to determine the minimum threshold for the criteria of support and confidence. By drawing on two heuristic optimization techniques, to wit, the genetic algorithm (GA) and particle swarm optimization (PSO) algorithm, a hybrid algorithm for extracting quantitative association rules was developed in this research. Accurate and interpretable rules result from the integration of the multiple objectives GA with the multiple objective PSO algorithms, which redresses the balance in the exploitation and exploration tasks. The useful and appropriate rules and the most suitable numerical intervals are discovered by proposing a multi-criteria method in which there is no need to discretize numerical values and to determine threshold values of minimum support and confidence. Different criteria are used to determine appropriate rules. In this algorithm, the selected rules are extracted based on confidence, interestingness and comprehensibility. 
The results gained over five real-world datasets evidence the effectiveness of the proposed method. By hybridization of the GA and the PSO algorithm, the proposed approach has achieved considerable improvements compared with the basic algorithms in the criteria of the number of extracted rules from dataset, high confidence measure and support percentage.}, keywords = {association rules}, pubstate = {published}, tppubtype = {article} } Discovering association rules is a useful and common technique for data mining in which dependencies among datasets are shown. Discovering the rules from continuous numeric datasets is one of the common challenges in data mining. Furthermore, another restriction imposed by algorithms in this area is the need to determine the minimum threshold for the criteria of support and confidence. By drawing on two heuristic optimization techniques, to wit, the genetic algorithm (GA) and particle swarm optimization (PSO) algorithm, a hybrid algorithm for extracting quantitative association rules was developed in this research. Accurate and interpretable rules result from the integration of the multiple objectives GA with the multiple objective PSO algorithms, which redresses the balance in the exploitation and exploration tasks. The useful and appropriate rules and the most suitable numerical intervals are discovered by proposing a multi-criteria method in which there is no need to discretize numerical values and to determine threshold values of minimum support and confidence. Different criteria are used to determine appropriate rules. In this algorithm, the selected rules are extracted based on confidence, interestingness and comprehensibility. The results gained over five real-world datasets evidence the effectiveness of the proposed method. 
By hybridization of the GA and the PSO algorithm, the proposed approach has achieved considerable improvements compared with the basic algorithms in the criteria of the number of extracted rules from dataset, high confidence measure and support percentage. |
L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso High-content screening images streaming analysis using the STriGen methodology (Conference) SAC 35th Annual ACM Symposium on Applied Computing, 2020. (Links | BibTeX | Tags: bioinformatics) @conference{Melgar20_SAC, title = {High-content screening images streaming analysis using the STriGen methodology}, author = {L. Melgar-García and D. Gutiérrez-Avilés and C. Rubio-Escudero and A. Troncoso}, doi = {10.1145/3341105.3374071}, year = {2020}, date = {2020-03-01}, booktitle = {SAC 35th Annual ACM Symposium on Applied Computing}, pages = {537--539}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {conference} } |
F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado Special issue: HAIS16-IGPL (Journal Article) Logic Journal of the IGPL, 28 (1), pp. 1-3, 2020. (Abstract | Links | BibTeX | Tags: ) @article{IGPL20b, title = {Special issue: HAIS16-IGPL}, author = {F. Martínez-Álvarez and A. Troncoso and H. Quintián and E. Corchado}, url = {https://doi.org/10.1093/jigpal/jzz066}, doi = {10.1093/jigpal/jzz066}, year = {2020}, date = {2020-02-01}, journal = {Logic Journal of the IGPL}, volume = {28}, number = {1}, pages = {1--3}, abstract = {Following, Fournier-Viger et al. propose to integrate the concept of correlation in high-utility itemset mining to find profitable itemsets that are highly correlated, using the all-confidence and bond measures. An efficient algorithm named FCHM (fast correlated high-utility itemset miner) is proposed to efficiently discover correlated high-utility itemsets. Two versions of the algorithm are proposed, named FCHMall-confidence and FCHMbond based on the all-confidence and bond measures, respectively. An experimental evaluation was done using four real-life benchmark data sets from the high-utility itemset mining literature: mushroom, retail, kosarak and foodmart. Results show that FCHM is efficient and can prune a huge amount of weakly correlated high-utility itemsets.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Following, Fournier-Viger et al. propose to integrate the concept of correlation in high-utility itemset mining to find profitable itemsets that are highly correlated, using the all-confidence and bond measures. An efficient algorithm named FCHM (fast correlated high-utility itemset miner) is proposed to efficiently discover correlated high-utility itemsets. Two versions of the algorithm are proposed, named FCHMall-confidence and FCHMbond based on the all-confidence and bond measures, respectively. 
An experimental evaluation was done using four real-life benchmark data sets from the high-utility itemset mining literature: mushroom, retail, kosarak and foodmart. Results show that FCHM is efficient and can prune a huge amount of weakly correlated high-utility itemsets. |
D. T. Bui and N.-D. Hoang and F. Martínez-Álvarez and P.-T. T. Ngo and P. V. Hoa and T. D. Pham and P. Samui and R. Costache A novel deep learning neural network approach for predicting flash flood susceptibility: A case study at a high frequency tropical storm area (Journal Article) Science of the Total Environment, 701 , pp. 134413, 2020. (Links | BibTeX | Tags: natural disasters, time series) @article{BUI20, title = {A novel deep learning neural network approach for predicting flash flood susceptibility: A case study at a high frequency tropical storm area}, author = {D. T. Bui and N.-D. Hoang and F. Martínez-Álvarez and P.-T. T. Ngo and P. V. Hoa and T. D. Pham and P. Samui and R. Costache}, url = {https://www.sciencedirect.com/science/article/pii/S0048969719344043}, doi = {10.1016/j.scitotenv.2019.134413}, year = {2020}, date = {2020-01-20}, journal = {Science of the Total Environment}, volume = {701}, pages = {134413}, keywords = {natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
D. Guijo-Rubio and A. M. Durán-Rosal and P. A. Gutiérrez and A. Troncoso and C. Hervás-Martínez Time series clustering based on segment typologies extraction (Journal Article) IEEE Transactions on Cybernetics, 2020. (Abstract | Links | BibTeX | Tags: time series) @article{GUIJO20, author = {D. Guijo-Rubio and A. M. Durán-Rosal and P. A. Gutiérrez and A. Troncoso and C. Hervás-Martínez}, title = {Time series clustering based on segment typologies extraction}, journal = {IEEE Transactions on Cybernetics}, year = {2020}, date = {2020-01-15}, doi = {10.1109/TCYB.2019.2962584}, abstract = {Time-series clustering is the process of grouping time series with respect to their similarity or characteristics. Previous approaches usually combine a specific distance measure for time series and a standard clustering method. However, these approaches do not take the similarity of the different subsequences of each time series into account, which can be used to better compare the time-series objects of the dataset. In this article, we propose a novel technique of time-series clustering consisting of two clustering stages. In a first step, a least-squares polynomial segmentation procedure is applied to each time series, which is based on a growing window technique that returns different-length segments. Then, all of the segments are projected into the same dimensional space, based on the coefficients of the model that approximates the segment and a set of statistical features. After mapping, a first hierarchical clustering phase is applied to all mapped segments, returning groups of segments for each time series. These clusters are used to represent all time series in the same dimensional space, after defining another specific mapping process. In a second and final clustering stage, all the time-series objects are grouped. We consider internal clustering quality to automatically adjust the main parameter of the algorithm, which is an error threshold for the segmentation. 
The results obtained on 84 datasets from the UCR Time Series Classification Archive have been compared against three state-of-the-art methods, showing that the performance of this methodology is very promising, especially on larger datasets.}, keywords = {time series}, pubstate = {published}, tppubtype = {article} } Time-series clustering is the process of grouping time series with respect to their similarity or characteristics. Previous approaches usually combine a specific distance measure for time series and a standard clustering method. However, these approaches do not take the similarity of the different subsequences of each time series into account, which can be used to better compare the time-series objects of the dataset. In this article, we propose a novel technique of time-series clustering consisting of two clustering stages. In a first step, a least-squares polynomial segmentation procedure is applied to each time series, which is based on a growing window technique that returns different-length segments. Then, all of the segments are projected into the same dimensional space, based on the coefficients of the model that approximates the segment and a set of statistical features. After mapping, a first hierarchical clustering phase is applied to all mapped segments, returning groups of segments for each time series. These clusters are used to represent all time series in the same dimensional space, after defining another specific mapping process. In a second and final clustering stage, all the time-series objects are grouped. We consider internal clustering quality to automatically adjust the main parameter of the algorithm, which is an error threshold for the segmentation. The results obtained on 84 datasets from the UCR Time Series Classification Archive have been compared against three state-of-the-art methods, showing that the performance of this methodology is very promising, especially on larger datasets. |
F. Martínez-Álvarez and D. T. Bui Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management (Editorial) (Journal Article) Remote Sensing, 12 (2), pp. 301, 2020, ISSN: 2072-4292. (Abstract | Links | BibTeX | Tags: big data, natural disasters) @article{MARTINEZ20c, title = {Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management (Editorial)}, author = {F. Martínez-Álvarez and D. T. Bui}, url = {https://www.mdpi.com/2072-4292/12/2/301}, doi = {10.3390/rs12020301}, issn = {2072-4292}, year = {2020}, date = {2020-01-01}, journal = {Remote Sensing}, volume = {12}, number = {2}, pages = {301}, abstract = {This editorial summarizes the performance of the special issue entitled Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management, which was published at MDPI’s Remote Sensing journal. The special issue took place in years 2018 and 2019 and accepted a total of nine papers from authors of thirteen different countries. So far, these papers have dealt with 116 cites. Earthquakes, landslides, floods, wildfire and soil salinity were the topics analyzed. New methods were introduced, with applications of the utmost relevance}, keywords = {big data, natural disasters}, pubstate = {published}, tppubtype = {article} } This editorial summarizes the performance of the special issue entitled Advanced Machine Learning and Big Data Analytics in Remote Sensing for Natural Hazards Management, which was published at MDPI’s Remote Sensing journal. The special issue took place in years 2018 and 2019 and accepted a total of nine papers from authors of thirteen different countries. So far, these papers have dealt with 116 cites. Earthquakes, landslides, floods, wildfire and soil salinity were the topics analyzed. New methods were introduced, with applications of the utmost relevance |
Ó. Trull and J.C. García-Díaz and A. Troncoso Initialization methods for multiple seasonal Holt–Winters forecasting models (Journal Article) Mathematics, 8 (2), pp. 268, 2020. (Links | BibTeX | Tags: energy, time series) @article{TRULL20a, title = {Initialization methods for multiple seasonal Holt–Winters forecasting models}, author = {Ó. Trull and J.C. García-Díaz and A. Troncoso}, doi = {10.3390/math8020268}, year = {2020}, date = {2020-01-01}, journal = {Mathematics}, volume = {8}, number = {2}, pages = {268}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
C. Moreno-Carmona and J. M. Feria-Domínguez and A. Troncoso Applying the Open Government Principles to the University’s Strategic Planning: A Sound Practice (Journal Article) Sustainability, 12 (5), pp. 1826, 2020. (Links | BibTeX | Tags: education) @article{Moreno20, title = {Applying the Open Government Principles to the University’s Strategic Planning: A Sound Practice}, author = {C. Moreno-Carmona and J. M. Feria-Domínguez and A. Troncoso}, doi = {10.3390/su12051826}, year = {2020}, date = {2020-01-01}, journal = {Sustainability}, volume = {12}, number = {5}, pages = {1826}, keywords = {education}, pubstate = {published}, tppubtype = {article} } |
Óscar Trull and J. Carlos García-Díaz and A. Troncoso Stability of Multiple Seasonal Holt-Winters Models Applied to Hourly Electricity Demand in Spain (Journal Article) Applied Sciences, 10 (7), pp. 2630, 2020. (Links | BibTeX | Tags: energy, time series) @article{Trull20b, title = {Stability of Multiple Seasonal Holt-Winters Models Applied to Hourly Electricity Demand in Spain}, author = {Óscar Trull and J. Carlos García-Díaz and A. Troncoso}, doi = {10.3390/app10072630}, year = {2020}, date = {2020-01-01}, journal = {Applied Sciences}, volume = {10}, number = {7}, pages = {2630}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
C. Lezcano and J.L. Vázquez Noguera and D. P. Pinto-Roa and M. García-Torres and C. Gaona and P. E. Gardel-Sotomayor A multi-objective approach for designing optimized operation sequence on binary image processing (Journal Article) Heliyon, 6 (4), pp. e03670, 2020. (Abstract | BibTeX | Tags: pattern recognition) @article{Lezcano20, title = {A multi-objective approach for designing optimized operation sequence on binary image processing}, author = {C. Lezcano and J.L. Vázquez Noguera and D. P. Pinto-Roa and M. García-Torres and C. Gaona and P. E. Gardel-Sotomayor}, year = {2020}, date = {2020-01-01}, journal = {Heliyon}, volume = {6}, number = {4}, pages = {e03670}, abstract = {In binary image segmentation, the choice of the order of the operation sequence may yield to suboptimal results. In this work, we propose to tackle the associated optimization problem via multi-objective approach. Given the original image, in combination with a list of morphological, logical and stacking operations, the goal is to obtain the ideal output at the lowest computational cost. We compared the performance of two Multi-objective Evolutionary Algorithms (MOEAs): the Non-dominated Sorting Genetic Algorithm (NSGA-II) and the Strength Pareto Evolutionary Algorithm 2 (SPEA2). NSGA-II has better results in most cases, but the difference does not reach statistical significance. The results show that the similarity measure and the computational cost are objective functions in conflict, while the number of operations available and type of input images impact on the quality of Pareto set.}, keywords = {pattern recognition}, pubstate = {published}, tppubtype = {article} } In binary image segmentation, the choice of the order of the operation sequence may yield to suboptimal results. In this work, we propose to tackle the associated optimization problem via multi-objective approach. 
Given the original image, in combination with a list of morphological, logical and stacking operations, the goal is to obtain the ideal output at the lowest computational cost. We compared the performance of two Multi-objective Evolutionary Algorithms (MOEAs): the Non-dominated Sorting Genetic Algorithm (NSGA-II) and the Strength Pareto Evolutionary Algorithm 2 (SPEA2). NSGA-II has better results in most cases, but the difference does not reach statistical significance. The results show that the similarity measure and the computational cost are objective functions in conflict, while the number of operations available and type of input images impact on the quality of Pareto set. |
F. M. Delgado-Chaves and F. Gómez-Vela and F. Divina and M. García-Torres and D. S. Rodríguez-Baena Computational Analysis of the Global Effects of Ly6E in the Immune Response to Coronavirus Infection Using Gene Networks (Journal Article) Genes, 11 (7), pp. 831-864, 2020. (Abstract | BibTeX | Tags: bioinformatics) @article{Delgado-Chaves20, title = {Computational Analysis of the Global Effects of Ly6E in the Immune Response to Coronavirus Infection Using Gene Networks}, author = {F. M. Delgado-Chaves and F. Gómez-Vela and F. Divina and M. García-Torres and D. S. Rodríguez-Baena}, year = {2020}, date = {2020-01-01}, journal = {Genes}, volume = {11}, number = {7}, pages = {831-864}, abstract = {Gene networks have arisen as a promising tool in the comprehensive modeling and analysis of complex diseases. Particularly in viral infections, the understanding of the host-pathogen mechanisms, and the immune response to these, is considered a major goal for the rational design of appropriate therapies. For this reason, the use of gene networks may well encourage therapy-associated research in the context of the coronavirus pandemic, orchestrating experimental scrutiny and reducing costs. In this work, gene co-expression networks were reconstructed from RNA-Seq expression data with the aim of analyzing the time-resolved effects of gene Ly6E in the immune response against the coronavirus responsible for murine hepatitis (MHV). Through the integration of differential expression analyses and reconstructed networks exploration, significant differences in the immune response to virus were observed in Ly6E∆HSC compared to wild type animals. Results show that Ly6E ablation at hematopoietic stem cells (HSCs) leads to a progressive impaired immune response in both liver and spleen. Specifically, depletion of the normal leukocyte mediated immunity and chemokine signaling is observed in the liver of Ly6E∆HSC mice. 
On the other hand, the immune response in the spleen, which seemed to be mediated by an intense chromatin activity in the normal situation, is replaced by ECM remodeling in Ly6E∆HSC mice. These findings, which require further experimental characterization, could be extrapolated to other coronaviruses and motivate the efforts towards novel antiviral approaches.}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } Gene networks have arisen as a promising tool in the comprehensive modeling and analysis of complex diseases. Particularly in viral infections, the understanding of the host-pathogen mechanisms, and the immune response to these, is considered a major goal for the rational design of appropriate therapies. For this reason, the use of gene networks may well encourage therapy-associated research in the context of the coronavirus pandemic, orchestrating experimental scrutiny and reducing costs. In this work, gene co-expression networks were reconstructed from RNA-Seq expression data with the aim of analyzing the time-resolved effects of gene Ly6E in the immune response against the coronavirus responsible for murine hepatitis (MHV). Through the integration of differential expression analyses and reconstructed networks exploration, significant differences in the immune response to virus were observed in Ly6E∆HSC compared to wild type animals. Results show that Ly6E ablation at hematopoietic stem cells (HSCs) leads to a progressive impaired immune response in both liver and spleen. Specifically, depletion of the normal leukocyte mediated immunity and chemokine signaling is observed in the liver of Ly6E∆HSC mice. On the other hand, the immune response in the spleen, which seemed to be mediated by an intense chromatin activity in the normal situation, is replaced by ECM remodeling in Ly6E∆HSC mice. 
These findings, which require further experimental characterization, could be extrapolated to other coronaviruses and motivate the efforts towards novel antiviral approaches. |
F. Daumas-Ladouce and M. García-Torres and J.L. Vázquez Noguera and D. P. Pinto-Roa and H. Legal-Alaya Multi-Objective Pareto Histogram Equalization (Journal Article) Electronic Notes in Theoretical Computer Science, 349 , pp. 3-23, 2020. (Abstract | BibTeX | Tags: pattern recognition) @article{Daumas-Ladouce20, title = {Multi-Objective Pareto Histogram Equalization}, author = {F. Daumas-Ladouce and M. García-Torres and J.L. Vázquez Noguera and D. P. Pinto-Roa and H. Legal-Alaya}, year = {2020}, date = {2020-01-01}, journal = {Electronic Notes in Theoretical Computer Science}, volume = {349}, pages = {3-23}, abstract = {Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative method is the weighted sum of metrics used to enhance the contrast of an image. These type of techniques, beside just returning a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in a posteriori selection criteria context. The goal of this method is three-fold: (1) improve the contrast (2) without losing important details, (3) avoiding an excessive distortion. MOPHE, is a pure multi-objective optimization algorithm, consequently a set of tradeoff optimal solutions are generated, thus providing alternative solutions to the decision-maker, allowing the selection of one or more resulting images, depending on the application needs. 
Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than the results obtained from representative algorithms from the state-of-the-art regarding visual quality and metrics measurement.}, keywords = {pattern recognition}, pubstate = {published}, tppubtype = {article} } Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative method is the weighted sum of metrics used to enhance the contrast of an image. These type of techniques, beside just returning a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in a posteriori selection criteria context. The goal of this method is three-fold: (1) improve the contrast (2) without losing important details, (3) avoiding an excessive distortion. MOPHE, is a pure multi-objective optimization algorithm, consequently a set of tradeoff optimal solutions are generated, thus providing alternative solutions to the decision-maker, allowing the selection of one or more resulting images, depending on the application needs. Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than the results obtained from representative algorithms from the state-of-the-art regarding visual quality and metrics measurement. |
D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo Identifying livestock behavior patterns based on accelerometer dataset (Journal Article) Journal of Computational Science, 41 , pp. 101076, 2020. (Abstract | Links | BibTeX | Tags: pattern recognition) @article{Rodriguez-Baena20, title = {Identifying livestock behavior patterns based on accelerometer dataset}, author = {D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo}, url = {https://doi.org/10.1016/j.jocs.2020.101076}, doi = {10.1016/j.jocs.2020.101076}, year = {2020}, date = {2020-01-01}, journal = {Journal of Computational Science}, volume = {41}, pages = {101076}, abstract = {In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would allow to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not represent the best approach, since, in this way, not all the animals can be effectively monitored all the time. Moreover, the traditional approach heavily rely on human experience, which cannot be always taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located in the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves.
Results confirm that the proposed method can help breeders in detecting activity and inactivity periods for large livestock farming.}, keywords = {pattern recognition}, pubstate = {published}, tppubtype = {article} } In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would allow to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not represent the best approach, since, in this way, not all the animals can be effectively monitored all the time. Moreover, the traditional approach heavily rely on human experience, which cannot be always taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located in the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves. Results confirm that the proposed method can help breeders in detecting activity and inactivity periods for large livestock farming. |
T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions (Journal Article) Genes, 11 (9), pp. 985, 2020. (Abstract | BibTeX | Tags: bioinformatics) @article{Vanhaeren20, title = {A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions}, author = {T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García}, year = {2020}, date = {2020-01-01}, journal = {Genes}, volume = {11}, number = {9}, pages = {985}, abstract = {The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. 
In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin}, keywords = {bioinformatics}, pubstate = {published}, tppubtype = {article} } The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. 
Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin |
M. A. Molina and G. Asencio-Cortés and J. C. Riquelme and F. Martínez-Álvarez A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets (Conference) SOCO 15th International Conference on Soft Computing Models in Industrial and Environmental Applications, 1268 , Advances in Intelligent Systems and Computing 2020. (Links | BibTeX | Tags: deep learning, pattern recognition, transfer learning) @conference{molina2021, title = {A Preliminary Study on Deep Transfer Learning Applied to Image Classification for Small Datasets}, author = {M. A. Molina and G. Asencio-Cortés and J. C. Riquelme and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-57802-2_71}, year = {2020}, date = {2020-01-01}, booktitle = {SOCO 15th International Conference on Soft Computing Models in Industrial and Environmental Applications}, volume = {1268}, pages = {741-750}, series = {Advances in Intelligent Systems and Computing}, keywords = {deep learning, pattern recognition, transfer learning}, pubstate = {published}, tppubtype = {conference} } |
2019 |
C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system (Journal Article) IEEE Access, 7 , pp. 120840-120856, 2019. (Links | BibTeX | Tags: energy, time series) @article{GOMEZ19, title = {A novel ensemble method for electric vehicle power consumption forecasting: application to the Spanish system}, author = {C. Gómez-Quiles and G. Asencio-Cortés and A. Gastalver-Rubio and F. Martínez-Álvarez and A. Troncoso and J. Manresa and J. C. Riquelme and J. M. Riquelme}, url = {https://ieeexplore.ieee.org/document/8807120}, doi = {10.1109/ACCESS.2019.2936478}, year = {2019}, date = {2019-08-01}, journal = {IEEE Access}, volume = {7}, pages = {120840-120856}, keywords = {energy, time series}, pubstate = {published}, tppubtype = {article} } |
F. Martínez-Álvarez and A. Morales-Esteban Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial) (Journal Article) Computers and Geosciences, 129 , pp. 38-39, 2019. (Links | BibTeX | Tags: big data, natural disasters, time series) @article{MARTINEZ19, title = {Big data and natural disasters: New approaches for temporal and spatial massive data analysis (Editorial)}, author = {F. Martínez-Álvarez and A. Morales-Esteban}, url = {https://www.sciencedirect.com/science/article/pii/S009830041930411X?dgcid=rss_sd_all}, doi = {10.1016/j.cageo.2019.04.012}, year = {2019}, date = {2019-08-01}, journal = {Computers and Geosciences}, volume = {129}, pages = {38-39}, keywords = {big data, natural disasters, time series}, pubstate = {published}, tppubtype = {article} } |
C. Rubio-Escudero and F. Martínez-Álvarez and E. Atencia and A. Troncoso Deployment of an internal quality assurance system at Pablo de Olavide University of Seville: improving students skills (Conference) ICEUTE 10th International Conference on European Transnational Education, 951 , Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: education) @conference{RUBIO19, title = {Deployment of an internal quality assurance system at Pablo de Olavide University of Seville: improving students skills}, author = {C. Rubio-Escudero and F. Martínez-Álvarez and E. Atencia and A. Troncoso}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_35}, doi = {10.1007/978-3-030-20005-3_35}, year = {2019}, date = {2019-05-16}, booktitle = {ICEUTE 10th International Conference on European Transnational Education}, volume = {951}, pages = {340-348}, series = {Advances in Intelligent Systems and Computing}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
J. L. Amaro-Mellado and D. Antón and M. Pérez-Suárez and F. Martínez-Álvarez Game-based Student Response System applied to a multidisciplinary teaching context (Conference) ICEUTE 10th International Conference on European Transnational Education, 951 , Advances in Intelligent Systems and Computing 2019. (Links | BibTeX | Tags: education) @conference{AMARO19, title = {Game-based Student Response System applied to a multidisciplinary teaching context}, author = {J. L. Amaro-Mellado and D. Antón and M. Pérez-Suárez and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_34}, doi = {10.1007/978-3-030-20005-3_34}, year = {2019}, date = {2019-05-16}, booktitle = {ICEUTE 10th International Conference on European Transnational Education}, volume = {951}, pages = {329-339}, series = {Advances in Intelligent Systems and Computing}, keywords = {education}, pubstate = {published}, tppubtype = {conference} } |
J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting (Conference) IWANN 15th International Work-Conference on Artificial Neural Networks, 11506 , Lecture Notes in Computer Science 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @conference{TORRES19-2, title = {Random Hyper-Parameter Search-Based Deep Neural Network for Power Consumption Forecasting}, author = {J. F. Torres and D. Gutiérrez-Avilés and A. Troncoso and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20521-8_22}, doi = {10.1007/978-3-030-20521-8_22}, year = {2019}, date = {2019-05-16}, booktitle = {IWANN 15th International Work-Conference on Artificial Neural Networks}, volume = {11506}, pages = {259-269}, series = {Lecture Notes in Computer Science}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {conference} } |
J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez Big data solar power forecasting based on deep learning and multiple data sources (Journal Article) Expert Systems, 36 , pp. id12394, 2019. (Links | BibTeX | Tags: deep learning, energy, time series) @article{TORRES19-1, title = {Big data solar power forecasting based on deep learning and multiple data sources}, author = {J. F. Torres and A. Troncoso and I. Koprinska and Z. Wang and F. Martínez-Álvarez}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/exsy.12394}, doi = {10.1111/exsy.12394}, year = {2019}, date = {2019-03-01}, journal = {Expert Systems}, volume = {36}, pages = {id12394}, keywords = {deep learning, energy, time series}, pubstate = {published}, tppubtype = {article} } |