Miguel García Torres is an associate professor in the Escuela Politécnica Superior of the Universidad Pablo de Olavide. He received the BS degree in physics and the PhD degree in computer science from the Universidad de La Laguna, Tenerife, Spain, in 2001 and 2007, respectively. After obtaining the doctorate he held a postdoc position in the Laboratory for Space Astrophysics and Theoretical Physics at the National Institute of Aerospace Technology (INTA). There, he joined the Gaia mission from the European Space Agency (ESA) and started to participate in the Gaia Data Processing and Analysis Consortium (DPAC) as a member of the “Astrophysical Parameters” Coordination Unit (CU8). He has been involved in the “Object Clustering Analysis” (OCA) Development Unit since then. His research areas of interest include machine learning, metaheuristics, big data, time series forecasting, bioinformatics and astrostatistics.
Publications
2021 |
F. Divina and F. Gómez-Vela and M. García-Torres Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast Journal Article Applied Sciences, 11 (3), pp. 1261, 2021. @article{divina2021advanced, title = {Advanced Optimization Methods and Big Data Applications in Energy Demand Forecast}, author = {F. Divina and F. Gómez-Vela and M. García-Torres}, url = {https://www.mdpi.com/2076-3417/11/3/1261/htm}, doi = {10.3390/app11031261}, year = {2021}, date = {2021-01-01}, journal = {Applied Sciences}, volume = {11}, number = {3}, pages = {1261}, publisher = {Multidisciplinary Digital Publishing Institute}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2020 |
F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting Journal Article Applied Sciences, 10 (16), pp. 5487, 2020. @article{DIVINA2020, title = {Hybridizing deep learning and neuroevolution: Application to the Spanish short-term electric energy consumption forecasting}, author = {F. Divina and J. F. Torres and M. García-Torres and F. Martínez-Álvarez and A. Troncoso}, url = {https://www.mdpi.com/2076-3417/10/16/5487}, doi = {https://doi.org/10.3390/app10165487}, year = {2020}, date = {2020-07-30}, journal = {Applied Sciences}, volume = {10}, number = {16}, pages = {5487}, abstract = {The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. 
Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The electric energy production would be much more efficient if accurate estimations of the future demand were available, since these would allow allocating only the resources needed for the production of the right amount of energy required. With this motivation in mind, we propose a strategy, based on neuroevolution, that can be used to this aim. Our proposal uses a genetic algorithm in order to find a sub-optimal set of hyper-parameters for configuring a deep neural network, which can then be used for obtaining the forecasting. Such a strategy is justified by the observation that the performances achieved by deep neural networks are strongly dependent on the right setting of the hyper-parameters, and genetic algorithms have shown excellent search capabilities in huge search spaces. Moreover, we base our proposal on a distributed computing platform, which allows its use on a large time-series. In order to assess the performances of our approach, we have applied it to a large dataset, related to the electric energy consumption registered in Spain over almost 10 years. Experimental results confirm the validity of our proposal since it outperforms all other forecasting techniques to which it has been compared. |
F. M. Delgado-Chaves and F. Gómez-Vela and F. Divina and M. García-Torres and D. S. Rodríguez-Baena Computational Analysis of the Global Effects of Ly6E in the Immune Response to Coronavirus Infection Using Gene Networks Journal Article Genes, 11 (7), pp. 831-864, 2020. @article{Delgado-Chaves20, title = {Computational Analysis of the Global Effects of Ly6E in the Immune Response to Coronavirus Infection Using Gene Networks}, author = {F. M. Delgado-Chaves and F. Gómez-Vela and F. Divina and M. García-Torres and D. S. Rodríguez-Baena}, year = {2020}, date = {2020-01-01}, journal = {Genes}, volume = {11}, number = {7}, pages = {831-864}, abstract = {Gene networks have arisen as a promising tool in the comprehensive modeling and analysis of complex diseases. Particularly in viral infections, the understanding of the host-pathogen mechanisms, and the immune response to these, is considered a major goal for the rational design of appropriate therapies. For this reason, the use of gene networks may well encourage therapy-associated research in the context of the coronavirus pandemic, orchestrating experimental scrutiny and reducing costs. In this work, gene co-expression networks were reconstructed from RNA-Seq expression data with the aim of analyzing the time-resolved effects of gene Ly6E in the immune response against the coronavirus responsible for murine hepatitis (MHV). Through the integration of differential expression analyses and reconstructed networks exploration, significant differences in the immune response to virus were observed in Ly6E∆HSC compared to wild type animals. Results show that Ly6E ablation at hematopoietic stem cells (HSCs) leads to a progressive impaired immune response in both liver and spleen. Specifically, depletion of the normal leukocyte mediated immunity and chemokine signaling is observed in the liver of Ly6E∆HSC mice. 
On the other hand, the immune response in the spleen, which seemed to be mediated by an intense chromatin activity in the normal situation, is replaced by ECM remodeling in Ly6E∆HSC mice. These findings, which require further experimental characterization, could be extrapolated to other coronaviruses and motivate the efforts towards novel antiviral approaches.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Gene networks have arisen as a promising tool in the comprehensive modeling and analysis of complex diseases. Particularly in viral infections, the understanding of the host-pathogen mechanisms, and the immune response to these, is considered a major goal for the rational design of appropriate therapies. For this reason, the use of gene networks may well encourage therapy-associated research in the context of the coronavirus pandemic, orchestrating experimental scrutiny and reducing costs. In this work, gene co-expression networks were reconstructed from RNA-Seq expression data with the aim of analyzing the time-resolved effects of gene Ly6E in the immune response against the coronavirus responsible for murine hepatitis (MHV). Through the integration of differential expression analyses and reconstructed networks exploration, significant differences in the immune response to virus were observed in Ly6E∆HSC compared to wild type animals. Results show that Ly6E ablation at hematopoietic stem cells (HSCs) leads to a progressive impaired immune response in both liver and spleen. Specifically, depletion of the normal leukocyte mediated immunity and chemokine signaling is observed in the liver of Ly6E∆HSC mice. On the other hand, the immune response in the spleen, which seemed to be mediated by an intense chromatin activity in the normal situation, is replaced by ECM remodeling in Ly6E∆HSC mice. 
These findings, which require further experimental characterization, could be extrapolated to other coronaviruses and motivate the efforts towards novel antiviral approaches. |
D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo Identifying livestock behavior patterns based on accelerometer dataset Journal Article Journal of Computational Science, 41 , pp. 101076, 2020. @article{Rodriguez-Baena20, title = {Identifying livestock behavior patterns based on accelerometer dataset}, author = {D. S. Rodríguez-Baena and F. Gómez-Vela and M. García-Torres and F. Divina and C. D. Barranco and N. Díaz-Díaz and M. Jimenez and G. Montalvo}, url = {https://doi.org/10.1016/j.jocs.2020.101076}, doi = {10.1016/j.jocs.2020.101076}, year = {2020}, date = {2020-01-01}, journal = {Journal of Computational Science}, volume = {41}, pages = {101076}, abstract = {In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would allow to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not represent the best approach, since, in this way, not all the animals can be effectively monitored all the time. Moreover, the traditional approach heavily rely on human experience, which cannot be always taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located in the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves. 
Results confirm that the proposed method can help breeders in detecting activity and inactivity periods for large livestock farming.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In large livestock farming it would be beneficial to be able to automatically detect behaviors in animals. In fact, this would allow to estimate the health status of individuals, providing valuable insight to stock raisers. Traditionally this process has been carried out manually, relying only on the experience of the breeders. Such an approach is effective for a small number of individuals. However, in large breeding farms this may not represent the best approach, since, in this way, not all the animals can be effectively monitored all the time. Moreover, the traditional approach heavily rely on human experience, which cannot be always taken for granted. To this aim, in this paper, we propose a new method for automatically detecting activity and inactivity time periods of animals, as a behavior indicator of livestock. In order to do this, we collected data with sensors located in the body of the animals to be analyzed. In particular, the reliability of the method was tested with data collected on Iberian pigs and calves. Results confirm that the proposed method can help breeders in detecting activity and inactivity periods for large livestock farming. |
T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions Journal Article Genes, 11 (9), pp. 985, 2020. @article{Vanhaeren20, title = {A Comparative Study of Supervised Machine Learning Algorithms for the Prediction of Long-Range Chromatin Interactions}, author = {T. Vanhaeren and F. Divina and M. García-Torres and F. Gómez-Vela and W. Vanhoof and P. M. Martínez-García}, year = {2020}, date = {2020-01-01}, journal = {Genes}, volume = {11}, number = {9}, pages = {985}, abstract = {The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. 
Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin}, keywords = {}, pubstate = {published}, tppubtype = {article} } The role of three-dimensional genome organization as a critical regulator of gene expression has become increasingly clear over the last decade. Most of our understanding of this association comes from the study of long range chromatin interaction maps provided by Chromatin Conformation Capture-based techniques, which have greatly improved in recent years. Since these procedures are experimentally laborious and expensive, in silico prediction has emerged as an alternative strategy to generate virtual maps in cell types and conditions for which experimental data of chromatin interactions is not available. Several methods have been based on predictive models trained on one-dimensional (1D) sequencing features, yielding promising results. However, different approaches vary both in the way they model chromatin interactions and in the machine learning-based strategy they rely on, making it challenging to carry out performance comparison of existing methods. 
In this study, we use publicly available 1D sequencing signals to model cohesin-mediated chromatin interactions in two human cell lines and evaluate the prediction performance of six popular machine learning algorithms: decision trees, random forests, gradient boosting, support vector machines, multi-layer perceptron and deep learning. Our approach accurately predicts long-range interactions and reveals that gradient boosting significantly outperforms the other five methods, yielding accuracies of about 95%. We show that chromatin features in close genomic proximity to the anchors cover most of the predictive information, as has been previously reported. Moreover, we demonstrate that gradient boosting models trained with different subsets of chromatin features, unlike the other methods tested, are able to produce accurate predictions. In this regard, and besides architectural proteins, transcription factors are shown to be highly informative. Our study provides a framework for the systematic prediction of long-range chromatin interactions, identifies gradient boosting as the best suited algorithm for this task and highlights cell-type specific binding of transcription factors at the anchors as important determinants of chromatin wiring mediated by cohesin |
F. Daumas-Ladouce and M. García-Torres and J.L. Vázquez Noguera and D. P. Pinto-Roa and H. Legal-Alaya Multi-Objective Pareto Histogram Equalization Journal Article Electronic Notes in Theoretical Computer Science, 349 , pp. 3-23, 2020. @article{Daumas-Ladouce20, title = {Multi-Objective Pareto Histogram Equalization}, author = {F. Daumas-Ladouce and M. García-Torres and J.L. Vázquez Noguera and D. P. Pinto-Roa and H. Legal-Alaya}, year = {2020}, date = {2020-01-01}, journal = {Electronic Notes in Theoretical Computer Science}, volume = {349}, pages = {3-23}, abstract = {Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative method is the weighted sum of metrics used to enhance the contrast of an image. These type of techniques, beside just returning a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in a posteriori selection criteria context. The goal of this method is three-fold: (1) improve the contrast (2) without losing important details, (3) avoiding an excessive distortion. MOPHE, is a pure multi-objective optimization algorithm, consequently a set of tradeoff optimal solutions are generated, thus providing alternative solutions to the decision-maker, allowing the selection of one or more resulting images, depending on the application needs. 
Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than the results obtained from representative algorithms from the state-of-the-art regarding visual quality and metrics measurement.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Several histogram equalization methods focus on enhancing the contrast as one of their main objectives, but usually without considering the details of the input image. Other methods seek to keep the brightness while improving the contrast, causing distortion. Among the multi-objective algorithms, the classical optimization (a priori) techniques are commonly used given their simplicity. One of the most representative method is the weighted sum of metrics used to enhance the contrast of an image. These type of techniques, beside just returning a single image, have problems related to the weight assignment for each selected metric. To avoid the pitfalls of the algorithms just mentioned, we propose a new method called MOPHE (MultiObjective Pareto Histogram Equalization) which is based on Multi-objective Particle Swarm Optimization (MOPSO) approach combining different metrics in a posteriori selection criteria context. The goal of this method is three-fold: (1) improve the contrast (2) without losing important details, (3) avoiding an excessive distortion. MOPHE, is a pure multi-objective optimization algorithm, consequently a set of tradeoff optimal solutions are generated, thus providing alternative solutions to the decision-maker, allowing the selection of one or more resulting images, depending on the application needs. Experimental results indicate that MOPHE is a promising approach, as it calculates a set of trade-off optimal solutions that are better than the results obtained from representative algorithms from the state-of-the-art regarding visual quality and metrics measurement. |
C. Lezcano and J.L. Vázquez Noguera and D. P. Pinto-Roa and M. García-Torres and C. Gaona and P. E. Gardel-Sotomayor A multi-objective approach for designing optimized operation sequence on binary image processing Journal Article Heliyon, 6 (4), pp. e03670, 2020. @article{Lezcano20, title = {A multi-objective approach for designing optimized operation sequence on binary image processing}, author = {C. Lezcano and J.L. Vázquez Noguera and D. P. Pinto-Roa and M. García-Torres and C. Gaona and P. E. Gardel-Sotomayor}, year = {2020}, date = {2020-01-01}, journal = {Heliyon}, volume = {6}, number = {4}, pages = {e03670}, abstract = {In binary image segmentation, the choice of the order of the operation sequence may yield to suboptimal results. In this work, we propose to tackle the associated optimization problem via multi-objective approach. Given the original image, in combination with a list of morphological, logical and stacking operations, the goal is to obtain the ideal output at the lowest computational cost. We compared the performance of two Multi-objective Evolutionary Algorithms (MOEAs): the Non-dominated Sorting Genetic Algorithm (NSGA-II) and the Strength Pareto Evolutionary Algorithm 2 (SPEA2). NSGA-II has better results in most cases, but the difference does not reach statistical significance. The results show that the similarity measure and the computational cost are objective functions in conflict, while the number of operations available and type of input images impact on the quality of Pareto set.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In binary image segmentation, the choice of the order of the operation sequence may yield to suboptimal results. In this work, we propose to tackle the associated optimization problem via multi-objective approach. Given the original image, in combination with a list of morphological, logical and stacking operations, the goal is to obtain the ideal output at the lowest computational cost. 
We compared the performance of two Multi-objective Evolutionary Algorithms (MOEAs): the Non-dominated Sorting Genetic Algorithm (NSGA-II) and the Strength Pareto Evolutionary Algorithm 2 (SPEA2). NSGA-II has better results in most cases, but the difference does not reach statistical significance. The results show that the similarity measure and the computational cost are objective functions in conflict, while the number of operations available and type of input images impact on the quality of Pareto set. |
2019 |
M. García-Torres and D. Becerra-Alonso and F. A. Gómez-Vela and F. Divina and I. López Cobo and F. Martínez-Álvarez Analysis of Student Achievement Scores: A Machine Learning Approach Conference ICEUTE 10th International Conference on EUropean Transnational Education, Advances in Intelligent Systems and Computing 2019. @conference{Garcia2019, title = {Analysis of Student Achievement Scores: A Machine Learning Approach}, author = {M. García-Torres and D. Becerra-Alonso and F. A. Gómez-Vela and F. Divina and I. López Cobo and F. Martínez-Álvarez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_28}, year = {2019}, date = {2019-01-01}, booktitle = {ICEUTE 10th International Conference on EUropean Transnational Education}, pages = {275--284}, series = {Advances in Intelligent Systems and Computing}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
F. Gómez-Vela and F. M Delgado-Chaves and D.S. Rodríguez-Baena and M. García-Torres and F. Divina Ensemble and Greedy Approach for the Reconstruction of Large Gene Co-Expression Networks Journal Article Entropy, 21 (12), pp. 1139, 2019. @article{Entropy2019, title = {Ensemble and Greedy Approach for the Reconstruction of Large Gene Co-Expression Networks}, author = {F. Gómez-Vela and F. M Delgado-Chaves and D.S. Rodríguez-Baena and M. García-Torres and F. Divina}, url = {https://www.mdpi.com/1099-4300/21/12/1139}, doi = {https://doi.org/10.3390/e21121139}, year = {2019}, date = {2019-01-01}, journal = {Entropy}, volume = {21}, number = {12}, pages = {1139}, abstract = {Gene networks have become a powerful tool in the comprehensive analysis of gene expression. Due to the increasing amount of available data, computational methods for networks generation must deal with the so-called curse of dimensionality in the quest for the reliability of the obtained results. In this context, ensemble strategies have significantly improved the precision of results by combining different measures or methods. On the other hand, structure optimization techniques are also important in the reduction of the size of the networks, not only improving their topology but also keeping a positive prediction ratio. In this work, we present Ensemble and Greedy networks (EnGNet), a novel two-step method for gene networks inference. First, EnGNet uses an ensemble strategy for co-expression networks generation. Second, a greedy algorithm optimizes both the size and the topological features of the network. Not only do achieved results show that this method is able to obtain reliable networks, but also that it significantly improves topological features. Moreover, the usefulness of the method is proven by an application to a human dataset on post-traumatic stress disorder, revealing an innate immunity-mediated response to this pathology. 
These results are indicative of the method’s potential in the field of biomarkers discovery and characterization.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Gene networks have become a powerful tool in the comprehensive analysis of gene expression. Due to the increasing amount of available data, computational methods for networks generation must deal with the so-called curse of dimensionality in the quest for the reliability of the obtained results. In this context, ensemble strategies have significantly improved the precision of results by combining different measures or methods. On the other hand, structure optimization techniques are also important in the reduction of the size of the networks, not only improving their topology but also keeping a positive prediction ratio. In this work, we present Ensemble and Greedy networks (EnGNet), a novel two-step method for gene networks inference. First, EnGNet uses an ensemble strategy for co-expression networks generation. Second, a greedy algorithm optimizes both the size and the topological features of the network. Not only do achieved results show that this method is able to obtain reliable networks, but also that it significantly improves topological features. Moreover, the usefulness of the method is proven by an application to a human dataset on post-traumatic stress disorder, revealing an innate immunity-mediated response to this pathology. These results are indicative of the method’s potential in the field of biomarkers discovery and characterization. |
F. Divina and M. García-Torres and F. Gómez-Vela and J.L. Vázquez Noguera A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings Journal Article Energies, 12 (10), pp. 1934, 2019. @article{Energies2019b, title = {A Comparative Study of Time Series Forecasting Methods for Short Term Electric Energy Consumption Prediction in Smart Buildings}, author = {F. Divina and M. García-Torres and F. Gómez-Vela and J.L. Vázquez Noguera}, url = {https://www.mdpi.com/1996-1073/12/10/1934}, doi = {10.3390/en12101934}, year = {2019}, date = {2019-01-01}, journal = {Energies}, volume = {12}, number = {10}, pages = {1934}, abstract = {Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could allow improving the energy efficiency of such buildings, as well as help to spot problems related to wasting of energy. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies that can be used to this aim. To do this, we used the data regarding the electric consumption registered by thirteen buildings located in a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. 
In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Smart buildings are equipped with sensors that allow monitoring a range of building systems including heating and air conditioning, lighting and the general electric energy consumption. These data can then be stored and analyzed. The ability to use historical data regarding electric energy consumption could allow improving the energy efficiency of such buildings, as well as help to spot problems related to wasting of energy. This problem is even more important when considering that buildings are some of the largest consumers of energy. In this paper, we are interested in forecasting the energy consumption of smart buildings, and, to this aim, we propose a comparative study of different forecasting strategies that can be used to this aim. To do this, we used the data regarding the electric consumption registered by thirteen buildings located in a university campus in the south of Spain. The empirical comparison of the selected methods on the different data showed that some methods are more suitable than others for this kind of problem. In particular, we show that strategies based on Machine Learning approaches seem to be more suitable for this task. |
G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem Journal Article Information Sciences, 494 , pp. 1–20, 2019. @article{IS-2019, title = {A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem}, author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina}, url = {https://www.sciencedirect.com/science/article/pii/S0020025519303603}, doi = {https://doi.org/10.1016/j.ins.2019.04.046}, year = {2019}, date = {2019-01-01}, journal = {Information Sciences}, volume = {494}, pages = {1--20}, abstract = {In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. 
In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method. |
V.E. Jiménez Chaves and M. García-Torres and J.L. Vázquez Noguera and C.D. Cabrera Oviedo and A.P. Riego Esteche and F. Divina and M. Marrufo-Vázquez Analysis of Teacher Training in Mathematics in Paraguay’s Elementary Education System Using Machine Learning Techniques Conference International Joint Conference: 12th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2019) and 10th International Conference on EUropean Transnational Education (ICEUTE 2019), 2019. @conference{Chaves2019, title = {Analysis of Teacher Training in Mathematics in Paraguay’s Elementary Education System Using Machine Learning Techniques}, author = {V.E. Jiménez Chaves and M. García-Torres and J.L. Vázquez Noguera and C.D. Cabrera Oviedo and A.P. Riego Esteche and F. Divina and M. Marrufo-Vázquez}, url = {https://link.springer.com/chapter/10.1007/978-3-030-20005-3_29}, year = {2019}, date = {2019-01-01}, booktitle = {International Joint Conference: 12th International Conference on Computational Intelligence in Security for Information Systems (CISIS 2019) and 10th International Conference on EUropean Transnational Education (ICEUTE 2019)}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
F. M Delgado-Chaves and F. Gómez-Vela and M. García-Torres and F. Divina and J.L. Vázquez Noguera Computational Inference of Gene Co-Expression Networks for the identification of Lung Carcinoma Biomarkers: An Ensemble Approach Journal Article Genes, 10 (12), pp. 962, 2019. @article{Genes2019, title = {Computational Inference of Gene Co-Expression Networks for the identification of Lung Carcinoma Biomarkers: An Ensemble Approach}, author = {F. M Delgado-Chaves and F. Gómez-Vela and M. García-Torres and F. Divina and J.L. Vázquez Noguera}, url = {https://www.mdpi.com/2073-4425/10/12/962}, doi = {https://doi.org/10.3390/genes10120962}, year = {2019}, date = {2019-01-01}, journal = {Genes}, volume = {10}, number = {12}, pages = {962}, abstract = {Gene Networks (GN) have emerged as a useful tool in recent years for the analysis of different diseases in the field of biomedicine. In particular, GNs have been widely applied for the study and analysis of different types of cancer. In this context, Lung carcinoma is among the most common cancer types and its short life expectancy is partly due to late diagnosis. For this reason, lung cancer biomarkers that can be easily measured are highly demanded in biomedical research. In this work, we present an application of gene co-expression networks in the modelling of lung cancer gene regulatory networks, which ultimately served to the discovery of new biomarkers. For this, a robust GN inference was performed from microarray data concomitantly using three different co-expression measures. Results identified a major cluster of genes involved in SRP-dependent co-translational protein target to membrane, as well as a set of 28 genes that were exclusively found in networks generated from cancer samples. Amongst potential biomarkers, genes NCKAP1L and DMD are highlighted due to their implications in a considerable portion of lung and bronchus primary carcinomas. 
These findings demonstrate the potential of GN reconstruction in the rational prediction of biomarkers.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Gene Networks (GN) have emerged as a useful tool in recent years for the analysis of different diseases in the field of biomedicine. In particular, GNs have been widely applied for the study and analysis of different types of cancer. In this context, Lung carcinoma is among the most common cancer types and its short life expectancy is partly due to late diagnosis. For this reason, lung cancer biomarkers that can be easily measured are highly demanded in biomedical research. In this work, we present an application of gene co-expression networks in the modelling of lung cancer gene regulatory networks, which ultimately served to the discovery of new biomarkers. For this, a robust GN inference was performed from microarray data concomitantly using three different co-expression measures. Results identified a major cluster of genes involved in SRP-dependent co-translational protein target to membrane, as well as a set of 28 genes that were exclusively found in networks generated from cancer samples. Amongst potential biomarkers, genes NCKAP1L and DMD are highlighted due to their implications in a considerable portion of lung and bronchus primary carcinomas. These findings demonstrate the potential of GN reconstruction in the rational prediction of biomarkers. |
2018 |
P. Manuel Martínez-García and M. García-Torres and F. Divina and F. Gómez-Vela and F. Cortés-Ledesma Analysis of Relevance and Redundance on Topoisomerase 2b (TOP2B) Binding Sites: A Feature Selection Approach Conference International Conference on the Applications of Evolutionary Computation, 2018. @conference{Top2B2018b, title = {Analysis of Relevance and Redundance on Topoisomerase 2b (TOP2B) Binding Sites: A Feature Selection Approach}, author = {P. Manuel Martínez-García and M. García-Torres and F. Divina and F. Gómez-Vela and F. Cortés-Ledesma}, url = {https://link.springer.com/chapter/10.1007/978-3-319-77538-8_7}, year = {2018}, date = {2018-01-01}, booktitle = {International Conference on the Applications of Evolutionary Computation}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |
G. Sosa-Cabrera and M. García-Torres and S. Gómez Guerrero and C.E. Schaerer and F. Divina Understanding a multivariate semi-metric in the search strategies for attributes subset selection Conference Proceeding Series of the Brazilian Society of Computational and Applied Mathematics, 2018. @conference{Sosa2018b, title = {Understanding a multivariate semi-metric in the search strategies for attributes subset selection}, author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez Guerrero and C.E. Schaerer and F. Divina}, url = {https://proceedings.sbmac.emnuvens.com.br/sbmac/article/view/2506}, year = {2018}, date = {2018-01-01}, booktitle = {Proceeding Series of the Brazilian Society of Computational and Applied Mathematics}, keywords = {}, pubstate = {published}, tppubtype = {conference} } |