@article{Mortazavi21,
title = {Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer},
author = {R. Mortazavi and S. Mortazavi and A. Troncoso},
url = {https://link.springer.com/article/10.1007/s00366-020-01226-1},
doi = {10.1007/s00366-020-01226-1},
year = {2021},
date = {2021-01-01},
journal = {Engineering with Computers},
note = {In press},
abstract = {This paper introduces different types of regression trees for viscosity property forecasting in polymer solutions. Although regression trees have been extensively used in other fields, they do not have been explored to predict the viscosity. One key issue in the context of materials science is to determine a priori which characteristics must be included to describe the prediction model due to a large number of molecular descriptors is obtained. To deal with this, we propose a wrapper method to select the features based on regression trees. Thus, we use regression trees to evaluate different subsets of attributes and build a model from the subset of features that achieved the minimum error. In particular, the performance of eight regression tree algorithms, including both linear and non-linear models, is evaluated and compared to other forecasting approaches using a dataset composed of 64 polymers and 2962 molecular descriptors. The results show that regression trees with nearest neighbors based local models in leaves predict with high accuracy. Moreover, results have been compared to other forecasting approaches such as multivariate linear regression, neural networks and support vector machines showing remarkable improvements in terms of accuracy.},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
@article{IS-2019,
title = {A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem},
author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina},
url = {https://www.sciencedirect.com/science/article/pii/S0020025519303603},
doi = {10.1016/j.ins.2019.04.046},
year = {2019},
date = {2019-01-01},
journal = {Information Sciences},
volume = {494},
pages = {1--20},
abstract = {In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method.},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
@inproceedings{Sosa2018b,
title = {Understanding a multivariate semi-metric in the search strategies for attributes subset selection},
author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez Guerrero and C.E. Schaerer and F. Divina},
url = {https://proceedings.sbmac.emnuvens.com.br/sbmac/article/view/2506},
year = {2018},
date = {2018-01-01},
booktitle = {Proceeding Series of the Brazilian Society of Computational and Applied Mathematics},
keywords = {feature selection},
pubstate = {published},
tppubtype = {conference}
}
@article{IS:GT-2013,
title = {Comparison of metaheuristic strategies for peakbin selection in proteomic mass spectrometry data},
author = {M. García-Torres and R. Arma{\~n}anzas and C. Bielza and P. Larra{\~n}aga},
url = {https://www.sciencedirect.com/science/article/pii/S0020025510006195},
doi = {10.1016/j.ins.2010.12.013},
year = {2013},
date = {2013-01-01},
journal = {Information Sciences},
volume = {222},
pages = {229--246},
keywords = {bioinformatics, feature selection},
pubstate = {published},
tppubtype = {article}
}
@article{ESA:Rod-2012,
title = {Fast feature selection aimed at high dimensional data via hybrid-sequential-ranked searches},
author = {R. Ruíz and J. Riquelme and J. Aguilar-Ruíz and M. García-Torres},
url = {https://www.sciencedirect.com/science/article/abs/pii/S0957417412005842},
doi = {10.1016/j.eswa.2012.03.061},
year = {2012},
date = {2012-01-01},
journal = {Expert Systems with Applications},
volume = {39},
number = {12},
pages = {11094--11102},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
@article{EJOR:GL-2006,
title = {Solving the Feature Selection Problem by a Parallel Scatter Search},
author = {F.~C. García-López and M. García-Torres and B. Melián-Batista and J.~A. Moreno Pérez and J.~M. Moreno-Vega},
url = {https://www.sciencedirect.com/science/article/abs/pii/S0377221704005491},
doi = {10.1016/j.ejor.2004.08.010},
year = {2006},
date = {2006-01-01},
journal = {European Journal of Operational Research},
volume = {169},
number = {2},
pages = {477--489},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}