Publications
2025
A.M. Chacón-Maldonado and A.R. Troncoso-García and G. Asencio-Cortés and A. Troncoso
Improving monsoon forecasting based on feature selection and explainable artificial intelligence Journal Article
In: Applied Soft Computing, vol. 185, pp. 114053, 2025.
Links | BibTeX | Tags: feature selection, natural disasters, XAI
@article{ASOC2024,
  title      = {Improving monsoon forecasting based on feature selection and explainable artificial intelligence},
  author     = {A. M. Chacón-Maldonado and A. R. Troncoso-García and G. Asencio-Cortés and A. Troncoso},
  url        = {https://www.sciencedirect.com/science/article/pii/S1568494625013663},
  doi        = {10.1016/j.asoc.2025.114053},
  year       = {2025},
  date       = {2025-12-02},
  urldate    = {2025-12-02},
  journal    = {Applied Soft Computing},
  volume     = {185},
  pages      = {114053},
  keywords   = {feature selection, natural disasters, XAI},
  pubstate   = {published},
  tppubtype  = {article}
}
N. Ullah and F. Guzmán-Aroca and F. Martínez-Álvarez and I. De Falco and G. Sannino
A Novel Explainable AI Framework for Medical Image Classification Integrating Statistical, Visual, and Rule-Based Methods Journal Article
In: Medical Image Analysis, vol. 105, pp. 103665, 2025.
Abstract | Links | BibTeX | Tags: association rules, deep learning, feature selection, XAI
@article{ULLAH25,
  title      = {A Novel Explainable {AI} Framework for Medical Image Classification Integrating Statistical, Visual, and Rule-Based Methods},
  author     = {N. Ullah and F. Guzmán-Aroca and F. Martínez-Álvarez and I. De Falco and G. Sannino},
  url        = {https://www.sciencedirect.com/science/article/pii/S1361841525002129},
  doi        = {10.1016/j.media.2025.103665},
  year       = {2025},
  date       = {2025-06-06},
  urldate    = {2025-06-06},
  journal    = {Medical Image Analysis},
  volume     = {105},
  pages      = {103665},
  abstract   = {Artificial intelligence and deep learning are powerful tools for extracting knowledge from large datasets, particularly in healthcare. However, their black-box nature raises interpretability concerns, especially in high-stakes applications. Existing eXplainable Artificial Intelligence methods often focus solely on visualization or rule-based explanations, limiting interpretability’s depth and clarity. This work proposes a novel explainable AI method specifically designed for medical image analysis, integrating statistical, visual, and rule-based explanations to improve transparency in deep learning models. Statistical features are derived from deep features extracted using a custom Mobilenetv2 model. A two-step feature selection method—zero-based filtering with mutual importance selection—ranks and refines these features. Decision tree and RuleFit models are employed to classify data and extract human-readable rules. Additionally, a novel statistical feature map overlay visualization generates heatmap-like representations of three key statistical measures (mean, skewness, and entropy), providing both localized and quantifiable visual explanations of model decisions. The proposed method has been validated on five medical imaging datasets—COVID-19 radiography, ultrasound breast cancer, brain tumour magnetic resonance imaging, lung and colon cancer histopathological, and glaucoma images—with results confirmed by medical experts, demonstrating its effectiveness in enhancing interpretability for medical image classification tasks.},
  keywords   = {association rules, deep learning, feature selection, XAI},
  pubstate   = {published},
  tppubtype  = {article}
}
are employed to classify data and extract human-readable rules. Additionally, a novel statistical feature map overlay visualization generates heatmap-like representations of three key statistical measures (mean, skewness, and entropy), providing both localized and quantifiable visual explanations of model decisions. The proposed method has been validated on five medical imaging datasets—COVID-19 radiography, ultrasound
breast cancer, brain tumour magnetic resonance imaging, lung and colon cancer histopathological, and glaucoma images—with results confirmed by medical experts, demonstrating its effectiveness in enhancing interpretability for medical image classification tasks.
M. Garcia-Torres
Feature selection for high-dimensional data using a multivariate search space reduction strategy based scatter search Journal Article
In: Journal of Heuristics, vol. 31, no. 1, pp. 10, 2025.
Abstract | Links | BibTeX | Tags: feature selection
@article{garcia2025feature,
  title      = {Feature selection for high-dimensional data using a multivariate search space reduction strategy based scatter search},
  author     = {M. Garcia-Torres},
  url        = {https://link.springer.com/article/10.1007/s10732-025-09550-9},
  doi        = {10.1007/s10732-025-09550-9},
  year       = {2025},
  date       = {2025-01-01},
  journal    = {Journal of Heuristics},
  volume     = {31},
  number     = {1},
  pages      = {10},
  publisher  = {Springer},
  abstract   = {In feature selection, the increasing of the dimensionality and the complexity of feature interactions make the problem challenging. Furthermore, searching for an optimal subset of features from a high-dimensional feature space is known to be an NP-hard problem. To improve the efficiency and effectiveness of the search algorithm, feature grouping has emerged as a way to reduce the search space by clustering features according to a measure. In this work we propose to reduce the search space by applying a greedy algorithm, called Multivariate Greedy Predominant Groups Generator (MGPGG). MGPGG extends the idea of the Greedy Predominant Groups Generator (GPGG) algorithm by taking into account feature interaction among three or more features. For this purpose, MGPGG uses the Multivariate Symmetrical Uncertainty (MSU) to group features that share information about the class label. We also propose a Scatter Search strategy that integrates MGPGG to find small subsets of features with high predictive power. The proposed algorithm, called Multivariate Predominant Group-based Scatter Search (MPGSS), is tested on high-dimensional data from biomedical and text-mining fields. The proposal is compared with state-of-the-art feature selection strategies. Results show that MPGSS is competitive since it is capable of finding small subsets of features while keeping high predictive classification models.},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article}
}
NP-hard problem. To improve the efficiency and effectiveness of the search algorithm, feature grouping has emerged as a way to reduce the search space by clustering features according to a measure. In this work we propose to reduce the search space by
applying a greedy algorithm, called Multivariate Greedy Predominant Groups Generator (MGPGG). MGPGG extends the idea of the Greedy Predominant Groups Generator (GPGG) algorithm by taking into account feature interaction among three or more features. For
this purpose, MGPGG uses the Multivariate Symmetrical Uncertainty (MSU) to group features that share information about the class label. We also propose a Scatter Search strategy that integrates MGPGG to find small subsets of features with high predictive power.
The proposed algorithm, called Multivariate Predominant Group-based Scatter Search (MPGSS), is tested on high-dimensional data from biomedical and text-mining fields. The proposal is compared with state-of-the-art feature selection strategies. Results show that
MPGSS is competitive since it is capable of finding small subsets of features while keeping high predictive classification models. url = https://link.springer.com/article/10.1007/s10732-025-09550-9
2024
M. J. Jiménez-Navarro and A.R. Troncoso-García and A. Troncoso and F. Martínez-Álvarez and M. Martínez-Ballesteros
Explainable Deep Learning with Embedded Feature Selection for Electricity Demand Forecasting Conference
SST International Conference on Smart Systems and Technologies, 2024.
Abstract | Links | BibTeX | Tags: deep learning, energy, feature selection, XAI
@conference{SST2024,
  title      = {Explainable Deep Learning with Embedded Feature Selection for Electricity Demand Forecasting},
  author     = {M. J. Jiménez-Navarro and A. R. Troncoso-García and A. Troncoso and F. Martínez-Álvarez and M. Martínez-Ballesteros},
  url        = {https://ieeexplore.ieee.org/document/10755283},
  doi        = {10.1109/SST61991.2024.10755283},
  year       = {2024},
  date       = {2024-10-16},
  urldate    = {2024-10-16},
  booktitle  = {SST International Conference on Smart Systems and Technologies},
  pages      = {153--158},
  abstract   = {Electricity demand forecasting is an important part of the energy industry strategy. Accurate predictions are crucial for maintaining a stable energy supply, planning production, managing distribution, preventing grid overloads, integrating renewable energy sources, and reducing costs and environmental impact. Machine learning and, in particular, deep learning are promising techniques to improve the prediction accuracy of electric demand, but face challenges related to a lack of interpretability due to the “black box” nature of some models. Feature selection methods address these issues by identifying relevant features and simplifying the learning process. This paper aims to explain the most critical lags that impact electric demand forecasting in Spain using the temporal selection layer technique within deep learning models for time series forecasting. This technique transforms a neural network into a model with embedded feature selection, aiming to enhance efficacy and interpretability while reducing computational costs. The results were compared with other methods that incorporate an embedded feature selection mechanism to select the best model. Furthermore, an explainable technique is used to assess the feature importance in the best model over the last year to understand how input features influence electric demand forecasting and provide insights into their contributions and interactions. The results show that our approach improves both the efficacy and interpretability in the context of electric demand forecasting.},
  keywords   = {deep learning, energy, feature selection, XAI},
  pubstate   = {published},
  tppubtype  = {conference}
}
F. Rodríguez-Díaz and A. M. Chacón-Maldonado and A. R. Troncoso-García and G. Asencio-Cortés
Explainable Olive grove and Grapevine pest forecasting through machine learning-based classification and regression Journal Article
In: Results in Engineering, vol. 24, pp. 103058, 2024.
Abstract | Links | BibTeX | Tags: deep learning, feature selection, time series, XAI
@article{RODRIGUEZ24,
  title      = {Explainable Olive grove and Grapevine pest forecasting through machine learning-based classification and regression},
  author     = {F. Rodríguez-Díaz and A. M. Chacón-Maldonado and A. R. Troncoso-García and G. Asencio-Cortés},
  url        = {https://www.sciencedirect.com/science/article/pii/S2590123024013136},
  doi        = {10.1016/j.rineng.2024.103058},
  year       = {2024},
  date       = {2024-09-09},
  urldate    = {2024-09-09},
  journal    = {Results in Engineering},
  volume     = {24},
  pages      = {103058},
  abstract   = {Pests significantly impact agricultural productivity, making early detection crucial for maximizing yields. This paper explores the use of machine learning models to predict olive fly and red spider mite infestations in Andalusia. Four datasets on crop phenology, pest populations, and damage levels were used, with models developed using the Python package H20, which focuses on interpretability through SHAP values and ICE plots. The results showed high precision in predicting pest outbreaks, particularly for the olive fly, with minimal differences between models using feature selection. In the vineyard dataset, the selection of characteristics improved the performance of the model by reducing the MAE and increasing R2. Explainability techniques identified solar radiation and wind direction as key factors in olive fly predictions, while past pest occurrences and wind velocity were influential for red spider mites, providing farmers with actionable insights for timely pest control.},
  keywords   = {deep learning, feature selection, time series, XAI},
  pubstate   = {published},
  tppubtype  = {article}
}
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Brito and F. Martínez-Álvarez and G. Asencio-Cortés
Embedded feature selection for neural networks via learnable drop layer Journal Article
In: Logic Journal of the IGPL, pp. jzae062, 2024.
Abstract | Links | BibTeX | Tags: deep learning, feature selection, time series
@article{JIMENEZ-NAVARRO24b,
  title      = {Embedded feature selection for neural networks via learnable drop layer},
  author     = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Brito and F. Martínez-Álvarez and G. Asencio-Cortés},
  url        = {https://academic.oup.com/jigpal/advance-article/doi/10.1093/jigpal/jzae062/7689640},
  doi        = {10.1093/jigpal/jzae062},
  year       = {2024},
  date       = {2024-07-06},
  urldate    = {2024-07-06},
  journal    = {Logic Journal of the IGPL},
  pages      = {jzae062},
  abstract   = {Feature selection is a widely studied technique whose goal is to reduce the dimensionality of the problem by removing irrelevant features. It has multiple benefits, such as improved efficacy, efficiency and interpretability of almost any type of machine learning model. Feature selection techniques may be divided into three main categories, depending on the process used to remove the features known as Filter, Wrapper and Embedded. Embedded methods are usually the preferred feature selection method that efficiently obtains a selection of the most relevant features of the model. However, not all models support an embedded feature selection that forces the use of a different method, reducing the efficiency and reliability of the selection. Neural networks are an example of a model that does not support embedded feature selection. As neural networks have shown to provide remarkable results in multiple scenarios such as classification and regression, sometimes in an ensemble with a model that includes an embedded feature selection, we attempt to embed a feature selection process with a general-purpose methodology. In this work, we propose a novel general-purpose layer for neural networks that removes the influence of irrelevant features. The Feature-Aware Drop Layer is included at the top of the neural network and trained during the backpropagation process without any additional parameters. Our methodology is tested with 17 datasets for classification and regression tasks, including data from different fields such as Health, Economic and Environment, among others. The results show remarkable improvements compared to three different feature selection approaches, with reliable, efficient and effective results.},
  keywords   = {deep learning, feature selection, time series},
  pubstate   = {published},
  tppubtype  = {article}
}
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés
Explaining deep learning models for ozone pollution prediction via embedded feature selection Journal Article
In: Applied Soft Computing, vol. 157, pp. 111504, 2024.
Abstract | Links | BibTeX | Tags: deep learning, feature selection, time series, XAI
@article{JIMENEZ-NAVARRO24,
  title      = {Explaining deep learning models for ozone pollution prediction via embedded feature selection},
  author     = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés},
  url        = {https://www.sciencedirect.com/science/article/pii/S1568494624002783},
  doi        = {10.1016/j.asoc.2024.111504},
  year       = {2024},
  date       = {2024-04-04},
  journal    = {Applied Soft Computing},
  volume     = {157},
  pages      = {111504},
  abstract   = {Ambient air pollution is a pervasive global issue that poses significant health risks. Among pollutants, ozone (O3) is responsible for an estimated 1 to 1.2 million premature deaths yearly. Furthermore, O3 adversely affects climate warming, crop productivity, and more. Its formation occurs when nitrogen oxides and volatile organic compounds react with short-wavelength solar radiation. Consequently, urban areas with high traffic volume and elevated temperatures are particularly prone to elevated O3 levels, which pose a significant health risk to their inhabitants. In response to this problem, many countries have developed web and mobile applications that provide real-time air pollution information using sensor data. However, while these applications offer valuable insight into current pollution levels, predicting future pollutant behavior is crucial for effective planning and mitigation strategies. Therefore, our main objectives are to develop accurate and efficient prediction models and identify the key factors that influence O3 levels. We adopt a time series forecasting approach to address these objectives, which allows us to analyze and predict O3 future behavior. Additionally, we tackle the feature selection problem to identify the most relevant features and periods that contribute to prediction accuracy by introducing a novel method called the Time Selection Layer in Deep Learning models, which significantly improves model performance, reduces complexity, and enhances interpretability. Our study focuses on data collected from five representative areas in Seville, Cordova, and Jaen provinces in Spain, using multiple sensors to capture comprehensive pollution data. We compare the performance of three models: Lasso, Decision Tree, and Deep Learning with and without incorporating the Time Selection Layer. Our results demonstrate that including the Time Selection Layer significantly enhances the effectiveness and interpretability of Deep Learning models, achieving an average effectiveness improvement of 9\% across all monitored areas.},
  keywords   = {deep learning, feature selection, time series, XAI},
  pubstate   = {published},
  tppubtype  = {article}
}
M. García-Torres and D. P. Pinto-Roa and C. Núñez-Castillo and B. Quiñonez and G. Vázquez and M. Allegretti and M. E. García-Diaz
Feature selection applied to QoS/QoE modeling on video and web-based mobile data services: An ordinal approach Journal Article
In: Computer Communications, 2024.
Abstract | Links | BibTeX | Tags: feature selection
@article{garcia2024feature,
  title      = {Feature selection applied to {QoS}/{QoE} modeling on video and web-based mobile data services: An ordinal approach},
  author     = {M. García-Torres and D. P. Pinto-Roa and C. Núñez-Castillo and B. Quiñonez and G. Vázquez and M. Allegretti and M. E. García-Diaz},
  url        = {https://www.sciencedirect.com/science/article/pii/S0140366424000410},
  doi        = {10.1016/j.comcom.2024.02.004},
  year       = {2024},
  date       = {2024-01-01},
  journal    = {Computer Communications},
  publisher  = {Elsevier},
  abstract   = {Nowadays, mobile service providers perceive the user experience as a reliable indicator of the quality associated to a service. Given a set of Quality of Service (QoS) factors, the aim is to predict the Quality of Experience (QoE), measured in terms of the Mean Opinion Score (MOS). Although this problem is receiving much attention, there are still some challenges that require more research in order to find effective solutions for meeting user’s expectation in terms of service quality. A core challenge in this topic refers to the analysis of the contribution of each factor to the QoS/QoE Model. In this work, we study the mapping between QoS and QoE on video and web-based services using a machine learning approach. For such purpose, we design a lab-testing methodology to emulate different cellular transmission network scenarios. Then, we address the problem of inducing a predictive model and identifying relevant QoS factors. Results suggest that bandwidth is a key factor when analyzing user’s perception of service quality.},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article}
}
G. Sosa-Cabrera and S. Gómez-Guerrero and M. García-Torres and C. E. Schaerer
Feature selection: A perspective on inter-attribute cooperation Journal Article
In: International Journal of Data Science and Analytics, vol. 17, no. 2, pp. 139–151, 2024.
Abstract | Links | BibTeX | Tags: feature selection
@article{sosa2024feature,
  title      = {Feature selection: A perspective on inter-attribute cooperation},
  author     = {G. Sosa-Cabrera and S. Gómez-Guerrero and M. García-Torres and C. E. Schaerer},
  url        = {https://link.springer.com/article/10.1007/s41060-023-00439-z},
  doi        = {10.1007/s41060-023-00439-z},
  year       = {2024},
  date       = {2024-01-01},
  urldate    = {2024-01-01},
  journal    = {International Journal of Data Science and Analytics},
  volume     = {17},
  number     = {2},
  pages      = {139--151},
  publisher  = {Springer},
  abstract   = {High-dimensional datasets depict a challenge for learning tasks in data mining and machine learning. Feature selection is an effective technique in dealing with dimensionality reduction. It is often an essential data processing step prior to applying a learning algorithm. Over the decades, filter feature selection methods have evolved from simple univariate relevance ranking algorithms to more sophisticated relevance-redundancy trade-offs and to multivariate dependencies-based approaches in recent years. This tendency to capture multivariate dependence aims at obtaining unique information about the class from the intercooperation among features. This paper presents a comprehensive survey of the state-of-the-art work on filter feature selection methods assisted by feature intercooperation, and summarizes the contributions of different approaches found in the literature. Furthermore, current issues and challenges are introduced to identify promising future research and development.},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article}
}
tasks in data mining and machine learning. Feature selection is an
effective technique in dealing with dimensionality reduction. It is
often an essential data processing step prior to applying a learning
algorithm. Over the decades, filter feature selection methods have
evolved from simple univariate relevance ranking algorithms to more
sophisticated relevance-redundancy trade-offs and to multivariate
dependencies-based approaches in recent years. This tendency to capture
multivariate dependence aims at obtaining unique information about the
class from the intercooperation among features. This paper presents a
comprehensive survey of the state-of-the-art work on filter feature
selection methods assisted by feature intercooperation, and summarizes
the contributions of different approaches found in the literature.
Furthermore, current issues and challenges are introduced to identify
promising future research and development.
2023
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés
Embedded Temporal Feature Selection for Time Series Forecasting Using Deep Learning Conference
IWANN 17th International Work-Conference on Artificial Neural Networks, vol. 14135, Lecture Notes in Computer Science 2023.
Links | BibTeX | Tags: deep learning, feature selection, time series
@conference{JIMENEZ-NAVARRO23_IWANN,
  title      = {Embedded Temporal Feature Selection for Time Series Forecasting Using Deep Learning},
  author     = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and F. Martínez-Álvarez and G. Asencio-Cortés},
  url        = {https://link.springer.com/chapter/10.1007/978-3-031-43078-7_2},
  doi        = {10.1007/978-3-031-43078-7_2},
  year       = {2023},
  date       = {2023-09-30},
  booktitle  = {IWANN 17th International Work-Conference on Artificial Neural Networks},
  volume     = {14135},
  pages      = {15--26},
  series     = {Lecture Notes in Computer Science},
  keywords   = {deep learning, feature selection, time series},
  pubstate   = {published},
  tppubtype  = {conference}
}
M. García-Torres and R. Ruiz and F. Divina
Evolutionary feature selection on high dimensional data using a search space reduction approach Journal Article
In: Engineering Applications of Artificial Intelligence, vol. 117, pp. 105556, 2023.
Abstract | Links | BibTeX | Tags: big data, feature selection
@article{garcia2023evolutionary,
  author     = {M. García-Torres and R. Ruiz and F. Divina},
  title      = {Evolutionary feature selection on high dimensional data using a search space reduction approach},
  journal    = {Engineering Applications of Artificial Intelligence},
  volume     = {117},
  pages      = {105556},
  publisher  = {Elsevier},
  year       = {2023},
  date       = {2023-01-01},
  doi        = {10.1016/j.engappai.2022.105556},
  url        = {https://www.sciencedirect.com/science/article/pii/S0952197622005462},
  keywords   = {big data, feature selection},
  pubstate   = {published},
  tppubtype  = {article},
  abstract   = {Feature selection is becoming more and more a challenging task due to the increase of the dimensionality of the data. The complexity of the interactions among features and the size of the search space make it unfeasible to find the optimal subset of features. In order to reduce the search space, feature grouping has arisen as an approach that allows to cluster feature according to the shared information about the class. On the other hand, metaheuristic algorithms have proven to achieve sub-optimal solutions within a reasonable time. In this work we propose a Scatter Search (SS) strategy that uses feature grouping to generate an initial population comprised of diverse and high quality solutions. Solutions are then evolved by applying random mechanisms in combination with the feature group structure, with the objective of maintaining during the search a population of good and, at the same time, as diverse as possible solutions. Not only does the proposed strategy provide the best subset of features found but it also reduces the redundancy structure of the data. We test the strategy on high dimensional data from biomedical and text-mining domains. The results are compared with those obtained by other adaptations of SS and other popular strategies. Results show that the proposed strategy can find, on average, the smallest subsets of features without degrading the performance of the classifier.}
}
A. M. Chacón-Maldonado and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso
FS-Studio: An extensive and efficient feature selection experimentation tool for Weka Explorer Journal Article
In: SoftwareX, vol. 23, pp. 101401, 2023.
Links | BibTeX | Tags: feature selection
@article{Chacon2023,
  title      = {{FS-Studio}: An extensive and efficient feature selection experimentation tool for {Weka} {Explorer}},
  author     = {A. M. Chacón-Maldonado and G. Asencio-Cortés and F. Martínez-Álvarez and A. Troncoso},
  url        = {https://www.sciencedirect.com/science/article/pii/S2352711023000973},
  doi        = {10.1016/j.softx.2023.101401},
  year       = {2023},
  date       = {2023-01-01},
  journal    = {SoftwareX},
  volume     = {23},
  pages      = {101401},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article}
}
A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso
Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals Conference
IWANN 17th International Work-Conference on Artificial Neural Networks, Lecture Notes in Computer Science 2023.
BibTeX | Tags: deep learning, feature selection, time series
@conference{IWANN2023,
  title      = {Deep Learning-Based Approach for Sleep Apnea Detection Using Physiological Signals},
  author     = {A. R. Troncoso-García and M. Martínez-Ballesteros and F. Martínez-Álvarez and A. Troncoso},
  year       = {2023},
  date       = {2023-01-01},
  booktitle  = {IWANN 17th International Work-Conference on Artificial Neural Networks},
  series     = {Lecture Notes in Computer Science},
  keywords   = {deep learning, feature selection, time series},
  pubstate   = {published},
  tppubtype  = {conference}
}
G. Sosa-Cabrera and S. Gómez-Guerrero and M. García-Torres and C. E. Schaerer
Feature selection: A perspective on inter-attribute cooperation Journal Article
In: International Journal of Data Science and Analytics, pp. 1–13, 2023.
Abstract | Links | BibTeX | Tags: feature selection
@article{sosa2023feature,
  author     = {G. Sosa-Cabrera and S. Gómez-Guerrero and M. García-Torres and C. E. Schaerer},
  title      = {Feature selection: A perspective on inter-attribute cooperation},
  journal    = {International Journal of Data Science and Analytics},
  pages      = {1--13},
  publisher  = {Springer},
  year       = {2023},
  date       = {2023-01-01},
  doi        = {10.1007/s41060-023-00439-z},
  url        = {https://link.springer.com/article/10.1007/s41060-023-00439-z},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article},
  abstract   = {High-dimensional datasets depict a challenge for learning tasks in data mining and machine learning. Feature selection is an effective technique in dealing with dimensionality reduction. It is often an essential data processing step prior to applying a learning algorithm. Over the decades, filter feature selection methods have evolved from simple univariate relevance ranking algorithms to more sophisticated relevance-redundancy trade-offs and to multivariate dependencies-based approaches in recent years. This tendency to capture multivariate dependence aims at obtaining unique information about the class from the intercooperation among features. This paper presents a comprehensive survey of the state-of-the-art work on filter feature selection methods assisted by feature intercooperation, and summarizes the contributions of different approaches found in the literature. Furthermore, current issues and challenges are introduced to identify promising future research and development.}
}
2022
A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown
Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of Amazon Algorithm Journal Article
In: Information, vol. 13, no. 2, pp. 1–16, 2022.
Abstract | Links | BibTeX | Tags: feature selection, time series
@article{losada2022,
  title      = {Automatic Eligibility of Sellers in an Online Marketplace: A Case Study of {Amazon} Algorithm},
  author     = {A. Gómez-Losada and G. Asencio-Cortés and N. Duch-Brown},
  url        = {https://www.mdpi.com/2078-2489/13/2/44},
  doi        = {10.3390/info13020044},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {Information},
  volume     = {13},
  number     = {2},
  eid        = {44},
  pages      = {1--16},
  abstract   = {Purchase processes on Amazon Marketplace begin at the Buy Box, which represents the buy click process through which numerous sellers compete. This study aimed to estimate empirically the relevant seller characteristics that Amazon could consider featuring in the Buy Box. To that end, 22 product categories from Italy’s Amazon web page were studied over a ten-month period, and the sellers were analyzed through their products featured in the Buy Box. Two different experiments were proposed and the results were analyzed using four classification algorithms (a neural network, random forest, support vector machine, and C5.0 decision trees) and a rule-based classification. The first experiment aimed to characterize sellers unspecifically by predicting their change at the Buy Box. The second one aimed to predict which seller would be featured in it. Both experiments revealed that the customer experience and the dynamics of the sellers’ prices were important features of the Buy Box. Additionally, we proposed a set of default features that Amazon could consider when no information about sellers was available. We also proposed the possible existence of a relationship or composition among important features that could be used for sellers to be featured in the Buy Box.},
  keywords   = {feature selection, time series},
  pubstate   = {published},
  tppubtype  = {article}
}
S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C. E. Schaerer
Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty Journal Article
In: Entropy, vol. 24, no. 1, pp. 64, 2022.
Abstract | Links | BibTeX | Tags: feature selection
@article{gomez2022measuring,
title = {Measuring Interactions in Categorical Datasets Using Multivariate Symmetrical Uncertainty},
author = {S. Gómez-Guerrero and I. Ortiz and G. Sosa-Cabrera and M. García-Torres and C.E. Schaerer},
url = {https://www.mdpi.com/1099-4300/24/1/64},
doi = {10.3390/e24010064},
year = {2022},
date = {2022-01-01},
journal = {Entropy},
volume = {24},
number = {1},
pages = {64},
publisher = {Multidisciplinary Digital Publishing Institute},
abstract = {Interaction between variables is often found in statistical models, and it is usually expressed in the model as an additional term when the variables are numeric. However, when the variables are categorical (also known as nominal or qualitative) or mixed numerical-categorical, defining, detecting, and measuring interactions is not a simple task. In this work, based on an entropy-based correlation measure for n nominal variables (named as Multivariate Symmetrical Uncertainty (MSU)), we propose a formal and broader definition for the interaction of the variables. Two series of experiments are presented. In the first series, we observe that datasets where some record types or combinations of categories are absent, forming patterns of records, which often display interactions among their attributes. In the second series, the interaction/non-interaction behavior of a regression model (entirely built on continuous variables) gets successfully replicated under a discretized version of the dataset. It is shown that there is an interaction-wise correspondence between the continuous and the discretized versions of the dataset. Hence, we demonstrate that the proposed definition of interaction enabled by the MSU is a valuable tool for detecting and measuring interactions within linear and non-linear models.},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Sousa Brito and F. Martínez-Álvarez and G. Asencio-Cortés
Feature-Aware Drop Layer (FADL): A Nonparametric Neural Network Layer for Feature Selection Conference
SOCO 17th International Conference on Soft Computing Models in Industrial and Environmental Applications, vol. 531, Lecture Notes in Networks and Systems 2022.
Links | BibTeX | Tags: deep learning, feature selection
@conference{FADL23,
title = {Feature-Aware Drop Layer (FADL): A Nonparametric Neural Network Layer for Feature Selection},
author = {M. J. Jiménez-Navarro and M. Martínez-Ballesteros and I. S. Sousa Brito and F. Martínez-Álvarez and G. Asencio-Cortés},
url = {https://link.springer.com/chapter/10.1007/978-3-031-18050-7_54},
doi = {10.1007/978-3-031-18050-7_54},
year = {2022},
date = {2022-01-01},
booktitle = {SOCO 17th International Conference on Soft Computing Models in Industrial and Environmental Applications},
volume = {531},
pages = {557--566},
series = {Lecture Notes in Networks and Systems},
keywords = {deep learning, feature selection},
pubstate = {published},
tppubtype = {conference}
}
2021
M. García-Torres and F. Gómez-Vela and F. Divina and D.P. Pinto-Roa and J.L. Vázquez Noguera and J.C. Román
Scatter search for high-dimensional feature selection using feature grouping Conference
GECCO Genetic and Evolutionary Computation Conference, 2021.
Links | BibTeX | Tags: big data, feature selection, pattern recognition
@conference{garcia2021scatter,
title = {Scatter search for high-dimensional feature selection using feature grouping},
author = {M. García-Torres and F. Gómez-Vela and F. Divina and D.P. Pinto-Roa and J.L. Vázquez Noguera and J.C. Román},
doi = {10.1145/3449726.3459481},
year = {2021},
date = {2021-07-01},
booktitle = {GECCO Genetic and Evolutionary Computation Conference},
pages = {149--150},
keywords = {big data, feature selection, pattern recognition},
pubstate = {published},
tppubtype = {conference}
}
R. Mortazavi and S. Mortazavi and A. Troncoso
Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer Journal Article
In: Engineering with Computers, 2021.
Abstract | Links | BibTeX | Tags: feature selection
@article{Mortazavi21,
  author     = {R. Mortazavi and S. Mortazavi and A. Troncoso},
  title      = {Wrapper-based feature selection using regression trees to predict intrinsic viscosity of polymer},
  journal    = {Engineering with Computers},
  year       = {2021},
  date       = {2021-01-01},
  doi        = {10.1007/s00366-020-01226-1},
  url        = {https://link.springer.com/article/10.1007/s00366-020-01226-1},
  abstract   = {This paper introduces different types of regression trees for viscosity property forecasting in polymer solutions. Although regression trees have been extensively used in other fields, they do not have been explored to predict the viscosity. One key issue in the context of materials science is to determine a priori which characteristics must be included to describe the prediction model due to a large number of molecular descriptors is obtained. To deal with this, we propose a wrapper method to select the features based on regression trees. Thus, we use regression trees to evaluate different subsets of attributes and build a model from the subset of features that achieved the minimum error. In particular, the performance of eight regression tree algorithms, including both linear and non-linear models, is evaluated and compared to other forecasting approaches using a dataset composed of 64 polymers and 2962 molecular descriptors. The results show that regression trees with nearest neighbors based local models in leaves predict with high accuracy. Moreover, results have been compared to other forecasting approaches such as multivariate linear regression, neural networks and support vector machines showing remarkable improvements in terms of accuracy.},
  keywords   = {feature selection},
  pubstate   = {published},
  tppubtype  = {article}
}
J. Roiz-Pagador and A. M. Chacón-Maldonado and R. Ruiz and G. Asencio-Cortés
Earthquake Prediction in California using Feature Selection techniques Conference
SOCO 16th International Conference on Soft Computing Models in Industrial and Environmental Applications, Advances in Intelligent Systems and Computing 2021.
Links | BibTeX | Tags: feature selection, natural disasters, time series
@conference{roiz2022,
title = {Earthquake Prediction in California using Feature Selection techniques},
author = {J. Roiz-Pagador and A. M. Chacón-Maldonado and R. Ruiz and G. Asencio-Cortés},
url = {https://link.springer.com/chapter/10.1007/978-3-030-87869-6_69},
doi = {10.1007/978-3-030-87869-6_69},
year = {2021},
date = {2021-01-01},
booktitle = {SOCO 16th International Conference on Soft Computing Models in Industrial and Environmental Applications},
series = {Advances in Intelligent Systems and Computing},
keywords = {feature selection, natural disasters, time series},
pubstate = {published},
tppubtype = {conference}
}
S.A. Grillo and J.C. Román and J.D. Mello-Román and J.L. Vázquez Noguera and M. García-Torres and F. Divina and P.E. Sotomayor
Adjacent Inputs With Different Labels and Hardness in Supervised Learning Journal Article
In: IEEE Access, vol. 9, pp. 162487–162498, 2021.
Links | BibTeX | Tags: feature selection, pattern recognition
@article{grillo2021adjacent,
title = {Adjacent Inputs With Different Labels and Hardness in Supervised Learning},
author = {S.A. Grillo and J.C. Román and J.D. Mello-Román and J.L. Vázquez Noguera and M. García-Torres and F. Divina and P.E. Sotomayor},
doi = {10.1109/ACCESS.2021.3131150},
year = {2021},
date = {2021-01-01},
journal = {IEEE Access},
volume = {9},
pages = {162487--162498},
publisher = {IEEE},
keywords = {feature selection, pattern recognition},
pubstate = {published},
tppubtype = {article}
}
F. Pietrapiana and J. M. Feria-Dominguez and A. Troncoso
Applying wrapper-based variable selection techniques to predict MFIs profitability: evidence from Peru Journal Article
In: Journal of Development Effectiveness, 2021.
Abstract | Links | BibTeX | Tags: feature selection
@article{JDE_Feria,
title = {Applying wrapper-based variable selection techniques to predict MFIs profitability: evidence from Peru},
author = {F. Pietrapiana and J. M. Feria-Dominguez and A. Troncoso},
doi = {10.1080/19439342.2021.1884119},
year = {2021},
date = {2021-01-01},
journal = {Journal of Development Effectiveness},
abstract = {In this paper, we analyse the main factors explaining the profitability (ROA) of Microfinance Institutions (MFIs) in Peru from 2011 to 2017. We apply three wrapper techniques to a sample of 168 Peruvians MFIs and 69 attributes obtained from MIX Market database. After running the algorithms M5ʹ, k nearest neighbours (KNN) and Random Forest, we find that the M5ʹ algorithm provides the best fit for predicting ROA. Particularly, the key variable of the regression tree is the percentage of expenses over assets and, depending on its value, it is followed by net income after taxes and before donations, or profit margins.},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
2019
G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina
A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem Journal Article
In: Information Sciences, vol. 494, pp. 1–20, 2019.
Abstract | Links | BibTeX | Tags: feature selection
@article{IS-2019,
title = {A multivariate approach to the symmetrical uncertainty measure: Application to feature selection problem},
author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina},
url = {https://www.sciencedirect.com/science/article/pii/S0020025519303603},
doi = {10.1016/j.ins.2019.04.046},
year = {2019},
date = {2019-01-01},
journal = {Information Sciences},
volume = {494},
pages = {1--20},
abstract = {In this work we propose an extension of the Symmetrical Uncertainty (SU) measure in order to address the multivariate case, simultaneously acquiring the capability to detect possible correlations and interactions among features. This generalization, denoted Multivariate Symmetrical Uncertainty (MSU), is based on the concepts of Total Correlation (TC) and Mutual Information (MI) extended to the multivariate case. The generalized measure accounts for the total amount of dependency within a set of variables as a single monolithic quantity. Multivariate measures are usually biased due to several factors. To overcome this problem, a mathematical expression is proposed, based on the cardinality of all features, which can be used to calculate the number of samples needed to estimate the MSU without bias at a pre-specified significance level. Theoretical and experimental results on synthetic data show that the proposed sample size expression properly controls the bias. In addition, when the MSU is applied to feature selection on synthetic and real-world data, it has the advantage of adequately capturing linear and nonlinear correlations and interactions, and it can therefore be used as a new feature subset evaluation method.},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
2018
G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina
Understanding a multivariate semi-metric in the search strategies for attributes subset selection Conference
Proceeding Series of the Brazilian Society of Computational and Applied Mathematics, 2018.
Links | BibTeX | Tags: feature selection
@conference{Sosa2018b,
title = {Understanding a multivariate semi-metric in the search strategies for attributes subset selection},
author = {G. Sosa-Cabrera and M. García-Torres and S. Gómez-Guerrero and C.E. Schaerer and F. Divina},
url = {https://proceedings.sbmac.emnuvens.com.br/sbmac/article/view/2506},
year = {2018},
date = {2018-01-01},
booktitle = {Proceeding Series of the Brazilian Society of Computational and Applied Mathematics},
keywords = {feature selection},
pubstate = {published},
tppubtype = {conference}
}
2016
M. García-Torres and F. Gómez-Vela and B. Melián-Batista and J. Marcos Moreno-Vega
High-dimensional feature selection via feature grouping: A Variable neighborhood Search approach Journal Article
In: Information Sciences, vol. 326, pp. 102-118, 2016.
Links | BibTeX | Tags: feature selection
@article{IS:GT-2016,
title = {High-dimensional feature selection via feature grouping: A Variable neighborhood Search approach},
author = {M. García-Torres and F. Gómez-Vela and B. Melián-Batista and J. Marcos Moreno-Vega},
url = {https://www.sciencedirect.com/science/article/pii/S0020025515005460},
doi = {10.1016/j.ins.2015.07.041},
year = {2016},
date = {2016-01-01},
journal = {Information Sciences},
volume = {326},
pages = {102--118},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
2013
M. García-Torres and R. Armañanzas and C. Bielza and P. Larrañaga
Comparison of metaheuristic strategies for peakbin selection in proteomic mass spectrometry data Journal Article
In: Information Sciences, vol. 222, pp. 229-246, 2013.
Links | BibTeX | Tags: bioinformatics, feature selection
@article{IS:GT-2013,
title = {Comparison of metaheuristic strategies for peakbin selection in proteomic mass spectrometry data},
author = {M. García-Torres and R. Armañanzas and C. Bielza and P. Larrañaga},
url = {https://www.sciencedirect.com/science/article/pii/S0020025510006195},
doi = {10.1016/j.ins.2010.12.013},
year = {2013},
date = {2013-01-01},
journal = {Information Sciences},
volume = {222},
pages = {229--246},
keywords = {bioinformatics, feature selection},
pubstate = {published},
tppubtype = {article}
}
2012
R. Ruíz and J. Riquelme and J. Aguilar-Ruíz and M. García-Torres
Fast feature selection aimed at high dimensional data via hybrid-sequential-ranked searches Journal Article
In: Expert Systems with Applications, vol. 39, no. 12, pp. 11094-11102, 2012.
Links | BibTeX | Tags: feature selection
@article{ESA:Rod-2012,
title = {Fast feature selection aimed at high dimensional data via hybrid-sequential-ranked searches},
author = {R. Ruíz and J. Riquelme and J. Aguilar-Ruíz and M. García-Torres},
url = {https://www.sciencedirect.com/science/article/abs/pii/S0957417412005842},
doi = {10.1016/j.eswa.2012.03.061},
year = {2012},
date = {2012-01-01},
journal = {Expert Systems with Applications},
volume = {39},
number = {12},
pages = {11094--11102},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}
2006
F. C. García-López and M. García-Torres and B. Melián-Batista and J. A. Moreno Pérez and J. M. Moreno-Vega
Solving the Feature Selection Problem by a Parallel Scatter Search Journal Article
In: European Journal of Operational Research, vol. 169, no. 2, pp. 477-489, 2006.
Links | BibTeX | Tags: feature selection
@article{EJOR:GL-2006,
title = {Solving the Feature Selection Problem by a Parallel Scatter Search},
author = {F.~C. García-López and M. García-Torres and B. Melián-Batista and J.~A. Moreno Pérez and J.~M. Moreno-Vega},
url = {https://www.sciencedirect.com/science/article/abs/pii/S0377221704005491},
doi = {10.1016/j.ejor.2004.08.010},
year = {2006},
date = {2006-01-01},
journal = {European Journal of Operational Research},
volume = {169},
number = {2},
pages = {477--489},
keywords = {feature selection},
pubstate = {published},
tppubtype = {article}
}