Publications
2025
A. Lopez-Fernandez and F. Divina and F. A. Gomez-Vela and M. Garcia-Torres
Data mining for enhancing learning and assessment to a microcompetence-based methodology in higher education Journal Article
In: IEEE Revista Iberoamericana de Tecnologias del Aprendizaje, 2025.
Abstract | Links | BibTeX | Tags: data mining, education
@article{lopez2025data,
title = {Data mining for enhancing learning and assessment to a microcompetence-based methodology in higher education},
author = {A. Lopez-Fernandez and F. Divina and F. A. Gomez-Vela and M. Garcia-Torres},
url = {https://ieeexplore.ieee.org/abstract/document/10849581},
doi = {10.1109/RITA.2025.3532879},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-01},
journal = {IEEE Revista Iberoamericana de Tecnologias del Aprendizaje},
publisher = {IEEE},
abstract = {This work introduces an innovative teaching methodology based on microcompetences applied in a higher education context. The intervention involved creating a repository of practical case studies in the form of quizzes and integrating microcompetences into each course activity. The digital tool Sapiens was used to identify learning deficiencies and provide both collective and individualized feedback. The results indicate a significant increase in student participation and academic performance compared to previous years. Furthermore, students voluntarily used virtual teaching modalities to reinforce their knowledge, particularly in more complex areas. Data mining techniques identified performance patterns among students, highlighting the methodology’s effectiveness in improving both transversal and specific competences. The study’s findings underscore the importance of implementing microcompetency-based methodologies in higher education to enhance the quality of learning and continuous assessment. This approach not only facilitated a deeper understanding of course content but also promoted critical thinking, abstract reasoning, and interpersonal skills, preparing students for future academic and professional challenges. Additionally, the flexibility and adaptability of the digital tools used provided a seamless transition across different teaching modalities, such as in-person, hybrid, and online formats. Thus, the implementation of this innovative methodology has demonstrated its potential to significantly improve student engagement, participation, and academic success, thereby contributing to a more effective and comprehensive educational experience in higher education.},
keywords = {data mining, education},
pubstate = {published},
tppubtype = {article}
}
into each course activity. The digital tool Sapiens was used to identify learning deficiencies and provide both collective and individualized feedback. The results indicate a significant increase in student participation and academic performance compared to
previous years. Furthermore, students voluntarily used virtual teaching modalities to reinforce their knowledge, particularly in more complex areas. Data mining techniques identified performance patterns among students, highlighting the methodology’s
effectiveness in improving both transversal and specific competences. The study’s findings underscore the importance of implementing microcompetency-based methodologies in higher education to enhance the quality of learning and continuous assessment. This
approach not only facilitated a deeper understanding of course content but also promoted critical thinking, abstract reasoning, and interpersonal skills, preparing students for future academic and professional challenges. Additionally, the flexibility and
adaptability of the digital tools used provided a seamless transition across different teaching modalities, such as in-person, hybrid, and online formats. Thus, the implementation of this innovative methodology has demonstrated its potential to significantly
improve student engagement, participation, and academic success, thereby contributing to a more effective and comprehensive educational experience in higher education. URL: https://ieeexplore.ieee.org/abstract/document/10849581
M. Garcia-Torres and F. Saucedo and F. Divina and S. Gómez
RFMSU: A multivariate symmetrical uncertainty based random forest Journal Article
In: Pattern Recognition, vol. 169, pp. 111939, 2025.
Abstract | Links | BibTeX | Tags: Classification, data mining
@article{garcia2025rfmsu,
title = {{RFMSU}: A multivariate symmetrical uncertainty based random forest},
author = {M. Garcia-Torres and F. Saucedo and F. Divina and S. Gómez},
url = {https://www.sciencedirect.com/science/article/pii/S0031320325005990?via%3Dihub},
doi = {10.1016/j.patcog.2025.111939},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-01},
journal = {Pattern Recognition},
volume = {169},
pages = {111939},
publisher = {Elsevier},
abstract = {Decision Trees (DTs) have become very popular classifiers due to their good performance and, most of all, their interpretability. In addition, the machine learning community is also paying attention to Random Forests (RFs) since they defy the interpretability-accuracy tradeoff. Most RFs strategies are based on univariate measures, a fact that may limit the capability of identifying the interaction among more than two features. In order to overcome this problem many multivariate approaches have been proposed. However, most of them are based on finding linear or non-linear combinations of features. In this work, we propose a novel univariate RF strategy that builds DTs using the Multivariate Symmetrical Uncertainty (MSU) measure as splitting criterion. The proposal, referred to as RF$_{MSU}$, was tested on high-dimensional datasets and compared to state-of-the-art univariate and multivariate DTs and RFs classifiers. Results suggest that RF$_{MSU}$ is capable of finding simpler rules than other RFs approaches while keeping a high predictive power equivalent to that of multivariate approaches. The DT strategies considered obtained simpler models than RF$_{MSU}$, but at the expense of degrading the classifier. Thus, we can conclude that RF$_{MSU}$ is a RF-based classifier that achieves a good trade-off between the performance and the complexity of the model.},
keywords = {Classification, data mining},
pubstate = {published},
tppubtype = {article}
}
and multivariate DTs and RFs classifiers. Results suggest that RF$_{MSU}$ is capable of finding simpler rules than other RFs approaches while keeping a high predictive power equivalent to that of multivariate approaches. The DT strategies considered obtained simpler models than RF$_{MSU}$, but at the expense of degrading the classifier. Thus, we can conclude that RF$_{MSU}$ is a RF-based classifier that achieves a good trade-off between the performance and the complexity of the model.
J. L. Vázquez Noguera and A. Torres-Hurtado and H. Gómez-Adorno and J. C. Mello-Román and E. J. Fleitas-Alvarez and F. F. Espinola Schulze and M. García-Torres and C. D. Méndez Gaona and P. E. Gardel Sotomayor and S. Vázquez Noguera and N. E. Zaracho Amarilla and O. W. Gamarra Esquivel
Mammography Reporting Dataset with BI-RADS System for Natural Language Processing Applications: Addressing Public Data Gaps in Spanish Journal Article
In: Data in Brief, vol. 61, pp. 111761, 2025.
Abstract | Links | BibTeX | Tags: Classification, data mining
@article{vazquez2025mammography,
title = {Mammography Reporting Dataset with {BI-RADS} System for Natural Language Processing Applications: Addressing Public Data Gaps in {Spanish}},
author = {Vázquez Noguera, J. L. and Torres-Hurtado, A. and Gómez-Adorno, H. and Mello-Román, J. C. and Fleitas-Alvarez, E. J. and Espinola Schulze, F. F. and García-Torres, M. and Méndez Gaona, C. D. and Gardel Sotomayor, P. E. and Vázquez Noguera, S. and Zaracho Amarilla, N. E. and Gamarra Esquivel, O. W.},
url = {https://www.sciencedirect.com/science/article/pii/S2352340925004883?via%3Dihub},
doi = {10.1016/j.dib.2025.111761},
year = {2025},
date = {2025-01-01},
journal = {Data in Brief},
volume = {61},
pages = {111761},
publisher = {Elsevier},
abstract = {Applying Natural Language Processing (NLP) to clinical reports is important for automating the analysis and classification of clinical data, improving diagnostic accuracy, and enhancing healthcare workflows. This article presents a dataset derived from mammography reports written in Spanish collected across multiple medical units operated by the Oxades company in Paraguay. The dataset contains 4,357 records and 15 variables, including the text of the complete report and also each of its sections separately (clinical observations, diagnostic conclusions, follow-up recommendations), and the BI-RADS (Breast Imaging Reporting and Data System) classification assigned to each one of the reports. Additionally, the dataset includes metadata such as report IDs, dates, and patient information such as age, patient reasons for the analysis, last menstruation period, type of hormonal therapy received, family history and number of children. To ensure patient confidentiality, all identifiable data was removed, and the dataset was structured using automated segmentation and manual verification to ensure quality and transparency. This dataset is an invaluable resource for both medical and AI research communities. It provides real-world data for developing and testing NLP algorithms and machine learning models, specifically for automating BI-RADS classification and analyzing mammography reports.},
keywords = {Classification, data mining},
pubstate = {published},
tppubtype = {article}
}