@inproceedings{IJCNN-25,
author = {Nascita, Alfredo and Krolikowski, Jonatan and Persico, Valerio and Pescape, Antonio and Rossi, Dario},
title = {Localizing and Exploiting Concept Areas in LLMs for Downstream Classification Tasks},
year = {2025},
month = jun,
booktitle = {IEEE International Joint Conference on Neural Networks (IJCNN'25)},
howpublished = {https://ieeexplore.ieee.org/abstract/document/11228856}
}
@inproceedings{PAKDD-25,
author = {Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario},
title = {Changepoint Detection via Subset Chains},
year = {2025},
month = jun,
booktitle = {29th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD'25)},
howpublished = {https://link.springer.com/chapter/10.1007/978-981-96-8183-9_19}
}
@inproceedings{ICLR-25,
author = {Huet, Alexis and Ben Houidi, Zied and Rossi, Dario},
title = {Episodic Memories Generation and Evaluation Benchmark for Large Language Models},
year = {2025},
month = apr,
howpublished = {https://proceedings.iclr.cc/paper_files/paper/2025/file/7ff013b7e372ba5b790352ccd6908f03-Paper-Conference.pdf},
arxiv = {https://arxiv.org/abs/2501.13121},
booktitle = {The Thirteenth International Conference on Learning Representations (ICLR'25)}
}
@misc{DR:PATENT-PCT/EP25157197,
author = {Navarro, Jose Manuel and Krolikowski, Jonatan and Finamore, Alessandro and Bao, Xiaosheng and Jiang, Zihan and Yan, Zishuo and Wang, Mowei and Rossi, Dario},
title = {Machine learning system and computer-implemented method for machine learning system for grouped flow sampling},
month = feb,
patent = {True},
year = {2025},
}
@article{arXiv:2502.04390,
title = {In Praise of Stubbornness: An Empirical Case for Cognitive-Dissonance Aware Continual Update of Knowledge in LLMs},
author = {Clemente, Simone and Houidi, Zied Ben and Huet, Alexis and Rossi, Dario and Franzese, Giulio and Michiardi, Pietro},
year = {2025},
journal = {arXiv},
month = feb,
howpublished = {https://arxiv.org/abs/2502.04390},
arxiv = {https://arxiv.org/abs/2502.04390}
}
@inproceedings{WTMC-25,
author = {De Santis, Francesco and Huang, Kai and Valentim, Rodolfo and Giordano, Danilo and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
title = {CFA-Bench: Cybersecurity forensic LLM agent benchmark and testing},
booktitle = {10th International Workshop on Traffic Measurements for Cybersecurity (WTMC'25) colocated with IEEE European Symposium on Security and Privacy (EuroSP'25)},
pages = {217--225},
year = {2025},
howpublished = {https://ieeexplore.ieee.org/abstract/document/11129512}
}
@article{TNSM-24b,
author = {Gioacchini, Luca and Mellia, Marco and Vassio, Luca and Drago, Idilio and Milan, Giulia and Houidi, Zied Ben and Rossi, Dario},
journal = {IEEE Transactions on Network and Service Management},
title = {Cross-Network Embeddings Transfer for Traffic Analysis},
month = jun,
year = {2024},
volume = {21},
number = {3},
pages = {2686-2699},
doi = {10.1109/TNSM.2023.3329442},
howpublished = {https://ieeexplore.ieee.org/abstract/document/10304313}
}
@article{arxiv:2405.02649,
author = {Gioacchini, Luca and Drago, Idilio and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
title = {Generic Multi-modal Representation Learning for Network Traffic Analysis},
month = may,
year = {2024},
howpublished = {https://arxiv.org/abs/2405.02649},
arxiv = {https://arxiv.org/abs/2405.02649}
}
@misc{DR:PATENT-PCT/EP24161884,
author = {Navarro, Jose Manuel and Huet, Alexis and Krolikowski, Jonatan and Chen, Fuxing and Zhao, Shuijing and Bao, Xiaosheng and Rossi, Dario},
title = {Unsupervised detection of irregular communication behavior of communication devices},
month = mar,
patent = {True},
year = {2024},
}
@article{TNSM-24a,
author = {Bovenzi, Giampaolo and Nascita, Alfredo and Yang, Lixuan and Finamore, Alessandro and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario},
journal = {IEEE Transactions on Network and Service Management},
title = {Benchmarking Class Incremental Learning in Deep Learning Traffic Classification},
year = {2024},
volume = {21},
month = feb,
number = {1},
pages = {51-69},
doi = {10.1109/TNSM.2023.3287430},
howpublished = {https://ieeexplore.ieee.org/abstract/document/10155294}
}
@article{COMNET-24,
title = {{MEMENTO: A novel approach for class incremental learning of encrypted traffic}},
journal = {Computer Networks},
pages = {110374},
year = {2024},
issn = {1389-1286},
doi = {10.1016/j.comnet.2024.110374},
howpublished = {https://www.sciencedirect.com/science/article/pii/S1389128624002068},
author = {Cerasuolo, Francesco and Nascita, Alfredo and Bovenzi, Giampaolo and Aceto, Giuseppe and Ciuonzo, Domenico and Pescapè, Antonio and Rossi, Dario},
keywords = {Traffic classification, Class incremental learning, Mobile apps, Encrypted traffic, Deep learning}
}
In the ever-changing digital environment, ensuring the ongoing effectiveness of traffic analysis and security measures is crucial. Therefore, Class Incremental Learning (CIL) in encrypted Traffic Classification (TC) is essential for adapting to evolving network behaviors and the rapid development of new applications. However, the application of CIL techniques in the TC domain is not straightforward, usually leading to unsatisfactory performance figures. Specifically, the goal is to reduce forgetting of old apps and increase the capacity to learn new ones, in order to improve overall classification performance, reducing the drop with respect to a model "trained from scratch". The contribution of this work is the design of a novel fine-tuning approach called MEMENTO, which is obtained through the careful design of different building blocks: memory management, model training, and rectification strategies. In detail, we propose traffic-biflow augmentation strategies to better capitalize on old apps' biflows, we introduce improvements in the distillation stage, and we design a general rectification strategy that subsumes several existing proposals. To assess our proposal, we leverage two publicly available encrypted network traffic datasets, i.e., MIRAGE19 and CESNET-TLS22. As a result, on both datasets MEMENTO achieves a significant improvement in classifying new apps (w.r.t. the best-performing alternative, i.e., BiC) while maintaining stable performance on old ones. Equally important, MEMENTO achieves satisfactory overall TC performance, filling the gap toward a trained-from-scratch model and offering a considerable time gain (up to a 10× speed-up) in obtaining up-to-date, running classifiers. The experimental evaluation relies on a comprehensive performance evaluation workbench for CIL proposals, based on a wider set of metrics than the existing TC literature.
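For illustration, the distillation building block mentioned above can be sketched as a generic fine-tuning-with-distillation step of the kind MEMENTO improves upon; this is not the authors' implementation, and the class split, temperature, and loss weighting below are assumptions.

# Hedged sketch of a distillation-based class-incremental update (PyTorch).
import torch
import torch.nn.functional as F

def incremental_step(model, old_model, batch, n_old_classes, temperature=2.0, alpha=0.5):
    """Mix cross-entropy over all classes with distillation of the frozen
    old model's logits on the old classes."""
    x, y = batch                                   # traffic features, app labels
    logits = model(x)                              # new model: old + new classes
    with torch.no_grad():
        old_logits = old_model(x)                  # frozen model: old classes only
    ce = F.cross_entropy(logits, y)                # learn new (and rehearsed old) apps
    distill = F.kl_div(                            # retain old-app knowledge
        F.log_softmax(logits[:, :n_old_classes] / temperature, dim=1),
        F.softmax(old_logits / temperature, dim=1),
        reduction="batchmean",
    ) * temperature ** 2
    return (1 - alpha) * ce + alpha * distill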
@inproceedings{CoNEXT-24a,
author = {Azorin, Raphael and Monterubbiano, Andrea and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
title = {Taming the Elephants: Affordable Flow Length Prediction in the Data Plane},
year = {2024},
howpublished = {https://doi.org/10.1145/3649473},
doi = {10.1145/3649473},
booktitle = {Proc. of CoNEXT'24 (PACMNET)}
}
Machine Learning (ML) shows promising potential for enhancing networking tasks by providing early traffic predictions. However, implementing an ML-enabled system is challenging due to network devices' limited resources. While previous works have shown the feasibility of running simple ML models in the data plane, integrating them into a practical end-to-end system is not an easy task: it requires addressing issues related to resource management and model maintenance to ensure that the performance improvement justifies the system overhead. In this work, we propose DUMBO, a versatile end-to-end system to generate and exploit early flow size predictions at line rate. Our system seamlessly integrates and maintains a simple ML model that offers early coarse-grain flow size prediction in the data plane. We evaluate the proposed system on flow scheduling, per-flow packet inter-arrival time distribution, and flow size estimation using real traffic traces, and perform experiments using an FPGA prototype running on an AMD®-Xilinx® Alveo U280 SmartNIC. Our results show that DUMBO outperforms traditional state-of-the-art approaches by equipping network devices' data planes with a lightweight ML model. Code is available at https://github.com/cpt-harlock/DUMBO.
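As a rough illustration of the kind of lightweight model a data plane can host, the sketch below trains a shallow decision tree to flag prospective "elephant" flows from first-packet features; the features, labels, and tree size are made-up placeholders, not DUMBO's actual design.

# Toy early flow-size classifier on synthetic first-packet features.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.default_rng(0)
# Toy per-flow features: sizes of the first 3 packets plus a port/protocol field.
X = rng.integers(40, 1500, size=(10_000, 4)).astype(float)
# Toy label: 1 = "elephant" (large flow), 0 = "mouse".
y = (X[:, :3].sum(axis=1) > 3_000).astype(int)

# A shallow tree keeps the memory footprint compatible with a data-plane target.
clf = DecisionTreeClassifier(max_depth=4).fit(X, y)
print(clf.predict(X[:5]))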
@inproceedings{PAM-24,
title = {{Data Augmentation for Traffic Classification}},
author = {Wang, Chao and Finamore, Alessandro and Michiardi, Pietro and Gallo, Massimo and Rossi, Dario},
year = {2024},
booktitle = {Passive and Active Measurements (PAM)},
note = {bestpaperrunnerup},
arxiv = {https://arxiv.org/abs/2401.10754},
howpublished = {https://arxiv.org/abs/2401.10754}
}
@inproceedings{CoNEXT-24b,
author = {Bui, Minh-Thanh and Boffa, Matteo and Vieira Valentim, Rodolfo and Navarro, Jose Manuel and Chen, Fuxing and Bao, Xiaosheng and Ben Houidi, Zied and Rossi, Dario},
title = {A Systematic Comparison of Large Language Models Performance for Intrusion Detection},
year = {2024},
howpublished = {https://doi.org/10.1145/3696379},
doi = {10.1145/3696379},
booktitle = {Proc. of CoNEXT'24 (PACMNET)}
}
We explore the capabilities of Large Language Models (LLMs) to assist or substitute devices (i.e., firewalls) and humans (i.e., security experts) in the detection and analysis of security incidents, respectively. We leverage transformer-based technologies, from relatively small to foundational sizes, to address the problem of correctly identifying the attack severity (and, secondarily, identifying and explaining the attack type). We contrast a broad range of LLM techniques (prompting, retrieval-augmented generation, and fine-tuning of several models) using state-of-the-art machine learning models as a baseline. Using proprietary data from a commercial deployment, our study provides an unbiased picture of the strengths and weaknesses of LLMs for intrusion detection.
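A minimal sketch of the prompting baseline is given below, assuming a generic call_llm(prompt) -> str client is available; the prompt wording, severity scale, and helper names are illustrative assumptions rather than the paper's exact setup.

# Hedged sketch: classify alert severity by prompting an LLM and parsing the answer.
SEVERITIES = ["benign", "low", "medium", "high", "critical"]

def build_prompt(alert: dict) -> str:
    return (
        "You are a security analyst. Classify the severity of the following "
        f"intrusion-detection alert as one of: {', '.join(SEVERITIES)}, and "
        "briefly explain the likely attack type.\n\n"
        f"Alert: {alert}\nSeverity:"
    )

def classify_severity(alert: dict, call_llm) -> str:
    # `call_llm` is a placeholder for whatever chat-completion client is used.
    answer = call_llm(build_prompt(alert)).lower()
    return next((s for s in SEVERITIES if s in answer), "unknown")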
@inproceedings{CoNEXT-23a,
title = {{SPADA: A Sparse Approximate Data Structure representation for data plane per-flow monitoring}},
author = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
booktitle = {ACM CoNEXT},
howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program},
month = dec,
year = {2023}
}
@inproceedings{CoNEXT-23b,
title = {Change Point Detection in WLANs with Random AP Forests},
author = {Huet, Alexis and Krolikowski, Jonatan and Navarro, Jose Manuel and Chen, Fuxing and Rossi, Dario},
booktitle = {ACM CoNEXT},
doi = {10.1145/3624354.3630587},
howpublished = {https://doi.org/10.1145/3624354.3630587},
month = dec,
year = {2023}
}
Troubleshooting WiFi networks is notoriously difficult due to the variability of the wireless medium. Complementary to existing works that focus on detecting short-term fluctuations of radio signals (i.e., anomalies), we tackle the problem of reliably detecting long-term changes in the statistical properties of WiFi networks. We propose a new method to reliably gain insights into such environmental changes, which we refer to as Random Access Point Forest (RAPF). RAPF identifies the changes from a forest of individual learners, each consisting of a random tree approximating the signal of a specific pair of APs. The biased selection of APs in a distributed manner, along with the stochastic construction of each individual tree, ensures robustness to noise and biases. We conduct a measurement campaign on a real WLAN by collecting the path loss among pairs of APs in a network for which labels are available, and perform an extensive comparison of our methodology against state-of-the-art change point methodologies, which conclusively shows RAPF to yield the most robust detection capabilities.
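The sketch below illustrates only the general idea of aggregating many weak per-AP-pair change detectors into a robust ensemble vote; it is not the RAPF algorithm itself (which grows randomized trees), and the naive mean-shift scorer, signal layout, and parameters are assumptions.

# Toy ensemble change-point detector over per-AP-pair path-loss series.
import numpy as np

def best_mean_shift(series: np.ndarray) -> int:
    """Index that maximizes the squared difference between left/right means."""
    n = len(series)
    scores = [(series[:t].mean() - series[t:].mean()) ** 2 for t in range(2, n - 1)]
    return int(np.argmax(scores)) + 2

def ensemble_changepoint(pair_signals: np.ndarray, n_learners=50, seed=0) -> int:
    """pair_signals: (n_pairs, n_timesteps) matrix, one row per AP pair."""
    rng = np.random.default_rng(seed)
    votes = [
        best_mean_shift(pair_signals[rng.integers(len(pair_signals))])
        for _ in range(n_learners)
    ]
    return int(np.bincount(votes).argmax())     # most-voted change location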
@inproceedings{CoNEXT-23c,
title = {Toward Generative Data Augmentation for Traffic Classification},
author = {Wang, Chao and Finamore, Alessandro and Gallo, Massimo and Michiardi, Pietro and Rossi, Dario},
booktitle = {ACM CoNEXT, Student Workshop},
howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-student},
month = dec,
year = {2023}
}
[CoNEXT-23d]
Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario,
"Memory-efficient Random Forests in FPGA SmartNICs"
ACM CoNEXT, Poster session
dec.
2023,
Conference
@inproceedings{CoNEXT-23d,
title = {Memory-efficient Random Forests in FPGA SmartNICs},
author = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
booktitle = {ACM CoNEXT, Poster session},
howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-poster},
month = dec,
year = {2023}
}
Random Forests (RF) have been a popular Machine Learning (ML) algorithm for more than two decades. This success can be attributed to their simplicity, effectiveness, and explainability. However, implementing them in a high-speed programmable data plane is not trivial. To make predictions, i.e., inference, RFs must traverse each tree from the root to a leaf by comparing the feature vector at each split node. This process is particularly challenging in network devices, where memory is limited and packet processing cannot be delayed, i.e., predictions occur at line rate. Nevertheless, this implementation is crucial for incorporating recent ML advances in the network, which could benefit use cases such as scheduling, measurements, and routing [1]. Prior studies such as Planter [4] have examined the implementation of RF in network switches, mapping trees to Match-Action Tables (MAT). Another line of work focused on RF implementations optimized for FPGA, mapping tree layers to pipeline stages as done in [2]. Such approaches use different tree representations that naturally come with their strengths and weaknesses depending on the trees' sparsity, depth, and input features. In this work we (1) propose a novel representation for FPGA-based Random Forests, (2) compare it against state-of-the-art implementations in terms of memory and computation requirements, and (3) evaluate our design on a flow classification task using CAIDA traffic traces.
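For context, a common pointer-free way to lay out a decision tree for constrained hardware is a set of parallel arrays indexed by node id, as in the sketch below; this is a generic illustration, not the representation proposed in the poster.

# Illustrative array-based (pointer-free) tree layout and line-rate-style traversal.
from dataclasses import dataclass
import numpy as np

@dataclass
class FlatTree:
    feature: np.ndarray     # feature index tested at each node (-1 marks a leaf)
    threshold: np.ndarray   # split threshold at each node
    left: np.ndarray        # index of the left child
    right: np.ndarray       # index of the right child
    value: np.ndarray       # class predicted at a leaf

    def predict(self, x: np.ndarray) -> int:
        node = 0
        while self.feature[node] != -1:                 # descend until a leaf
            if x[self.feature[node]] <= self.threshold[node]:
                node = self.left[node]
            else:
                node = self.right[node]
        return int(self.value[node])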
@article{DR:TNSM-23,
author = {Soro, Francesca and Favale, Thomas and Giordano, Danilo and Drago, Idilio and Rescio, Tommaso and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
journal = {IEEE Transactions on Network and Service Management},
title = {Enlightening the Darknets: Augmenting Darknet Visibility with Active Probes},
month = dec,
year = {2023},
volume = {20},
number = {4},
pages = {5012-5025},
doi = {10.1109/TNSM.2023.3267671},
howpublished = {https://ieeexplore.ieee.org/document/10102919}
}
Darknets collect unsolicited traffic reaching unused address spaces. They provide insights into malicious activities, such as the rise of botnets and DDoS attacks. However, darknets provide a shallow view, as traffic is never responded to. Here we quantify how their visibility increases by responding to traffic with interactive responders offering increasing levels of interaction. We consider four deployments: darknets, simple responders, vertical responders bound to specific ports, and a honeypot that responds to all protocols on any port. We contrast these alternatives by analyzing the traffic attracted by each deployment and characterizing how traffic changes throughout the responder lifecycle on the darknet. We show that the deployment of responders increases the value of darknet data by revealing patterns that would otherwise be unobservable. We measure Side-Scan phenomena, where once a host starts responding it attracts traffic to other ports and neighboring addresses. Responding also uncovers attacks that darknets alone would not observe, e.g., large-scale activity on non-standard ports. Finally, we observe how quickly senders can identify and attack new responders. The "enlightened" part of a darknet brings several benefits and offers opportunities to increase the visibility of sender patterns. This information gain is worth taking advantage of, and we therefore recommend that organizations consider this option.
@inproceedings{ICDM-23,
author = {Kong, Lanfang and Huet, Alexis and Rossi, Dario and Sozio, Mauro},
title = {Tree-based Kendall tau Maximization for Explainable Unsupervised Anomaly Detection},
booktitle = {IEEE International Conference on Data Mining (ICDM)},
year = {2023},
month = dec,
howpublished = {https://ieeexplore.ieee.org/abstract/document/10415648}
}
We study the problem of building a regression tree of relatively small size that maximizes the Kendall tau coefficient between the anomaly scores of a source anomaly detection algorithm and those predicted by our regression tree. We consider a labeling function that assigns to each leaf the inverse of its size, thereby providing satisfactory explanations when comparing examples with different anomaly scores. We show that our approach can be used as a post-hoc model, i.e., to provide global explanations for an existing anomaly detection algorithm. Moreover, it can be used as an in-model approach, i.e., the source anomaly detection algorithm can be replaced altogether. This is made possible by leveraging the off-the-shelf transparency of tree-based approaches and the fact that the explanations provided by our approach do not rely on the source anomaly detection algorithm. The main technical challenge is the efficient computation of the Kendall tau coefficients when determining the best split at each node of the regression tree. We show how such a coefficient can be computed incrementally, thereby making the running time of our algorithm almost linear (up to a logarithmic factor) in the size of the input. Our approach is completely unsupervised, which is appealing when it is difficult to collect a large number of labeled examples. We complement our study with an extensive experimental evaluation against the state-of-the-art, showing the effectiveness of our approach.
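To make the objective concrete, the sketch below scores a candidate split by the Kendall tau between the source anomaly scores and the two-valued prediction the split induces, recomputing tau from scratch for each threshold (the paper's contribution is precisely to avoid this recomputation via incremental updates); the helper names are hypothetical.

# Naive (non-incremental) Kendall-tau split scoring for a regression tree node.
import numpy as np
from scipy.stats import kendalltau

def split_tau(feature: np.ndarray, scores: np.ndarray, threshold: float) -> float:
    """Kendall tau between source anomaly scores and the piecewise-constant
    prediction induced by splitting on `feature <= threshold`."""
    left = feature <= threshold
    pred = np.where(left, scores[left].mean(), scores[~left].mean())
    tau, _ = kendalltau(scores, pred)
    return tau

def best_split(feature: np.ndarray, scores: np.ndarray) -> float:
    """Exhaustive search over candidate thresholds (both sides kept non-empty)."""
    candidates = np.unique(feature)[:-1]
    return max(candidates, key=lambda t: split_tau(feature, scores, t))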