% ACM CoNEXT 2023 conference paper (SPADA).
% howpublished holds the link to the conference program page.
@inproceedings{CoNEXT-23a,
  author       = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
  title        = {{SPADA: A Sparse Approximate Data Structure representation for data plane per-flow monitoring}},
  booktitle    = {ACM CoNEXT},
  month        = dec,
  year         = {2023},
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program}
}
% ACM CoNEXT 2023 conference paper (Random AP Forests).
% doi holds the bare identifier; howpublished holds the resolver URL for the same DOI.
@inproceedings{CoNEXT-23b,
  author       = {Huet, Alexis and Krolikowski, Jonatan and Navarro, Jose Manuel and Chen, Fuxing and Rossi, Dario},
  title        = {Change Point Detection in WLANs with Random AP Forests},
  booktitle    = {ACM CoNEXT},
  month        = dec,
  year         = {2023},
  doi          = {10.1145/3624354.3630587},
  howpublished = {https://doi.org/10.1145/3624354.3630587}
}
Troubleshooting WiFi networks is knowingly difficult due to the variability of the wireless medium. Complementary to existing works that focus on detecting short-term fluctuations of radio signals (i.e., anomalies), we tackle the problem of reliably detecting long-term changes in statistical properties of WiFi networks. We propose a new method to reliably gain insights on such environmental changes, which we refer to as Random Access Point Forest (RAPF). RAPF identifies the changes from a forest of individual learners, each of them consisting of a random tree approximating the signal of a specific pair of APs. The biased selection of APs in a distributed manner along with the stochastic construction of each individual tree ensure its robustness to noise and biases. We conduct a measurement campaign on a real WLAN by collecting the path loss among pairs of APs in a network for which labels are available and perform an extensive comparison of our methodology against state-of-the-art change point methodologies, which conclusively shows RAPF to yield the most robust detection capabilities.
% ACM CoNEXT 2023 Student Workshop paper.
% howpublished links to the student-workshop program page.
@inproceedings{CoNEXT-23c,
  author       = {Wang, Chao and Finamore, Alessandro and Gallo, Massimo and Michiardi, Pietro and Rossi, Dario},
  title        = {Toward Generative Data Augmentation for Traffic Classification},
  booktitle    = {ACM CoNEXT, Student Workshop},
  month        = dec,
  year         = {2023},
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-student}
}
[CoNEXT-23d]
Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario,
"Memory-efficient Random Forests in FPGA SmartNICs"
ACM CoNEXT, Poster session
dec.
2023,
Conference
% ACM CoNEXT 2023 poster.
% howpublished links to the poster-session program page.
@inproceedings{CoNEXT-23d,
  author       = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
  title        = {Memory-efficient Random Forests in FPGA SmartNICs},
  booktitle    = {ACM CoNEXT, Poster session},
  month        = dec,
  year         = {2023},
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-poster}
}
Random Forests (RF) have been a popular Machine Learning (ML) algorithm for more than two decades. This success can be attributed to its simplicity, effectiveness and explainability. However, implementing them in a high-speed programmable data plane is not trivial. To make predictions, i.e., inference, RFs must traverse each tree from the root to the leaf by comparing the features vector at each split node. This process is particularly challenging in network devices where memory is limited, and packet processing cannot be delayed, i.e., predictions occur at line rate. Nevertheless, this implementation is crucial for incorporating recent ML advances in the network, which could benefit use cases such as scheduling, measurements, and routing [1]. Prior studies such as Planter [4] have examined the implementation of RF in network switches, mapping trees to Match-Action Tables (MAT). Another line of work focused on RF implementations optimized for FPGA, mapping tree layers to pipeline stages as done in [2]. Such approaches use different tree representations that naturally come with their strengths and weaknesses depending on the trees’ sparsity, depth, and input features. In this work we (1) propose a novel representation for FPGA-based Random Forests, (2) compare it against state-of-the-art implementations in terms of memory and computation requirements, and (3) evaluate our design on a flow classification task using CAIDA traffic traces.
% IEEE ICDM 2023 conference paper.
% howpublished links to the IEEE Xplore abstract page.
@inproceedings{ICDM-23,
  author       = {Kong, Lanfang and Huet, Alexis and Rossi, Dario and Sozio, Mauro},
  title        = {Tree-based Kendall tau Maximization for Explainable Unsupervised Anomaly Detection},
  booktitle    = {IEEE International Conference on Data Mining (ICDM)},
  month        = dec,
  year         = {2023},
  howpublished = {https://ieeexplore.ieee.org/abstract/document/10415648}
}
We study the problem of building a regression tree with relatively small size, which maximizes the Kendall’s tau coefficient between the anomaly scores of a source anomaly detection algorithm and those predicted by our regression tree. We consider a labeling function which assigns to each leaf the inverse of its size, thereby providing satisfactory explanations when comparing examples with different anomaly scores. We show that our approach can be used as a post-hoc model, i.e. to provide global explanations for an existing anomaly detection algorithm. Moreover, it can be used as an in-model approach, i.e. the source anomaly detection algorithm can be replaced altogether. This is made possible by leveraging the off-the-shelf transparency of tree-based approaches and from the fact that the explanations provided by our approach do not rely on the source anomaly detection algorithm. The main technical challenge to tackle is the efficient computation of the Kendall’s tau coefficients when determining the best split at each node of the regression tree. We show how such a coefficient can be computed incrementally, thereby making the running time of our algorithm almost linear (up to a logarithmic factor) in the size of the input. Our approach is completely unsupervised, which is appealing in the case when it is difficult to collect a large number of labeled examples. We complement our study with an extensive experimental evaluation against the state-of-the-art, showing the effectiveness of our approach.
[TNSM-23]
Soro, Francesca and Favale, Thomas and Giordano, Danilo and Drago, Idilio and Rescio, Tommaso and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario,
"Enlightening the Darknets: Augmenting Darknet Visibility with Active Probes"
In IEEE Transactions on Network and Service Management,
Vol. 20,
No. 4,
pp.5012-5025,
dec.
2023,
DOI 10.1109/TNSM.2023.3267671
Journal
% IEEE TNSM journal article, vol. 20, no. 4, Dec 2023.
% pages uses the double-hyphen form so styles typeset an en dash for the range.
% doi holds the bare identifier; howpublished holds the IEEE Xplore page.
@article{DR:TNSM-23,
  author       = {Soro, Francesca and Favale, Thomas and Giordano, Danilo and Drago, Idilio and Rescio, Tommaso and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
  title        = {Enlightening the Darknets: Augmenting Darknet Visibility with Active Probes},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {20},
  number       = {4},
  pages        = {5012--5025},
  month        = dec,
  year         = {2023},
  doi          = {10.1109/TNSM.2023.3267671},
  howpublished = {https://ieeexplore.ieee.org/document/10102919}
}
Darknets collect unsolicited traffic reaching unused address spaces. They provide insights into malicious activities, such as the rise of botnets and DDoS attacks. However, darknets provide a shallow view, as traffic is never responded. Here we quantify how their visibility increases by responding to traffic with interactive responders with increasing levels of interaction. We consider four deployments: Darknets, simple L4-Responders, vertical L7-Responders bound to specific ports, and DPIPot, a honeypot that responds to all protocols on any port. We contrast these alternatives by analyzing the traffic attracted by each deployment and characterizing how traffic changes throughout the responder lifecycle on the darknet. We show that the deployment of responders increases the value of darknet data by revealing patterns that would otherwise be unobservable. We measure Side-Scan phenomena where once a host starts responding, it attracts traffic to other ports and neighboring addresses. DPIPot uncovers attacks that darknets and L7-Responders would not observe, e.g. large-scale activity on non-standard ports. And we observe how quickly senders can identify and attack new responders. The “enlightened” part of a darknet brings several benefits and offers opportunities to increase the visibility of sender patterns. This information gain is worth taking advantage of, and we, therefore, recommend that organizations consider this option.
% ACM IMC 2023 conference paper (replication study).
% The author name is spelled "Navarro, Jose Manuel", matching the file's other
% entries for this author, so that author indexes do not split one person in two.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@inproceedings{IMC-23,
  author    = {Finamore, Alessandro and Wang, Chao and Krolikowski, Jonatan and Navarro, Jose Manuel and Chen, Fuxing and Rossi, Dario},
  title     = {Replicating: Contrastive Learning and Data Augmentation in Traffic Classification Using a Flowpic Input Representation},
  booktitle = {ACM Internet Measurement Conference (IMC)},
  month     = oct,
  year      = {2023},
  arxiv     = {https://arxiv.org/abs/2309.09733}
}
Over the last years we witnessed a renewed interest towards Traffic Classification (TC) captivated by the rise of Deep Learning (DL). Yet, the vast majority of TC literature lacks code artifacts, performance assessments across datasets and reference comparisons against Machine Learning (ML) methods. Among those works, a recent study from IMC’22 [17] is worthy of attention since it adopts recent DL methodologies (namely, few-shot learning, self-supervision via contrastive learning and data augmentation) appealing for networking as they enable to learn from a few samples and transfer across datasets. The main result of [17] on the UCDAVIS19, ISCX-VPN and ISCX-Tor datasets is that, with such DL methodologies, 100 input samples are enough to achieve very high accuracy using an input representation called "flowpic" (i.e., a per-flow 2d histogram of the packets size evolution over time). In this paper (i) we reproduce [17] on the same datasets and (ii) we replicate its most salient aspect (the importance of data augmentation) on three additional public datasets, MIRAGE-19, MIRAGE-22 and UTMOBILENET21. While we confirm most of the original results, we also found a 20% accuracy drop on some of the investigated scenarios due to a data shift in the original dataset that we uncovered. Additionally, our study validates that the data augmentation strategies studied in [17] perform well on other datasets too. In the spirit of reproducibility and replicability we make all artifacts (code and data) available at [10].
% AutoML Conference 2023 paper.
% howpublished links to the OpenReview PDF.
% dataseturl is not a standard BibTeX field; standard styles ignore it.
@inproceedings{DR:AutoML-23,
  author       = {Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario},
  title        = {Meta-Learning for Fast Model Recommendation in Unsupervised Multivariate Time Series Anomaly Detection},
  booktitle    = {AutoML Conference},
  month        = sep,
  year         = {2023},
  howpublished = {https://openreview.net/pdf?id=7cUV9K3ns9Q},
  dataseturl   = {https://figshare.com/articles/software/Meta-Learning_for_Fast_Model_Recommendation_in_Unsupervised_Multivariate_Time_Series_Anomaly_Detection/22320367}
}
Unsupervised model recommendation for anomaly detection is a recent discipline for which there is no existing work that focuses on multivariate time series data. This paper studies that problem under real-world restrictions, most notably: (i) a limited time to issue a recommendation, which renders existing methods based around the testing of a large pool of models unusable; (ii) the need for generalization to previously unseen data sources, which is seldom factored in the experimental evaluation. We turn to meta-learning and propose Hydra, the first meta-recommender for anomaly detection in literature that we especially analyze in the context of multivariate time series. We conduct our experiments using 94 public datasets from 4 different data sources. Our ablation study testifies that our meta-recommender achieves a higher performance than the current state of the art, including in difficult scenarios in which data similarity is minimal: our proposal is able to recommend a model in the top 10% (13%) of the algorithmic pool for known (unseen) sources of data.
% ACM SIGKDD 2023 conference paper (LEXNet).
% doi is the bare identifier, taken from the ACM Digital Library URL in howpublished.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@inproceedings{DR:KDD-23,
  author       = {Fauvel, Kevin and Chen, Fuxing and Rossi, Dario},
  title        = {A Lightweight, Efficient and Explainable-by-Design Convolutional Neural Network for Internet Traffic Classification},
  booktitle    = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD'23)},
  month        = aug,
  year         = {2023},
  doi          = {10.1145/3580305.3599762},
  howpublished = {https://dl.acm.org/doi/10.1145/3580305.3599762},
  arxiv        = {https://arxiv.org/abs/2202.05535}
}
Traffic classification, i.e. the identification of the type of applications flowing in a network, is a strategic task for numerous activities (e.g., intrusion detection, routing). This task faces some critical challenges that current deep learning approaches do not address. The design of current approaches does not take into consideration the fact that networking hardware (e.g., routers) often runs with limited computational resources. Further, they do not meet the need for faithful explainability highlighted by regulatory bodies. Finally, these traffic classifiers are evaluated on small datasets which fail to reflect the diversity of applications in real-world settings. Therefore, this paper introduces a new Lightweight, Efficient and eXplainable-by-design convolutional neural network (LEXNet) for Internet traffic classification, which relies on a new residual block (for lightweight and efficiency purposes) and prototype layer (for explainability). Based on a commercial-grade dataset, our evaluation shows that LEXNet succeeds to maintain the same accuracy as the best performing state-of-the-art neural network, while providing the additional features previously mentioned. Moreover, we illustrate the explainability feature of our approach, which stems from the communication of detected application prototypes to the end-user, and we highlight the faithfulness of LEXNet explanations through a comparison with post hoc methods.
[TOIT-23]
Gioacchini, Luca and Vassio, Luca and Mellia, Marco and Drago, Idilio and Houidi, Zied Ben and Rossi, Dario,
"i-DarkVec: Incremental Embeddings for Darknet Traffic Analysis"
In ACM Trans. Internet Technol.,
Vol. 23,
No. 3,
aug.
2023,
DOI 10.1145/3595378
Journal
% ACM Transactions on Internet Technology article, vol. 23, no. 3, Aug 2023.
% The same work is linked three ways: bare doi, resolver url, and the ACM DL page in howpublished.
% articleno and numpages are not standard BibTeX fields; standard styles ignore them.
@article{DR:TOIT-23,
  author       = {Gioacchini, Luca and Vassio, Luca and Mellia, Marco and Drago, Idilio and Houidi, Zied Ben and Rossi, Dario},
  title        = {i-DarkVec: Incremental Embeddings for Darknet Traffic Analysis},
  journal      = {ACM Trans. Internet Technol.},
  volume       = {23},
  number       = {3},
  articleno    = {45},
  numpages     = {28},
  month        = aug,
  year         = {2023},
  issn         = {1533-5399},
  doi          = {10.1145/3595378},
  url          = {https://doi.org/10.1145/3595378},
  howpublished = {https://dl.acm.org/doi/10.1145/3595378},
  keywords     = {darknet, Network Measurements, Word2Vec}
}
Darknets are probes listening to traffic reaching IP addresses that host no services. Traffic reaching a darknet results from the actions of internet scanners, botnets, and possibly misconfigured hosts. Such peculiar nature of the darknet traffic makes darknets a valuable instrument to discover malicious online activities, e.g., identifying coordinated actions performed by bots or scanners. However, the massive amount of packets and sources that darknets observe makes it hard to extract meaningful insights, calling for scalable tools to automatically identify and group sources that share similar behaviour. We here present i-DarkVec, a methodology to learn meaningful representations of Darknet traffic. i-DarkVec leverages Natural Language Processing techniques (e.g., Word2Vec) to capture the co-occurrence patterns that emerge when scanners or bots launch coordinated actions. As in NLP problems, the embeddings learned with i-DarkVec enable several new machine learning tasks on the darknet traffic, such as identifying clusters of senders engaged in similar activities. We extensively test i-DarkVec and explore its design space in a case study using real darknets. We show that with a proper definition of services, the learned embeddings can be used to (i) solve the classification problem to associate unknown sources’ IP addresses to the correct classes of coordinated actors and (ii) automatically identify clusters of previously unknown sources performing similar attacks and scans, easing the security analyst’s job. i-DarkVec leverages a novel incremental embedding learning approach that is scalable and robust to traffic changes, making it applicable to dynamic and large-scale scenarios.
% TMA 2023 conference paper.
% doi holds the bare identifier; howpublished links to the IEEE Xplore page.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@inproceedings{DR:TMA-23,
  author       = {Guarino, Idio and Wang, Chao and Finamore, Alessandro and Pescape, Antonio and Rossi, Dario},
  title        = {Many or Few Samples? Comparing Transfer, Contrastive and Meta-Learning in Encrypted Traffic Classification},
  booktitle    = {Network Traffic Measurement and Analysis Conference (TMA)},
  month        = jun,
  year         = {2023},
  doi          = {10.23919/TMA58422.2023.10198965},
  howpublished = {https://ieeexplore.ieee.org/document/10198965},
  arxiv        = {https://arxiv.org/abs/2305.12432}
}
The popularity of Deep Learning (DL), coupled with network traffic visibility reduction due to the increased adoption of HTTPS, QUIC and DNS-SEC, re-ignited interest towards Traffic Classification (TC). However, to tame the dependency from task-specific large labeled datasets we need to find better ways to learn representations that are valid across tasks. In this work we investigate this problem comparing transfer learning, meta-learning and contrastive learning against reference Machine Learning (ML) tree-based and monolithic DL models (16 methods total). Using two publicly available datasets, namely MIRAGE19 (40 classes) and AppClassNet (500 classes), we show that (i) using large datasets we can obtain more general representations, (ii) contrastive learning is the best methodology and (iii) meta-learning the worst one, and (iv) while ML tree-based cannot handle large tasks but fits well small tasks, by means of reusing learned representations, DL methods are reaching tree-based models performance also for small tasks.
[PATENT-PCT/CN2023/080516]
FAUVEL, Kevin and ZHAO, Yong and CAO, Zigang and CHEN, Maolin and CHEN, Fuxing and ROSSI, Dario,
"Traffic Classification with a Modifiable Ruleset" , Patent PCT/CN2023/080516
mar.
2023,
Patent
% Patent record; the application number is carried in note.
% topic and patent are not standard BibTeX fields; standard styles ignore them.
@misc{DR:PATENT-PCT/CN2023/080516,
  author = {FAUVEL, Kevin and ZHAO, Yong and CAO, Zigang and CHEN, Maolin and CHEN, Fuxing and ROSSI, Dario},
  title  = {Traffic Classification with a Modifiable Ruleset},
  month  = mar,
  year   = {2023},
  note   = {Patent PCT/CN2023/080516},
  patent = {True},
  topic  = {tc-xai}
}
% arXiv preprint; the same abstract-page URL is stored in both arxiv and howpublished.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@misc{arXiv:2302.10676,
  author       = {Krolikowski, Jonatan and Houidi, Zied Ben and Rossi, Dario},
  title        = {User-aware WLAN Transmit Power Control in the Wild},
  month        = feb,
  year         = {2023},
  howpublished = {https://arxiv.org/abs/2302.10676},
  arxiv        = {https://arxiv.org/abs/2302.10676}
}
In Wireless Local Area Networks (WLANs), Access point (AP) transmit power influences (i) received signal quality for users and thus user throughput, (ii) user association and thus load across APs and (iii) AP coverage ranges and thus interference in the network. Despite decades of academic research, transmit power levels are still, in practice, statically assigned to satisfy uniform coverage objectives. Yet each network comes with its unique distribution of users in space, calling for a power control that adapts to users’ probabilities of presence, for example, placing the areas with higher interference probabilities where user density is the lowest. Although nice on paper, putting this simple idea in practice comes with a number of challenges, with gains that are difficult to estimate, if any at all. This paper is the first to address these challenges and evaluate in a production network serving thousands of daily users the benefits of a user-aware transmit power control system. Along the way, we contribute a novel approach to reason about user densities of presence from historical IEEE 802.11k data, as well as a new machine learning approach to impute missing signal-strength measurements. Results of a thorough experimental campaign show feasibility and quantify the gains: compared to state-of-the-art solutions, the new system can increase the median signal strength by 15dBm, while decreasing airtime interference at the same time. This comes at an affordable cost of a 5dBm decrease in uplink signal due to lack of terminal cooperation.
% AAAI'23 workshop paper; the arXiv abstract page is stored in both arxiv and howpublished.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@inproceedings{DR:AAAI-23-PDL,
  author       = {Azorin, Raphael and Gallo, Massimo and Finamore, Alessandro and Rossi, Dario and Michiardi, Pietro},
  title        = {"It's a Match!" -- A Benchmark of Task Affinity Scores for Joint Learning},
  booktitle    = {AAAI'23, International Workshop on Practical Deep Learning in the Wild},
  month        = feb,
  year         = {2023},
  howpublished = {https://arxiv.org/abs/2301.02873},
  arxiv        = {https://arxiv.org/abs/2301.02873}
}
While the promises of Multi-Task Learning (MTL) are attractive, characterizing the conditions of its success is still an open problem in Deep Learning. Some tasks may benefit from being learned together while others may be detrimental to one another. From a task perspective, grouping cooperative tasks while separating competing tasks is paramount to reap the benefits of MTL, i.e., reducing training and inference costs. Therefore, estimating task affinity for joint learning is a key endeavor. Recent work suggests that the training conditions themselves have a significant impact on the outcomes of MTL. Yet, the literature lacks a benchmark to assess the effectiveness of task affinity estimation techniques and their relation with actual MTL performance. In this paper, we take a first step in recovering this gap by (i) defining a set of affinity scores by both revisiting contributions from previous literature as well as presenting new ones and (ii) benchmarking them on the Taskonomy dataset. Our empirical campaign reveals how, even in a small-scale scenario, task affinity scoring does not correlate well with actual MTL performance. Yet, some metrics can be more indicative than others.
[PATENT-PCT/EP2023/053192]
HUET, Alexis and NAVARRO, Jose Manuel and ROSSI, Dario,
"Identifying and locating change points in a time series of data values" , Patent PCT/EP2023/053192
feb.
2023,
Patent
% Patent record; the application number is carried in note.
% topic and patent are not standard BibTeX fields; standard styles ignore them.
@misc{DR:PATENT-PCT/EP2023/053192,
  author = {HUET, Alexis and NAVARRO, Jose Manuel and ROSSI, Dario},
  title  = {Identifying and locating change points in a time series of data values},
  month  = feb,
  year   = {2023},
  note   = {Patent PCT/EP2023/053192},
  patent = {True},
  topic  = {ad}
}
% IEEE Communications Magazine article, vol. 61, no. 2, Feb 2023.
% The issue is stored in the standard number field, as in the file's other articles.
% arxiv and topic are not standard BibTeX fields; standard styles ignore them.
@article{DR:COMMAG-23,
  author       = {Iacoboaiea, Ovidiu and Krolikowski, Jonatan and Houidi, Zied Ben and Rossi, Dario},
  title        = {From Design to Deployment of Zero-touch Deep Reinforcement Learning WLANs},
  journal      = {IEEE Communications Magazine},
  volume       = {61},
  number       = {2},
  month        = feb,
  year         = {2023},
  doi          = {10.1109/MCOM.002.2200318},
  howpublished = {https://ieeexplore.ieee.org/document/9992177},
  arxiv        = {https://arxiv.org/abs/2207.06172},
  topic        = {wlan-algo}
}
Machine learning is increasingly used to automate networking tasks, in a paradigm known as zero touch network and service management (ZSM). In particular, deep reinforcement learning (DRL) techniques have recently gained much attention for their ability to learn to take complex decisions in different fields. In the ZSM context, DRL is an appealing candidate for tasks such as dynamic resource allocation, which are generally formulated as hard optimization problems. At the same time, successful training and deployment of DRL agents in real-world scenarios face a number of challenges that we outline and address in this article. Tackling the case of wireless local area network radio resource management, we report guidelines that extend to other use-cases and more general contexts.
[PATENT-PCT/EP2023/050709]
KROLIKOWSKI, Jonatan and HOUIDI, Zied BEN and CHEN, Fuxing and ROSSI, Dario,
"A data-driven WLAN topology-related KPIs query system and estimation method" , Patent PCT/EP2023/050709
jan.
2023,
Patent
% Patent record; the application number is carried in note.
% topic and patent are not standard BibTeX fields; standard styles ignore them.
@misc{DR:PATENT-PCT/EP2023/050709,
  author = {KROLIKOWSKI, Jonatan and HOUIDI, Zied BEN and CHEN, Fuxing and ROSSI, Dario},
  title  = {A data-driven WLAN topology-related KPIs query system and estimation method},
  month  = jan,
  year   = {2023},
  note   = {Patent PCT/EP2023/050709},
  patent = {True},
  topic  = {wlan}
}
% Entropy journal article, vol. 25, no. 4 (2023); no month is recorded.
% howpublished links to the arXiv abstract page.
% article-number is not a standard BibTeX field; standard styles ignore it.
@article{DR:ENTROPY-23,
  author         = {Franzese, Giulio and Rossi, Simone and Yang, Lixuan and Finamore, Alessandro and Rossi, Dario and Filippone, Maurizio and Michiardi, Pietro},
  title          = {How Much Is Enough? A Study on Diffusion Times in Score-Based Generative Models},
  journal        = {Entropy},
  volume         = {25},
  number         = {4},
  article-number = {633},
  year           = {2023},
  issn           = {1099-4300},
  doi            = {10.3390/e25040633},
  howpublished   = {https://arxiv.org/abs/2206.05173}
}
Score-based diffusion models are a class of generative models whose dynamics is described by stochastic differential equations that map noise into data. While recent works have started to lay down a theoretical foundation for these models, a detailed understanding of the role of the diffusion time T is still lacking. Current best practice advocates for a large T to ensure that the forward dynamics brings the diffusion sufficiently close to a known and simple noise distribution; however, a smaller value of T should be preferred for a better approximation of the score-matching objective and higher computational efficiency. Starting from a variational interpretation of diffusion models, in this work we quantify this trade-off and suggest a new method to improve quality and efficiency of both training and sampling, by adopting smaller diffusion times. Indeed, we show how an auxiliary model can be used to bridge the gap between the ideal and the simulated forward dynamics, followed by a standard reverse diffusion process. Empirical results support our analysis; for image data, our method is competitive with regard to the state of the art, according to standard sample quality metrics and log-likelihood.
[TNSM-24b]
Gioacchini, Luca and Mellia, Marco and Vassio, Luca and Drago, Idilio and Milan, Giulia and Houidi, Zied Ben and Rossi, Dario,
"Cross-Network Embeddings Transfer for Traffic Analysis"
In IEEE Transactions on Network and Service Management,
Vol. 21,
No. 3,
pp.2686-2699,
jun.
2024,
DOI 10.1109/TNSM.2023.3329442
Journal
% IEEE TNSM journal article, vol. 21, no. 3, Jun 2024.
% pages uses the double-hyphen form so styles typeset an en dash for the range.
% doi holds the bare identifier; howpublished holds the IEEE Xplore abstract page.
@article{TNSM-24b,
  author       = {Gioacchini, Luca and Mellia, Marco and Vassio, Luca and Drago, Idilio and Milan, Giulia and Houidi, Zied Ben and Rossi, Dario},
  title        = {Cross-Network Embeddings Transfer for Traffic Analysis},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {21},
  number       = {3},
  pages        = {2686--2699},
  month        = jun,
  year         = {2024},
  doi          = {10.1109/TNSM.2023.3329442},
  howpublished = {https://ieeexplore.ieee.org/abstract/document/10304313}
}
Artificial Intelligence (AI) approaches have emerged as powerful tools to improve traffic analysis for network monitoring and management. However, the lack of large labeled datasets and the ever-changing networking scenarios make a fundamental difference compared to other domains where AI is thriving. We believe the ability to transfer the specific knowledge acquired in one network (or dataset) to a different network (or dataset) would be fundamental to speed up the adoption of AI-based solutions for traffic analysis and other networking applications (e.g., cybersecurity). We here propose and evaluate different options to transfer the knowledge built from a provider network, owning data and labels, to a customer network that desires to label its traffic but lacks labels. We formulate this problem as a domain adaptation problem that we solve with embedding alignment techniques and canonical transfer learning approaches. We present a thorough experimental analysis to assess the performance considering both supervised (e.g., classification) and unsupervised (e.g., novelty detection) downstream tasks related to darknet and honeypot traffic. Our experiments show the proper transfer techniques to use the models obtained from a network in a different network. We believe our contribution opens new opportunities and business models where network providers can successfully share their knowledge and AI models with customers.
% arXiv preprint, typed as misc: "techrep" is not a BibTeX entry type, and
% techreport would require an institution field that is not available here.
% NOTE(review): if site tooling filters on the literal type "techrep", confirm before merging.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@misc{arxiv:2405.02649,
  author       = {Gioacchini, Luca and Drago, Idilio and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
  title        = {Generic Multi-modal Representation Learning for Network Traffic Analysis},
  month        = may,
  year         = {2024},
  howpublished = {https://arxiv.org/abs/2405.02649},
  arxiv        = {https://arxiv.org/abs/2405.02649}
}
[COMNET-24]
Cerasuolo, Francesco and Nascita, Alfredo and Bovenzi, Giampaolo and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario,
"MEMENTO: A novel approach for class incremental learning of encrypted traffic"
In Computer Networks,
pp.110374,
may.
2024,
DOI 10.1016/j.comnet.2024.110374
Journal
% Computer Networks journal article (May 2024); pages carries the article number.
% doi holds the bare identifier without a resolver prefix; howpublished holds the ScienceDirect page.
% Only the acronym in the title is brace-protected, so styles may still apply their own casing.
@article{COMNET-24,
  author       = {Cerasuolo, Francesco and Nascita, Alfredo and Bovenzi, Giampaolo and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario},
  title        = {{MEMENTO}: A novel approach for class incremental learning of encrypted traffic},
  journal      = {Computer Networks},
  pages        = {110374},
  month        = may,
  year         = {2024},
  issn         = {1389-1286},
  doi          = {10.1016/j.comnet.2024.110374},
  howpublished = {https://www.sciencedirect.com/science/article/pii/S1389128624002068},
  keywords     = {Traffic classification, Class incremental learning, Mobile apps, Encrypted traffic, Deep learning}
}
In the ever-changing digital environment, ensuring the ongoing effectiveness of traffic analysis and security measures is crucial. Therefore, Class Incremental Learning (CIL) in encrypted Traffic Classification (TC) is essential for adapting to evolving network behaviors and the rapid development of new applications. However, the application of CIL techniques in the TC domain is not straightforward, usually leading to unsatisfactory performance figures. Specifically, the improvement goal is to reduce forgetting on old apps and increase the capacity in learning new ones, in order to improve overall classification performance— reducing the drop from a model “trained-from-scratch”. The contribution of this work is the design of a novel fine-tuning approach called MEMENTO, which is obtained through the careful design of different building blocks: memory management, model training, and rectification strategies. In detail, we propose the application of traffic biflows augmentation strategies to better capitalize on old apps biflows, we introduce improvements in the distillation stage, and we design a general rectification strategy that includes several existing proposals. To assess our proposal, we leverage two publicly-available encrypted network traffic datasets, i.e., MIRAGE19 and CESNET-TLS22. As a result, on both datasets MEMENTO achieves a significant improvement in classifying new apps (w.r.t. the best-performing alternative, i.e., BiC) while maintaining stable performance on old ones. Equally important, MEMENTO achieves satisfactory overall TC performance, filling the gap toward a trained-from-scratch model and offering a considerable gain in terms of time (up to 10× speed-up) to obtain up-to-date and running classifiers. The experimental evaluation relies on a comprehensive performance evaluation workbench for CIL proposals, which is based on a wider set of metrics (as opposed to the existing literature in TC).
[PAM-24]
Wang, Chao and Finamore, Alessandro and Michiardi, Pietro and Gallo, Massimo and Rossi, Dario,
"Data Augmentation for Traffic Classification"
Passive and Active Measurements (PAM)
apr.
2024,
arXiv Conference Runner-up
% PAM 2024 conference paper; note carries the award tag "bestpaperrunnerup".
% Author names are in "Family, Given" form; "Michiardi, Pietro" matches the
% spelling used for this author in the file's other entries.
% arxiv is not a standard BibTeX field; standard styles ignore it.
@inproceedings{PAM-24,
  author       = {Wang, Chao and Finamore, Alessandro and Michiardi, Pietro and Gallo, Massimo and Rossi, Dario},
  title        = {Data Augmentation for Traffic Classification},
  booktitle    = {Passive and Active Measurements (PAM)},
  month        = apr,
  year         = {2024},
  note         = {bestpaperrunnerup},
  howpublished = {https://arxiv.org/abs/2401.10754},
  arxiv        = {https://arxiv.org/abs/2401.10754}
}
@article{CoNEXT-24a,
  author       = {Azorin, Raphael and Monterubbiano, Andrea and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
  title        = {Taming the Elephants: Affordable Flow Length Prediction in the Data Plane},
  journal      = {Proc. of CoNEXT'24 (PACMNET)},
  year         = {2024},
  month        = mar,
  articleno    = {5},
  numpages     = {24},
  doi          = {10.1145/3649473},
  url          = {https://doi.org/10.1145/3649473},
  howpublished = {https://dl.acm.org/doi/abs/10.1145/3649473},
  keywords     = {data plane, in-network machine learning, per-flow monitoring}
}
Machine Learning (ML) shows promising potential for enhancing networking tasks by providing early traffic predictions. However, implementing an ML-enabled system is a challenging task due to network devices limited resources. While previous works have shown the feasibility of running simple ML models in the data plane, integrating them into a practical end-to-end system is not an easy task. It requires addressing issues related to resource management and model maintenance to ensure that the performance improvement justifies the system overhead. In this work, we propose DUMBO, a versatile end-to-end system to generate and exploit early flow size predictions at line rate. Our system seamlessly integrates and maintains a simple ML model that offers early coarse-grain flow size prediction in the data plane. We evaluate the proposed system on flow scheduling, per-flow packet inter-arrival time distribution, and flow size estimation using real traffic traces, and perform experiments using an FPGA prototype running on an AMD(R)-Xilinx(R) Alveo U280 SmartNIC. Our results show that DUMBO outperforms traditional state-of-the-art approaches by equipping network devices data planes with a lightweight ML model. Code is available at https://github.com/cpt-harlock/DUMBO.
[TNSM-24a]
Bovenzi, Giampaolo and Nascita, Alfredo and Yang, Lixuan and Finamore, Alessandro and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario,
"Benchmarking Class Incremental Learning in Deep Learning Traffic Classification"
In IEEE Transactions on Network and Service Management,
Vol. 21,
No. 1,
pp.51-69,
feb.
2024,
DOI 10.1109/TNSM.2023.3287430
Journal
@article{TNSM-24a,
  author       = {Bovenzi, Giampaolo and Nascita, Alfredo and Yang, Lixuan and Finamore, Alessandro and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario},
  title        = {Benchmarking Class Incremental Learning in Deep Learning Traffic Classification},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {21},
  number       = {1},
  pages        = {51--69},
  year         = {2024},
  month        = feb,
  doi          = {10.1109/TNSM.2023.3287430},
  howpublished = {https://ieeexplore.ieee.org/abstract/document/10155294}
}
Traffic Classification (TC) is experiencing a renewed interest, fostered by the growing popularity of Deep Learning (DL) approaches. In exchange for their proved effectiveness, DL models are characterized by a computationally-intensive training procedure that badly matches the fast-paced release of new (mobile) applications, resulting in significantly limited efficiency of model updates. To address this shortcoming, in this work we systematically explore Class Incremental Learning (CIL) techniques, aimed at adding new apps/services to pre-existing DL-based traffic classifiers without a full retraining, hence speeding up the model’s updates cycle. We investigate a large corpus of state-of-the-art CIL approaches for the DL-based TC task, and delve into their working principles to highlight relevant insight, aiming to understand if there is a case for CIL in TC. We evaluate and discuss their performance varying the number of incremental learning episodes, and the number of new apps added for each episode. Our evaluation is based on the publicly available MIRAGE19 dataset comprising traffic of 40 popular Android applications, fostering reproducibility. Although our analysis reveals their infancy, CIL techniques are a promising research area on the roadmap towards automated DL-based traffic analysis systems.
@inproceedings{CoNEXT-23a,
  author       = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
  title        = {{SPADA: A Sparse Approximate Data Structure representation for data plane per-flow monitoring}},
  booktitle    = {ACM CoNEXT},
  year         = {2023},
  month        = dec,
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program}
}
@inproceedings{CoNEXT-23b,
  author       = {Huet, Alexis and Krolikowski, Jonatan and Navarro, Jose Manuel and Chen, Fuxing and Rossi, Dario},
  title        = {Change Point Detection in WLANs with Random AP Forests},
  booktitle    = {ACM CoNEXT},
  year         = {2023},
  month        = dec,
  doi          = {10.1145/3624354.3630587},
  howpublished = {https://doi.org/10.1145/3624354.3630587}
}
Troubleshooting WiFi networks is knowingly difficult due to the variability of the wireless medium. Complementary to existing works that focus on detecting short-term fluctuations of radio signals (i.e., anomalies), we tackle the problem of reliably detecting long-term changes in statistical properties of WiFi networks. We propose a new method to reliably gain insights on such environmental changes, which we refer to as Random Access Point Forest (RAPF). RAPF identifies the changes from a forest of individual learners, each of them consisting of a random tree approximating the signal of a specific pair of APs. The biased selection of APs in a distributed manner along with the stochastic construction of each individual tree ensure its robustness to noise and biases. We conduct a measurement campaign on a real WLAN by collecting the path loss among pairs of APs in a network for which labels are available and perform an extensive comparison of our methodology against state-of-the-art change point methodologies, which conclusively shows RAPF to yield the most robust detection capabilities.
@inproceedings{CoNEXT-23c,
  author       = {Wang, Chao and Finamore, Alessandro and Gallo, Massimo and Michiardi, Pietro and Rossi, Dario},
  title        = {Toward Generative Data Augmentation for Traffic Classification},
  booktitle    = {ACM CoNEXT, Student Workshop},
  year         = {2023},
  month        = dec,
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-student}
}
[CoNEXT-23d]
Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario,
"Memory-efficient Random Forests in FPGA SmartNICs"
ACM CoNEXT, Poster session
dec.
2023,
Conference
@inproceedings{CoNEXT-23d,
  author       = {Monterubbiano, Andrea and Azorin, Raphael and Castellano, Gabriele and Gallo, Massimo and Pontarelli, Salvatore and Rossi, Dario},
  title        = {Memory-efficient Random Forests in FPGA SmartNICs},
  booktitle    = {ACM CoNEXT, Poster session},
  year         = {2023},
  month        = dec,
  howpublished = {https://conferences.sigcomm.org/co-next/2023/#!/program-poster}
}
Random Forests (RF) have been a popular Machine Learning (ML) algorithm for more than two decades. This success can be attributed to its simplicity, effectiveness and explainability. However, implementing them in a high-speed programmable data plane is not trivial. To make predictions, i.e., inference, RFs must traverse each tree from the root to the leaf by comparing the features vector at each split node. This process is particularly challenging in network devices where memory is limited, and packet processing cannot be delayed, i.e., predictions occur at line rate. Nevertheless, this implementation is crucial for incorporating recent ML advances in the network, which could benefit use cases such as scheduling, measurements, and routing [1]. Prior studies such as Planter [4] have examined the implementation of RF in network switches, mapping trees to Match-Action Tables (MAT). Another line of work focused on RF implementations optimized for FPGA, mapping tree layers to pipeline stages as done in [2]. Such approaches use different tree representations that naturally come with their strengths and weaknesses depending on the trees’ sparsity, depth, and input features. In this work we (1) propose a novel representation for FPGA-based Random Forests, (2) compare it against state-of-the-art implementations in terms of memory and computation requirements, and (3) evaluate our design on a flow classification task using CAIDA traffic traces.
@inproceedings{ICDM-23,
  author       = {Kong, Lanfang and Huet, Alexis and Rossi, Dario and Sozio, Mauro},
  title        = {Tree-based Kendall tau Maximization for Explainable Unsupervised Anomaly Detection},
  booktitle    = {IEEE International Conference on Data Mining (ICDM)},
  year         = {2023},
  month        = dec,
  howpublished = {https://ieeexplore.ieee.org/abstract/document/10415648}
}
We study the problem of building a regression tree with relatively small size, which maximizes the Kendall’s tau coefficient between the anomaly scores of a source anomaly detection algorithm and those predicted by our regression tree. We consider a labeling function which assigns to each leaf the inverse of its size, thereby providing satisfactory explanations when comparing examples with different anomaly scores. We show that our approach can be used as a post-hoc model, i.e. to provide global explanations for an existing anomaly detection algorithm. Moreover, it can be used as an in-model approach, i.e. the source anomaly detection algorithm can be replaced all together. This is made possible by leveraging the off-the-shelf transparency of tree-based approaches and from the fact that the explanations provided by our approach do not rely on the source anomaly detection algorithm. The main technical challenge to tackle is the efficient computation of the Kendall’s tau coefficients when determining the best split at each node of the regression tree. We show how such a coefficient can be computed incrementally, thereby making the running time of our algorithm almost linear (up to a logarithmic factor) in the size of the input. Our approach is completely unsupervised, which is appealing in the case when it is difficult to collect a large number of labeled examples. We complement our study with an extensive experimental evaluation against the state-of-the-art, showing the effectiveness of our approach.
[TNSM-23]
Soro, Francesca and Favale, Thomas and Giordano, Danilo and Drago, Idilio and Rescio, Tommaso and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario,
"Enlightening the Darknets: Augmenting Darknet Visibility with Active Probes"
In IEEE Transactions on Network and Service Management,
Vol. 20,
No. 4,
pp.5012-5025,
dec.
2023,
DOI 10.1109/TNSM.2023.3267671
Journal
@article{DR:TNSM-23,
  author       = {Soro, Francesca and Favale, Thomas and Giordano, Danilo and Drago, Idilio and Rescio, Tommaso and Mellia, Marco and Houidi, Zied Ben and Rossi, Dario},
  title        = {Enlightening the Darknets: Augmenting Darknet Visibility with Active Probes},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {20},
  number       = {4},
  pages        = {5012--5025},
  year         = {2023},
  month        = dec,
  doi          = {10.1109/TNSM.2023.3267671},
  howpublished = {https://ieeexplore.ieee.org/document/10102919}
}
Darknets collect unsolicited traffic reaching unused address spaces. They provide insights into malicious activities, such as the rise of botnets and DDoS attacks. However, darknets provide a shallow view, as traffic is never responded. Here we quantify how their visibility increases by responding to traffic with interactive responders with increasing levels of interaction. We consider four deployments: Darknets, simple L4-Responders, vertical L7-Responders bound to specific ports, and DPIPot, a honeypot that responds to all protocols on any port. We contrast these alternatives by analyzing the traffic attracted by each deployment and characterizing how traffic changes throughout the responder lifecycle on the darknet. We show that the deployment of responders increases the value of darknet data by revealing patterns that would otherwise be unobservable. We measure Side-Scan phenomena where once a host starts responding, it attracts traffic to other ports and neighboring addresses. DPIPot uncovers attacks that darknets and L7-Responders would not observe, e.g. large-scale activity on non-standard ports. And we observe how quickly senders can identify and attack new responders. The “enlightened” part of a darknet brings several benefits and offers opportunities to increase the visibility of sender patterns. This information gain is worth taking advantage of, and we, therefore, recommend that organizations consider this option.
@inproceedings{IMC-23,
  author    = {Finamore, Alessandro and Wang, Chao and Krolikowski, Jonatan and Navarro, Jose Manuel and Chen, Fuxing and Rossi, Dario},
  title     = {Replicating: Contrastive Learning and Data Augmentation in Traffic Classification Using a Flowpic Input Representation},
  booktitle = {ACM Internet Measurement Conference (IMC)},
  year      = {2023},
  month     = oct,
  arxiv     = {https://arxiv.org/abs/2309.09733}
}
Over the last years we witnessed a renewed interest towards Traffic Classification (TC) captivated by the rise of Deep Learning (DL). Yet, the vast majority of TC literature lacks code artifacts, performance assessments across datasets and reference comparisons against Machine Learning (ML) methods. Among those works, a recent study from IMC’22 [17] is worth of attention since it adopts recent DL methodologies (namely, few-shot learning, self-supervision via contrastive learning and data augmentation) appealing for networking as they enable to learn from a few samples and transfer across datasets. The main result of [17] on the UCDAVIS19, ISCX-VPN and ISCX-Tor datasets is that, with such DL methodologies, 100 input samples are enough to achieve very high accuracy using an input representation called "flowpic" (i.e., a per-flow 2d histograms of the packets size evolution over time). In this paper (i) we reproduce [17] on the same datasets and (ii) we replicate its most salient aspect (the importance of data augmentation) on three additional public datasets, MIRAGE-19, MIRAGE-22 and UTMOBILENET21. While we confirm most of the original results, we also found a 20% accuracy drop on some of the investigated scenarios due to a data shift in the original dataset that we uncovered. Additionally, our study validates that the data augmentation strategies studied in [17] perform well on other datasets too. In the spirit of reproducibility and replicability we make all artifacts (code and data) available at [10].
@inproceedings{DR:AutoML-23,
  author       = {Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario},
  title        = {Meta-Learning for Fast Model Recommendation in Unsupervised Multivariate Time Series Anomaly Detection},
  booktitle    = {AutoML Conference},
  year         = {2023},
  month        = sep,
  howpublished = {https://openreview.net/pdf?id=7cUV9K3ns9Q},
  dataseturl   = {https://figshare.com/articles/software/Meta-Learning_for_Fast_Model_Recommendation_in_Unsupervised_Multivariate_Time_Series_Anomaly_Detection/22320367}
}
Unsupervised model recommendation for anomaly detection is a recent discipline for which there is no existing work that focuses on multivariate time series data. This paper studies that problem under real-world restrictions, most notably: (i) a limited time to issue a recommendation, which renders existing methods based around the testing of a large pool of models unusable; (ii) the need for generalization to previously unseen data sources, which is seldom factored in the experimental evaluation. We turn to meta-learning and propose Hydra, the first meta-recommender for anomaly detection in literature that we especially analyze in the context of multivariate times series. We conduct our experiments using 94 public datasets from 4 different data sources. Our ablation study testifies that our meta-recommender achieves a higher performance than the current state of the art, including in difficult scenarios in which data similarity is minimal: our proposal is able to recommend a model in the top 10% (13%) of the algorithmic pool for known (unseen) sources of data.
@inproceedings{DR:KDD-23,
  author       = {Fauvel, Kevin and Chen, Fuxing and Rossi, Dario},
  title        = {A Lightweight, Efficient and Explainable-by-Design Convolutional Neural Network for Internet Traffic Classification},
  booktitle    = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD'23)},
  year         = {2023},
  month        = aug,
  arxiv        = {https://arxiv.org/abs/2202.05535},
  howpublished = {https://dl.acm.org/doi/10.1145/3580305.3599762}
}
Traffic classification, i.e. the identification of the type of applications flowing in a network, is a strategic task for numerous activities (e.g., intrusion detection, routing). This task faces some critical challenges that current deep learning approaches do not address. The design of current approaches does not take into consideration the fact that networking hardware (e.g., routers) often runs with limited computational resources. Further, they do not meet the need for faithful explainability highlighted by regulatory bodies. Finally, these traffic classifiers are evaluated on small datasets which fail to reflect the diversity of applications in real-world settings. Therefore, this paper introduces a new Lightweight, Efficient and eXplainable-by-design convolutional neural network (LEXNet) for Internet traffic classification, which relies on a new residual block (for lightweight and efficiency purposes) and prototype layer (for explainability). Based on a commercial-grade dataset, our evaluation shows that LEXNet succeeds to maintain the same accuracy as the best performing state-of-the-art neural network, while providing the additional features previously mentioned. Moreover, we illustrate the explainability feature of our approach, which stems from the communication of detected application prototypes to the end-user, and we highlight the faithfulness of LEXNet explanations through a comparison with post hoc methods.
[TOIT-23]
Gioacchini, Luca and Vassio, Luca and Mellia, Marco and Drago, Idilio and Houidi, Zied Ben and Rossi, Dario,
"i-DarkVec: Incremental Embeddings for Darknet Traffic Analysis"
In ACM Trans. Internet Technol.,
Vol. 23,
No. 3,
aug.
2023,
DOI 10.1145/3595378
Journal
@article{DR:TOIT-23,
  author       = {Gioacchini, Luca and Vassio, Luca and Mellia, Marco and Drago, Idilio and Houidi, Zied Ben and Rossi, Dario},
  title        = {i-DarkVec: Incremental Embeddings for Darknet Traffic Analysis},
  journal      = {ACM Trans. Internet Technol.},
  volume       = {23},
  number       = {3},
  year         = {2023},
  month        = aug,
  articleno    = {45},
  numpages     = {28},
  issn         = {1533-5399},
  doi          = {10.1145/3595378},
  url          = {https://doi.org/10.1145/3595378},
  howpublished = {https://dl.acm.org/doi/10.1145/3595378},
  keywords     = {darknet, Network Measurements, Word2Vec}
}
Darknets are probes listening to traffic reaching IP addresses that host no services. Traffic reaching a darknet results from the actions of internet scanners, botnets, and possibly misconfigured hosts. Such peculiar nature of the darknet traffic makes darknets a valuable instrument to discover malicious online activities, e.g., identifying coordinated actions performed by bots or scanners. However, the massive amount of packets and sources that darknets observe makes it hard to extract meaningful insights, calling for scalable tools to automatically identify and group sources that share similar behaviour. We here present i-DarkVec, a methodology to learn meaningful representations of Darknet traffic. i-DarkVec leverages Natural Language Processing techniques (e.g., Word2Vec) to capture the co-occurrence patterns that emerge when scanners or bots launch coordinated actions. As in NLP problems, the embeddings learned with i-DarkVec enable several new machine learning tasks on the darknet traffic, such as identifying clusters of senders engaged in similar activities. We extensively test i-DarkVec and explore its design space in a case study using real darknets. We show that with a proper definition of services, the learned embeddings can be used to (i) solve the classification problem to associate unknown sources’ IP addresses to the correct classes of coordinated actors and (ii) automatically identify clusters of previously unknown sources performing similar attacks and scans, easing the security analyst’s job. i-DarkVec leverages a novel incremental embedding learning approach that is scalable and robust to traffic changes, making it applicable to dynamic and large-scale scenarios.
@inproceedings{DR:TMA-23,
  author       = {Guarino, Idio and Wang, Chao and Finamore, Alessandro and Pescape, Antonio and Rossi, Dario},
  title        = {Many or Few Samples? Comparing Transfer, Contrastive and Meta-Learning in Encrypted Traffic Classification},
  booktitle    = {Network Traffic Measurement and Analysis Conference (TMA)},
  year         = {2023},
  month        = jun,
  doi          = {10.23919/TMA58422.2023.10198965},
  arxiv        = {https://arxiv.org/abs/2305.12432},
  howpublished = {https://ieeexplore.ieee.org/document/10198965}
}
The popularity of Deep Learning (DL), coupled with network traffic visibility reduction due to the increased adoption of HTTPS, QUIC and DNS-SEC, re-ignited interest towards Traffic Classification (TC). However, to tame the dependency from task-specific large labeled datasets we need to find better ways to learn representations that are valid across tasks. In this work we investigate this problem comparing transfer learning, meta-learning and contrastive learning against reference Machine Learning (ML) tree-based and monolithic DL models (16 methods total). Using two publicly available datasets, namely MIRAGE19 (40 classes) and AppClassNet (500 classes), we show that (i) using large datasets we can obtain more general representations, (ii) contrastive learning is the best methodology and (iii) meta-learning the worst one, and (iv) while ML tree-based cannot handle large tasks but fits well small tasks, by means of reusing learned representations, DL methods are reaching tree-based models performance also for small tasks.
[PATENT-PCT/CN2023/080516]
FAUVEL, Kevin and ZHAO, Yong and CAO, Zigang and CHEN, Maolin and CHEN, Fuxing and ROSSI, Dario,
"Traffic Classification with a Modifiable Ruleset" , Patent PCT/CN2023/080516
mar.
2023,
Patent
@misc{DR:PATENT-PCT/CN2023/080516,
  author = {FAUVEL, Kevin and ZHAO, Yong and CAO, Zigang and CHEN, Maolin and CHEN, Fuxing and ROSSI, Dario},
  title  = {Traffic Classification with a Modifiable Ruleset},
  year   = {2023},
  month  = mar,
  note   = {Patent PCT/CN2023/080516},
  topic  = {tc-xai},
  patent = {True}
}
@misc{arXiv:2302.10676,
  author       = {Krolikowski, Jonatan and Houidi, Zied Ben and Rossi, Dario},
  title        = {User-aware WLAN Transmit Power Control in the Wild},
  year         = {2023},
  month        = feb,
  arxiv        = {https://arxiv.org/abs/2302.10676},
  howpublished = {https://arxiv.org/abs/2302.10676}
}
In Wireless Local Area Networks (WLANs), Access point (AP) transmit power influences (i) received signal quality for users and thus user throughput, (ii) user association and thus load across APs and (iii) AP coverage ranges and thus interference in the network. Despite decades of academic research, transmit power levels are still, in practice, statically assigned to satisfy uniform coverage objectives. Yet each network comes with its unique distribution of users in space, calling for a power control that adapts to users’ probabilities of presence, for example, placing the areas with higher interference probabilities where user density is the lowest. Although nice on paper, putting this simple idea in practice comes with a number of challenges, with gains that are difficult to estimate, if any at all. This paper is the first to address these challenges and evaluate in a production network serving thousands of daily users the benefits of a user-aware transmit power control system. Along the way, we contribute a novel approach to reason about user densities of presence from historical IEEE 802.11k data, as well as a new machine learning approach to impute missing signal-strength measurements. Results of a thorough experimental campaign show feasibility and quantify the gains: compared to state-of-the-art solutions, the new system can increase the median signal strength by 15dBm, while decreasing airtime interference at the same time. This comes at an affordable cost of a 5dBm decrease in uplink signal due to lack of terminal cooperation.
@inproceedings{DR:AAAI-23-PDL,
  author       = {Azorin, Raphael and Gallo, Massimo and Finamore, Alessandro and Rossi, Dario and Michiardi, Pietro},
  title        = {"It's a Match!" -- A Benchmark of Task Affinity Scores for Joint Learning},
  booktitle    = {AAAI'23, International Workshop on Practical Deep Learning in the Wild},
  year         = {2023},
  month        = feb,
  arxiv        = {https://arxiv.org/abs/2301.02873},
  howpublished = {https://arxiv.org/abs/2301.02873}
}
While the promises of Multi-Task Learning (MTL) are attractive, characterizing the conditions of its success is still an open problem in Deep Learning. Some tasks may benefit from being learned together while others may be detrimental to one another. From a task perspective, grouping cooperative tasks while separating competing tasks is paramount to reap the benefits of MTL, i.e., reducing training and inference costs. Therefore, estimating task affinity for joint learning is a key endeavor. Recent work suggests that the training conditions themselves have a significant impact on the outcomes of MTL. Yet, the literature is lacking a benchmark to assess the effectiveness of tasks affinity estimation techniques and their relation with actual MTL performance. In this paper, we take a first step in recovering this gap by (i) defining a set of affinity scores by both revisiting contributions from previous literature as well as presenting new ones and (ii) benchmarking them on the Taskonomy dataset. Our empirical campaign reveals how, even in a small-scale scenario, task affinity scoring does not correlate well with actual MTL performance. Yet, some metrics can be more indicative than others.
[PATENT-PCT/EP2023/053192]
HUET, Alexis and NAVARRO, Jose Manuel and ROSSI, Dario,
"Identifying and locating change points in a time series of data values" , Patent PCT/EP2023/053192
feb.
2023,
Patent
@misc{DR:PATENT-PCT/EP2023/053192,
  author = {HUET, Alexis and NAVARRO, Jose Manuel and ROSSI, Dario},
  title  = {Identifying and locating change points in a time series of data values},
  year   = {2023},
  month  = feb,
  note   = {Patent PCT/EP2023/053192},
  topic  = {ad},
  patent = {True}
}
@article{DR:COMMAG-23,
  author       = {Iacoboaiea, Ovidiu and Krolikowski, Jonatan and Houidi, Zied Ben and Rossi, Dario},
  title        = {From Design to Deployment of Zero-touch Deep Reinforcement Learning WLANs},
  journal      = {IEEE Communications Magazine},
  volume       = {61},
  number       = {2},
  year         = {2023},
  month        = feb,
  doi          = {10.1109/MCOM.002.2200318},
  arxiv        = {https://arxiv.org/abs/2207.06172},
  howpublished = {https://ieeexplore.ieee.org/document/9992177},
  topic        = {wlan-algo}
}
Machine learning is increasingly used to automate networking tasks, in a paradigm known as zero touch network and service management (ZSM). In particular, deep reinforcement learning (DRL) techniques have recently gained much attention for their ability to learn taking complex decisions in different fields. In the ZSM context, DRL is an appealing candidate for tasks such as dynamic resource allocation, which are generally formulated as hard optimization problems. At the same time, successful training and deployment of DRL agents in real-world scenarios face a number of challenges that we outline and address in this article. Tackling the case of wireless local area network radio resource management, we report guidelines that extend to other use-cases and more general contexts.
[PATENT-PCT/EP2023/050709]
KROLIKOWSKI, Jonatan and HOUIDI, Zied BEN and CHEN, Fuxing and ROSSI, Dario,
"A data-driven WLAN topology-related KPIs query system and estimation method" , Patent PCT/EP2023/050709
jan.
2023,
Patent
@misc{DR:PATENT-PCT/EP2023/050709,
  author = {KROLIKOWSKI, Jonatan and HOUIDI, Zied BEN and CHEN, Fuxing and ROSSI, Dario},
  title  = {A data-driven WLAN topology-related KPIs query system and estimation method},
  year   = {2023},
  month  = jan,
  note   = {Patent PCT/EP2023/050709},
  topic  = {wlan},
  patent = {True}
}
@article{DR:ENTROPY-23,
  author         = {Franzese, Giulio and Rossi, Simone and Yang, Lixuan and Finamore, Alessandro and Rossi, Dario and Filippone, Maurizio and Michiardi, Pietro},
  title          = {How Much Is Enough? A Study on Diffusion Times in Score-Based Generative Models},
  journal        = {Entropy},
  volume         = {25},
  number         = {4},
  article-number = {633},
  year           = {2023},
  issn           = {1099-4300},
  doi            = {10.3390/e25040633},
  howpublished   = {https://arxiv.org/abs/2206.05173}
}
Score-based diffusion models are a class of generative models whose dynamics is described by stochastic differential equations that map noise into data. While recent works have started to lay down a theoretical foundation for these models, a detailed understanding of the role of the diffusion time T is still lacking. Current best practice advocates for a large T to ensure that the forward dynamics brings the diffusion sufficiently close to a known and simple noise distribution; however, a smaller value of T should be preferred for a better approximation of the score-matching objective and higher computational efficiency. Starting from a variational interpretation of diffusion models, in this work we quantify this trade-off and suggest a new method to improve quality and efficiency of both training and sampling, by adopting smaller diffusion times. Indeed, we show how an auxiliary model can be used to bridge the gap between the ideal and the simulated forward dynamics, followed by a standard reverse diffusion process. Empirical results support our analysis; for image data, our method is competitive with regard to the state of the art, according to standard sample quality metrics and log-likelihood.
2022 # 20
[AICCSA-22]
Nesic, Stefan and Putina, Andrian and Bahri, Maroua and Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario and Sozio, Mauro,
"StreamRHF: Tree-based unsupervised anomaly detection for data streams"
19th ACS/IEEE International Conference on Computer Systems and Applications (AICCSA 2022)
dec.
2022,
Conference
@inproceedings{DR:AICCSA-22,
  author       = {Nesic, Stefan and Putina, Andrian and Bahri, Maroua and Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario and Sozio, Mauro},
  title        = {{StreamRHF: Tree-based unsupervised anomaly detection for data streams}},
  booktitle    = {19th ACS/IEEE International Conference on Computer Systems and Applications (AICCSA 2022)},
  year         = {2022},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi22aiccsa.pdf},
  note         = {project=huawei},
  topic        = {ad-algo}
}
We present StreamRHF, an unsupervised anomaly detection algorithm for data streams. Our algorithm builds on some of the ideas of Random Histogram Forest (RHF), a state-of-the-art algorithm for batch unsupervised anomaly detection. StreamRHF constructs a forest of decision trees, where feature splits are determined according to the kurtosis score of every feature. It irrevocably assigns an anomaly score to data points, as soon as they arrive, by means of an incremental computation of its random trees and the kurtosis scores of the features. This allows efficient online scoring and concept drift detection altogether. Our approach is tree-based which boasts several appealing properties, such as explainability of the results. We conduct an extensive experimental evaluation on multiple datasets from different real-world applications. Our evaluation shows that our streaming algorithm achieves comparable average precision to RHF while outperforming state-of-the-art streaming approaches for unsupervised anomaly detection with furthermore limited computational complexity.
[CoNEXT-NNI-22a]
Boffa, Matteo and Vassio, Luca and Drago, Idilio and Mellia, Marco and Milan, Giulia and Houidi, Zied Ben and Rossi, Dario,
"On Using Pretext Tasks to Learn Representations from Network Logs"
ACM CoNext workshop on Native Network Intelligence (NNI)
dec.
2022,
Conference
@inproceedings{DR:CoNEXT-NNI-22a,
  author       = {Boffa, Matteo and Vassio, Luca and Drago, Idilio and Mellia, Marco and Milan, Giulia and Houidi, Zied Ben and Rossi, Dario},
  title        = {{On Using Pretext Tasks to Learn Representations from Network Logs}},
  booktitle    = {ACM CoNEXT workshop on Native Network Intelligence (NNI)},
  year         = {2022},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi22conext-nni-a.pdf},
  note         = {project=huawei},
  topic        = {network-security}
}
Learning meaningful representations from network data is critical
to ease the adoption of AI as a cornerstone to process network logs.
Since a large portion of such data is textual, Natural Language Processing (NLP) appears as an obvious candidate to learn their representations. Indeed, the literature proposes impressive applications
of NLP applied to textual network data. However, in the absence of
labels, objectively evaluating the goodness of the learned representations is still an open problem. We call for a systematic adoption of
domain-specific pretext tasks to select the best representation from
network data. Relying on such tasks enables us to evaluate different
representations on side machine learning problems and, ultimately,
unveil the best candidate representations for the more interesting downstream tasks for which labels are scarce or unavailable.
We apply pretext tasks in the analysis of logs collected from
SSH honeypots. Here, a cumbersome downstream task is to cluster
events that exhibit a similar attack pattern. We propose the following pipeline: first, we represent the input data using a classic
NLP-based approach. Then, we design pretext tasks to objectively
evaluate the representation goodness and to select the best one. Finally, we use the best representation to solve the unsupervised task,
which uncovers interesting behaviours and attack patterns. All in
all, our proposal can be generalized to other text-based network
logs beyond honeypots.
@inproceedings{DR:CoNEXT-NNI-22b,
  author       = {Rossi, Dario and Zhang, Liang},
  title        = {{Native Network Intelligence, Fast and Slow}},
  booktitle    = {ACM CoNEXT workshop on Native Network Intelligence (NNI)},
  year         = {2022},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi22conext-nni-b.pdf},
  note         = {project=huawei},
  topic        = {ai-native}
}
As networks have historically been built around connectivity,
architectural features concerning quality of service, mobility, security and privacy have been added as afterthoughts – with consequent well known architectural headaches for their later integration.
Although Artificial Intelligence (AI) is more a means to an end than an architectural feature itself, this is not completely different from what concerns its integration: in particular, while Cloud and Edge computing paradigms made it possible to use AI techniques to relieve part of network operation, AI is currently little more than an additional tool. This paper describes a vision of future networks, where AI becomes a first class commodity: its founding principle lies around the concept of “fast and slow” type of AI reasoning, each of which offers different types of AI capabilities to process network data.
We next outline how these building blocks naturally map to different network segments, and discuss emerging AI-to-AI communication patterns as we move to more intelligent networks.
@article{DR:CommMag-22,
  author       = {Iacoboaiea, Ovidiu and Krolikowski, Jonatan and Houidi, Zied Ben and Rossi, Dario},
  title        = {From Design to Deployment of Zero-touch Deep Reinforcement Learning WLANs},
  journal      = {IEEE Communications Magazine (vol. to appear)},
  year         = {2022},
  month        = dec,
  doi          = {10.1109/MCOM.002.2200318},
  howpublished = {/ai4net/docs/rossi22commag.pdf},
  arxiv        = {https://arxiv.org/abs/2207.06172},
  note         = {project=huawei},
  topic        = {wlan}
}
Machine learning (ML) is increasingly used to automate networking tasks, in a paradigm known as zero-touch network and service management (ZSM). In particular, Deep Reinforcement Learning (DRL) techniques have recently gathered much attention for their ability to learn to take complex decisions in different fields. In the ZSM context, DRL is an appealing candidate for tasks such as dynamic resource allocation, which are generally formulated as hard optimization problems. At the same time, successful training and deployment of DRL agents in real-world scenarios face a number of challenges that we outline and address in this paper. Tackling the case of Wireless Local Area Network (WLAN) radio resource management, we report guidelines that extend to other use cases and more general contexts.
@inproceedings{DR:CoNEXT-GNN-22,
  author       = {Fernandes, Danilo Marinho and Krolikowski, Jonatan and Houidi, Zied Ben and Chen, Fuxing and Rossi, Dario},
  title        = {{Cross-network transferable neural models for WLAN interference estimation}},
  booktitle    = {ACM CoNEXT workshop on Graph Neural Networks (GNN)},
  year         = {2022},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi22conext-gnn.pdf},
  note         = {project=huawei},
  topic        = {wlan}
}
Airtime interference is a key performance indicator for WLANs,
measuring, for a given time period, the percentage of time
during which a node is forced to wait for other transmissions
before transmitting or receiving. Being able to accurately
estimate interference resulting from a given state change
(e.g., channel, bandwidth, power) would allow a better control of WLAN resources, assessing the impact of a given
configuration before actually implementing it.
In this paper, we adopt a principled approach to interference estimation in WLANs. We first use real data to characterize the factors that impact it, and derive a set of relevant
synthetic workloads for a controlled comparison of various
deep learning architectures in terms of accuracy, generalization and robustness to outlier data. We find, unsurprisingly,
that Graph Convolutional Networks (GCNs) yield the best
performance overall, leveraging the graph structure inherent
to campus WLANs. We notice that, unlike e.g. LSTMs, they
struggle to learn the behavior of specific nodes, unless given
the node indexes in addition. We finally verify GCN model
generalization capabilities, by applying trained models on
operational deployments unseen at training time.
@inproceedings{DR:HotNets-22,
  author       = {Houidi, Zied Ben and Azorin, Raphael and Gallo, Massimo and Finamore, Alessandro and Rossi, Dario},
  title        = {{Towards a systematic multi-modal representation learning for network data}},
  booktitle    = {ACM HotNets},
  year         = {2022},
  month        = nov,
  howpublished = {https://nonsns.github.io/paper/rossi22hotnets.pdf},
  note         = {project=huawei},
  topic        = {ai-native}
}
Learning the right representations from complex input data is
the key ability of successful machine learning (ML) models.
The latter are often tailored to a specific data modality. For
example, recurrent neural networks (RNNs) were designed
having the processing of sequential data in mind, while convolutional neural networks (CNNs) were designed to exploit
spatial correlation naturally present in images. Unlike computer vision (CV) and natural language processing (NLP),
each of which targets a single well-defined modality, network ML problems often have a mixture of data modalities
as input. Yet, instead of exploiting such abundance, practitioners tend to rely on sub-features thereof, reducing the
problem to a single modality for the sake of simplicity.
In this paper, we advocate for exploiting all the modalities
naturally present in network data. As a first step, we observe
that network data systematically exhibits a mixture of quantities (e.g., measurements), and entities (e.g., IP addresses,
names, etc.). Whereas the former are generally well exploited, the latter are often underused or poorly represented
(e.g., with one-hot encoding). We propose to systematically
leverage state of the art embedding techniques to learn entity representations, whenever significant sequences of such
entities are historically observed. Through two diverse usecases, we show that such entity encoding can benefit and naturally augment classic quantity-based features.
@article{DR:ComNet-22,
  author  = {Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario},
  title   = {Human readable network troubleshooting based on anomaly detection and feature scoring},
  journal = {Elsevier Computer Networks},
  volume  = {219},
  year    = {2022},
  month   = nov,
  issn    = {1389-1286},
  doi     = {10.1016/j.comnet.2022.109447},
  arxiv   = {https://arxiv.org/abs/2108.11807},
  note    = {project=huawei},
  topic   = {ad-fs}
}
Network troubleshooting is still a heavily human-intensive process. To reduce the time spent by human operators in the diagnosis process, we present a system based on (i) unsupervised learning methods for detecting anomalies in the time domain, (ii) an attention mechanism to rank features in the feature space and finally (iii) an expert knowledge module able to seamlessly incorporate previously collected domain-knowledge.
In this paper, we thoroughly evaluate the performance of the full system and of its individual building blocks: particularly, we consider (i) 10 anomaly detection algorithms as well as (ii) 10 attention mechanisms, that comprehensively represent the current state of the art in the respective fields. Leveraging a unique collection of expert-labeled datasets worth several months of real router telemetry data, we perform a thorough performance evaluation contrasting practical results in constrained stream-mode settings, with the results achievable by an ideal oracle in academic settings.
Our experimental evaluation shows that (i) the proposed system is effective in achieving high levels of agreement with the expert,
and (ii) that even a simple statistical approach is able to extract useful information from expert knowledge gained in past cases, significantly improving troubleshooting performance.
@inproceedings{DR:ITC-22,
  author       = {Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario},
  title        = {Rare Yet Popular: Evidence and Implications from Anomaly Detection Datasets},
  booktitle    = {International Teletraffic Congress (ITC34)},
  year         = {2022},
  month        = sep,
  howpublished = {https://nonsns.github.io/paper/rossi22itc.pdf},
  arxiv        = {https://arxiv.org/abs/2211.10129},
  note         = {project=huawei},
  topic        = {ad-fs}
}
Anomaly detection research works generally propose algorithms or end-to-end systems that are designed to automatically discover outliers in a dataset or a stream. While literature abounds concerning algorithms or the definition of metrics for better evaluation, the quality of the ground truth against which they are evaluated is seldom questioned. In this paper, we present a systematic analysis of available public (and additionally our private) ground truth for anomaly detection in the context of network environment, where data is intrinsically temporal, multivariate and, in particular, exhibits spatial properties, which to the best of our knowledge we are the first to explore. Our analysis reveals that, while anomalies are by definition temporally rare events, their spatial characterization clearly shows some types of anomalies are significantly more popular than others. This can be achieved through surprisingly simple techniques and may have profound implications on the cost and quality of the labeling process.
[PATENT-PCT/EP2022/075646]
YANG, Lixuan and FINAMORE, Alessandro and CHEN, Fuxing and ROSSI, Dario,
"A device and method for network traffic classification" , Patent PCT/EP2022/075646
sep.
2022,
Patent
@misc{DR:PATENT-PCT/EP2022/075646,
  author = {YANG, Lixuan and FINAMORE, Alessandro and CHEN, Fuxing and ROSSI, Dario},
  title  = {A device and method for network traffic classification},
  year   = {2022},
  month  = sep,
  note   = {Patent PCT/EP2022/075646},
  patent = {True}
}
@article{DR:TNSM-22,
  author       = {Rossi, Dario and Zhang, Liang},
  title        = {Landing AI on Networks: An equipment vendor viewpoint on Autonomous Driving Networks},
  journal      = {IEEE Transactions on Network and Service Management (TNSM)},
  volume       = {19},
  number       = {3},
  year         = {2022},
  month        = sep,
  doi          = {10.1109/TNSM.2022.3169988},
  howpublished = {https://nonsns.github.io/paper/rossi22tnsm.pdf},
  arxiv        = {https://arxiv.org/abs/2205.08347},
  note         = {project=huawei},
  topic        = {ai-native}
}
The tremendous achievements of Artificial Intelligence (AI) in computer vision, natural language processing,
games and robotics, have extended the reach of the AI hype to
other fields: in telecommunication networks, the long term vision
is to let AI fully manage, and autonomously drive, all aspects
of network operation. In this industry vision paper, we discuss
challenges and opportunities of Autonomous Driving Network
(ADN) driven by AI technologies. To understand how AI can be
successfully landed in current and future networks, we start by
outlining challenges that are specific to the networking domain,
putting them in perspective with advances that AI has achieved in
other fields. We then present a system view, clarifying how AI can
be fitted in the network architecture. We finally discuss current
achievements as well as future promises of AI in networks,
mentioning roadmap to avoid bumps in the road that leads to
true large-scale deployment of AI technologies in networks.
@article{DR:ComCom-22,
  author       = {Houidi, Zied Ben and Rossi, Dario},
  title        = {Neural language models for network configuration: Opportunities and reality check},
  journal      = {Elsevier Computer Communications},
  volume       = {193},
  pages        = {118--125},
  year         = {2022},
  month        = sep,
  doi          = {10.1016/j.comcom.2022.06.035},
  howpublished = {https://nonsns.github.io/paper/rossi22comcom.pdf},
  arxiv        = {https://arxiv.org/abs/2205.01398},
  note         = {project=huawei},
  topic        = {ai-native}
}
Boosted by deep learning, natural language processing (NLP) techniques have recently seen spectacular progress, mainly fueled by breakthroughs both in representation learning with word embeddings (e.g. word2vec) as well as novel architectures (e.g. transformers). This success quickly invited researchers to explore the use of NLP techniques in other fields, such as computer programming languages, with the promise to automate tasks in software programming (bug detection, code synthesis, code repair, cross language translation etc.). By extension, NLP has potential for application to network configuration languages as well, for instance considering tasks such as network configuration verification, synthesis, and cross-vendor translation. In this paper, we survey recent advances in deep learning applied to programming languages, for the purpose of code verification, synthesis and translation: in particular, we review their training requirements and expected performance, and qualitatively assess whether similar techniques can benefit corresponding use-cases in networking.
@article{DR:SIGMETRICS-PEVA-22,
  author       = {Roberts, James and Rossi, Dario},
  title        = {Size-Based Scheduling vs Fairness for Datacenter Flows: A Queuing Perspective},
  journal      = {ACM SIGMETRICS Perform. Eval. Rev.},
  volume       = {50},
  number       = {2},
  year         = {2022},
  month        = sep,
  doi          = {10.1145/3561074.3561076},
  url          = {https://doi.org/10.1145/3561074.3561076},
  howpublished = {/ai4net/docs/rossi22sigmetrics-per.pdf},
  arxiv        = {https://arxiv.org/abs/2203.12983},
  note         = {project=huawei},
  topic        = {performance}
}
Contrary to the conclusions of a recent body of work where approximate shortest remaining processing time first (SRPT) flow scheduling is advocated for datacenter networks, this paper aims to demonstrate that imposing fairness remains a preferable objective. We evaluate abstract queuing models by analysis and simulation to illustrate the non-optimality of SRPT under the reasonable assumptions that datacenter source-destination flows occur in batches and bursts and not, as usually assumed, individually at the instants of a Poisson process. Results for these models have significant implications for the design of bandwidth sharing strategies for datacenter networks. In particular, we propose a novel "virtual fair scheduling" algorithm that enforces fairness between batches and is arguably simple enough to be implemented in high speed devices.
@inproceedings{DR:KDD-22,
  author       = {Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario},
  title        = {Local Evaluation of Time Series Anomaly Detection Algorithms},
  booktitle    = {ACM SIGKDD Conference on Knowledge Discovery and Data mining (KDD)},
  year         = {2022},
  month        = aug,
  howpublished = {https://nonsns.github.io/paper/rossi22kdd.pdf},
  arxiv        = {https://arxiv.org/abs/2206.13167},
  note         = {project=huawei},
  topic        = {ad-algo}
}
In recent years, specific evaluation metrics for time series anomaly detection algorithms have been developed to handle the limitations of the classical precision and recall. However, such metrics are heuristically built as an aggregate of multiple desirable aspects, introduce parameters and wipe out the interpretability of the output. In this article, we first highlight the limitations of the classical precision/recall, as well as the main issues of the recent event-based metrics – for instance, we show that an adversary algorithm can reach high precision and recall on almost any dataset under weak assumption. To cope with the above problems, we propose a theoretically grounded, robust, parameter-free and interpretable extension to precision/recall metrics, based on the concept of “affiliation” between the ground truth and the prediction sets. Our metrics leverage measures of duration between ground truth and predictions, and have thus an intuitive interpretation. By further comparison against random sampling, we obtain a normalized precision/recall, quantifying how much a given set of results is better than a random baseline prediction. By construction, our approach keeps the evaluation local regarding ground truth events, enabling fine-grained visualization and interpretation of algorithmic results. We compare our proposal against various public time series anomaly detection datasets, algorithms and metrics. We further derive theoretical properties of the affiliation metrics that give explicit expectations about their behavior and ensure robustness against adversary strategies.
@article{DR:SIGCOMM-CCR-22,
  author       = {Wang, Chao and Finamore, Alessandro and Yang, Lixuan and Fauvel, Kevin and Rossi, Dario},
  title        = {{AppClassNet: A commercial-grade dataset for application identification research}},
  journal      = {ACM SIGCOMM Computer Communication Review},
  volume       = {52},
  number       = {3},
  year         = {2022},
  month        = jul,
  doi          = {10.1145/3561954.3561958},
  howpublished = {https://nonsns.github.io/paper/rossi22ccr.pdf},
  note         = {project=huawei},
  topic        = {tc-train}
}
The recent success of Artificial Intelligence (AI) is rooted in several concomitant factors, namely theoretical progress coupled with
abundance of data and computing power. Large companies can take
advantage of a deluge of data, typically withheld from the research
community due to privacy or business sensitivity concerns, and
this is particularly true for networking data. Therefore, the lack
of high quality data is often recognized as one of the main factors
currently limiting networking research from fully leveraging AI
methodologies potential.
Following numerous requests we received from the scientific
community, we release AppClassNet, a commercial-grade dataset
for benchmarking traffic classification and management methodologies. AppClassNet is significantly larger than the datasets generally
available to the academic community in terms of both the number
of samples and classes, and reaches scales similar to the popular
ImageNet dataset commonly used in computer vision literature. To
avoid leaking user- and business-sensitive information, we opportunely anonymized the dataset, while empirically showing that it
still represents a relevant benchmark for algorithmic research. In
this paper, we describe the public dataset and our anonymization
process. We hope that AppClassNet can be instrumental for other
researchers to address more complex commercial-grade problems
in the broad field of traffic classification and management.
[ICML-22]
Franzese, Giulio and Rossi, Simone and Yang, Lixuan and Finamore, Alessandro and Rossi, Dario and Filippone, Maurizio and Michiardi, Pietro,
"How much diffusion time is enough?"
ICML 2022 workshop on Continuous time methods for machine learning
jun.
2022,
Conference
@inproceedings{DR:ICML-22,
  author       = {Franzese, Giulio and Rossi, Simone and Yang, Lixuan and Finamore, Alessandro and Rossi, Dario and Filippone, Maurizio and Michiardi, Pietro},
  title        = {How much diffusion time is enough?},
  booktitle    = {ICML 2022 workshop on Continuous time methods for machine learning},
  year         = {2022},
  month        = jun,
  howpublished = {https://nonsns.github.io/paper/rossi22icml.pdf},
  note         = {project=huawei},
  topic        = {TBD}
}
Score-based diffusion models map noise into data using stochastic differential equations. While current practice
advocates for a large T to ensure closeness to steady state, a smaller value of T should be preferred for a better
approximation of the score-matching objective and computational efficiency. We conjecture, contrary to current
belief and corroborated by numerical evidence, that the optimal diffusion times are smaller than current adoptions.
@inproceedings{DR:INFOCOM-22,
  author       = {Finamore, Alessandro and Roberts, James and Gallo, Massimo and Rossi, Dario},
  title        = {Accelerating Deep Learning Classification with Error-controlled Approximate-key Caching},
  booktitle    = {IEEE INFOCOM},
  year         = {2022},
  month        = may,
  howpublished = {https://nonsns.github.io/paper/rossi22infocom.pdf},
  arxiv        = {https://arxiv.org/abs/2112.06671},
  note         = {project=huawei},
  topic        = {tc-system}
}
While Deep Learning (DL) technologies are a promising tool to solve networking problems that map to classification tasks, their computational complexity is still too high with respect to real-time traffic measurements requirements. To reduce the DL inference cost, we propose a novel caching paradigm, that we named approximate-key caching, which returns approximate results for lookups of selected input based on cached DL inference results. While approximate cache hits alleviate DL inference workload and increase the system throughput, they however introduce an approximation error. As such, we couple approximate-key caching with an error-correction principled algorithm, that we named auto-refresh. We analytically model our caching system performance for classic LRU and ideal caches, we perform a trace-driven evaluation of the expected performance, and we compare the benefits of our proposed approach with the state-of-the-art similarity caching – testifying the practical interest of our proposal.
[PATENT-PCT/EP2022/059292]
FINAMORE, Alessandro and YANG, Lixuan and ROSSI, Dario,
"Method to address extreme class imbalance in AI based classifiers" , Patent PCT/EP2022/059292
apr.
2022,
Patent
@misc{DR:PATENT-PCT/EP2022/059292,
  author = {FINAMORE, Alessandro and YANG, Lixuan and ROSSI, Dario},
  title  = {Method to address extreme class imbalance in AI based classifiers},
  year   = {2022},
  month  = apr,
  note   = {Patent PCT/EP2022/059292},
  patent = {True}
}
[PATENT-PCT/EP2022/057757]
NAVARRO, Jose Manuel and HUET, Alexis and ROSSI, Dario,
"Aggregation of Anomalies in a Network" , Patent PCT/EP2022/057757
mar.
2022,
Patent
@misc{DR:PATENT-PCT/EP2022/057757,
  author = {NAVARRO, Jose Manuel and HUET, Alexis and ROSSI, Dario},
  title  = {Aggregation of Anomalies in a Network},
  year   = {2022},
  month  = mar,
  note   = {Patent PCT/EP2022/057757},
  patent = {True}
}
[PATENT-PCT/EP2022/051624]
HOUIDI, Zied BEN and KROLIKOWSKI, Jonatan and ROSSI, Dario,
"Network resource control based on neighborhood measurements" , Patent PCT/EP2022/051624
jan.
2022,
Patent
@misc{DR:PATENT-PCT/EP2022/051624,
  author = {HOUIDI, Zied BEN and KROLIKOWSKI, Jonatan and ROSSI, Dario},
  title  = {Network resource control based on neighborhood measurements},
  year   = {2022},
  month  = jan,
  note   = {Patent PCT/EP2022/051624},
  patent = {True}
}
@inproceedings{DR:AAAI-22,
  author       = {Boffa, Matteo and Houidi, Zied Ben and Krolikowski, Jonatan and Rossi, Dario},
  title        = {Neural combinatorial optimization beyond the TSP: Existing architectures under-represent graph structure},
  booktitle    = {AAAI workshop on Graphs and more complex structures for learning and reasoning (GLCR'22)},
  year         = {2022},
  howpublished = {https://nonsns.github.io/paper/rossi22aaai-glcr.pdf},
  arxiv        = {https://arxiv.org/abs/2201.00668},
  note         = {project=huawei},
  topic        = {wlan}
}
Recent years have witnessed the promise that reinforcement learning, coupled with Graph Neural Network (GNN) architectures, could learn to solve hard combinatorial optimization problems: given raw input data and an evaluator to guide the process, the idea is to automatically learn a policy able to return feasible and high-quality outputs. Recent works have shown promising results but the latter were mainly evaluated on the travelling salesman problem (TSP) and similar abstract variants such as Split Delivery Vehicle Routing Problem (SDVRP). This paper assesses how and whether recent neural architectures also transfer to graph problems of practical interest. We thus set out to systematically transfer these architectures to the Power and Channel Allocation Problem (PCAP), which has practical relevance for, e.g., radio resource allocation in wireless networks. Our experimental results suggest that existing architectures (i) are still incapable of capturing graph structural features and (ii) are not suitable for problems where the actions on the graph change the graph attributes. On a positive note, we show that augmenting the structural representation of problems with Distance Encoding is a promising step towards the still-ambitious goal of learning multi-purpose autonomous solvers.
2021 # 17
[TOMPECS-21]
Geissler, Stefan and Lange, Stanislav and Linguaglossa, Leonardo and Rossi, Dario and Zinner, Thomas and Hossfeld, Tobias,
"Discrete-Time Modeling of NFV Accelerators That Exploit Batched Processing"
In ACM Trans. Model. Perform. Eval. Comput. Syst.,
Vol. 6,
No. 3,
dec.
2021,
DOI 10.1145/3488243
Journal
@article{DR:TOMPECS-21,
  author       = {Geissler, Stefan and Lange, Stanislav and Linguaglossa, Leonardo and Rossi, Dario and Zinner, Thomas and Hossfeld, Tobias},
  title        = {Discrete-Time Modeling of NFV Accelerators That Exploit Batched Processing},
  journal      = {ACM Trans. Model. Perform. Eval. Comput. Syst.},
  volume       = {6},
  number       = {3},
  articleno    = {11},
  numpages     = {27},
  year         = {2021},
  month        = dec,
  doi          = {10.1145/3488243},
  howpublished = {https://doi.org/10.1145/3488243},
  keywords     = {vector packet processing, network measurement, waiting time, Packet processing, discrete-time analysis, performance model, DPDK}
}
Network Functions Virtualization (NFV) is among the latest network revolutions, promising increased flexibility and avoiding network ossification. At the same time, all-software NFV implementations on commodity hardware raise performance issues when comparing to ASIC solutions. To address these issues, numerous software acceleration frameworks for packet processing have been proposed in the last few years. One central mechanism of many of these frameworks is the use of batching techniques, where packets are processed in groups as opposed to individually. This is required to provide high-speed capabilities by minimizing framework overhead, reducing interrupt pressure, and leveraging instruction-level cache hits. Several such system implementations have been proposed and experimentally benchmarked in the past. However, the scientific community has so far only to a limited extent attempted to model the system dynamics of modern NFV routers exploiting batching acceleration. In this article, we propose a simple, generic model for this type of batching-based systems that can be applied to predict all relevant key performance indicators. In particular, we extend our previous work and formulate the calculation of the queue size as well as waiting time distributions in addition to the batch size distribution and the packet loss probability. Furthermore, we introduce the waiting time distribution as a relevant QoS parameter and perform an in-depth parameter study, widening the set of investigated variables as well as the range of values. Finally, we contrast the model prediction with experimental results gathered in a high-speed testbed including an NFV router, showing that the model not only correctly captures system performance under simple conditions, but also in more realistic scenarios in which traffic is processed by a mixture of functions.
@inproceedings{DR:CoNEXT-21a,
  author       = {Gioacchini, Luca and Vassio, Luca and Mellia, Marco and Drago, Idilio and Ben Houidi, Zied and Rossi, Dario},
  title        = {DarkVec: Automatic Analysis of Darknet Traffic with Word Embeddings},
  booktitle    = {ACM CoNEXT, Runner-up for best paper award},
  year         = {2021},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi21conext-a.pdf},
  note         = {project=huawei keyword=embedding,bestpaperrunnerup},
  partner      = {polito-mellia},
  topic        = {network-security}
}
Darknets are passive probes listening to traffic reaching IP addresses
that host no services. Traffic reaching them is unsolicited by
nature and often induced by scanners, malicious senders and
misconfigured hosts. Its peculiar nature makes it a valuable source
of information to learn about malicious activities. However, the
massive amount of packets and sources that reach darknets makes it
hard to extract meaningful insights. In particular, multiple senders
contact the darknet while performing similar and coordinated
tasks, which are often commanded by common controllers (botnets,
crawlers, etc.). How to automatically identify and group those
senders that share similar behaviors remains an open problem.
We here introduce DarkVec, a methodology to identify clusters of
senders (i.e., IP addresses) engaged in similar activities on darknets.
DarkVec leverages word embedding techniques (e.g., Word2Vec) to
capture the co-occurrence patterns of sources hitting the darknets.
We extensively test DarkVec and explore its design space in a case
study using one month of darknet data. We show that with a proper
definition of service, the generated embeddings can be easily used
to (i) associate unknown senders’ IP addresses to the correct known
labels (more than 96% accuracy), and (ii) identify new attack and
scan groups of previously unknown senders. We contribute DarkVec
source code and datasets to the community also to stimulate the
use of word embeddings to automatically learn patterns on generic
traffic traces.
@inproceedings{DR:CoNEXT-21b,
  author       = {Azorin, Raphael and Gallo, Massimo and Finamore, Alessandro and Filippone, Maurizio and Michiardi, Pietro and Rossi, Dario},
  title        = {Towards a Generic Deep Learning Pipeline for Traffic Measurements},
  booktitle    = {ACM CoNEXT, Student Workshop},
  year         = {2021},
  month        = dec,
  howpublished = {https://nonsns.github.io/paper/rossi21conext-b.pdf},
  note         = {project=huawei},
  topic        = {measurement}
}
[SEC-21]
Gallo, Massimo and Finamore, Alessandro and Simon, Gwendal and Rossi, Dario,
"FENXI: Fast In-Network Analytics"
IEEE/ACM Symposium on Edge Computing (SEC)
dec.
2021,
arXiv Conference
@inproceedings{DR:SEC-21,
  author       = {Gallo, Massimo and Finamore, Alessandro and Simon, Gwendal and Rossi, Dario},
  title        = {{FENXI}: Fast In-Network Analytics},
  booktitle    = {IEEE/ACM Symposium on Edge Computing (SEC)},
  year         = {2021},
  month        = dec,
  topic        = {tc-system},
  note         = {project=huawei},
  arxiv        = {http://arxiv.org/abs/2105.11738},
  howpublished = {https://nonsns.github.io/paper/rossi21sec.pdf}
}
Live traffic analysis at the first aggregation point in the ISP
network enables the implementation of complex traffic engineering policies but is limited by the scarce processing
capabilities, especially for Deep Learning (DL) based analytics. The introduction of specialized hardware accelerators
i.e., Tensor Processing Unit (TPU), offers the opportunity to
enhance processing capabilities of network devices at the
edge. Yet, to date, no packet processing pipeline is capable
of offering DL-based analysis capabilities in the data-plane,
without interfering with network operations.
In this paper, we present FENXI, a system to run complex
analytics by leveraging TPU. The design of FENXI decouples
forwarding operations and traffic analytics which operates
at different granularities i.e., packet and flow levels. We conceive two independent modules that asynchronously communicate to exchange network data and analytics results,
and design data structures to extract flow level statistics without impacting per-packet processing. We prototype FENXI
on a general-purpose server and evaluate its performance
in both adversarial and realistic network conditions. Our
evaluation shows that FENXI is able to offer DL processing
to 100 Gbps linecards with a limited number of resources,
while also dynamically adapting to network conditions.
@article{DR:TNSM-21b,
  author       = {{Yang}, Lixuan and {Finamore}, Alessandro and {Feng}, Jun and {Rossi}, Dario},
  title        = {Deep Learning and Zero-Day Traffic Classification: Lessons learned from a commercial-grade dataset},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {18},
  number       = {4},
  pages        = {4103--4118},
  year         = {2021},
  month        = dec,
  doi          = {10.1109/TNSM.2021.3122940},
  topic        = {tc-algo},
  note         = {project=huawei},
  arxiv        = {https://arxiv.org/abs/2104.03182},
  howpublished = {https://nonsns.github.io/paper/rossi21tnsm-b.pdf}
}
The increasing success of Machine Learning (ML) and Deep Learning (DL) has recently re-sparked interest towards traffic classification. While supervised techniques provide satisfactory performance when classifying known traffic, the detection of zero-day (i.e., unknown) traffic is a more challenging task. At the same time, zero-day detection, generally tackled with unsupervised techniques such as clustering, received less coverage by the traffic classification literature which focuses more on deriving DL models via supervised techniques. Moreover, the combination of supervised and unsupervised techniques poses challenges not fully covered by the traffic classification literature. In this paper, we share our experience on a commercial-grade DL traffic classification engine that combines supervised and unsupervised techniques to identify known and zero-day traffic. In particular, we rely on a dataset with hundreds of very fine grained application labels, and perform a thorough assessment of two state of the art traffic classifiers in commercial-grade settings. This pushes the boundaries of traffic classifiers evaluation beyond the few tens of classes typically used in the literature. Our main contribution is the design and evaluation of GradBP, a novel technique for zero-day applications detection. Based on gradient backpropagation and tailored for DL models, GradBP yields superior performance with respect to state of the art alternatives, in both accuracy and computational cost. Overall, while ML and DL models are both equally able to provide excellent performance for the classification of known traffic, the non-linear feature extraction process of DL models backbone provides sizable advantages for the detection of unknown classes over classical ML models
@article{DR:NETMAG-21,
  author       = {Yang, Lixuan and Rossi, Dario},
  title        = {Quality monitoring and assessment of deployed Deep Learning models for Network AIOps},
  journal      = {IEEE Network Magazine},
  volume       = {35},
  number       = {6},
  pages        = {84--90},
  year         = {2021},
  month        = nov,
  doi          = {10.1109/MNET.001.2100227},
  topic        = {tc-algo},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21netmag.pdf}
}
Artificial Intelligence (AI) has recently attracted a lot of attention, transitioning from research labs to a wide range of successful deployments in many fields, which is particularly true for Deep Learning (DL)
techniques.
Ultimately, DL models are software artifacts, that, as any software realization, need to be regularly maintained and updated: consequently, as a logical extension of the DevOps software development practices to AI-software applied to network operation and management, AIOps foresee to continuously push evolved models in production networks.
While for some network use-cases DL models can be incrementally updated at relatively low cost, the more typical case is that updating deployed DL models has a significant cost that needs to be managed. It follows that, during the lifecycle of DL model deployment, it is important to assess the relative “staleness” of deployed DL models, so to prioritize update of “ageing” models. In this article, we cover the issue of quality assessment and tracking of DL models deployed for network management purposes.
@inproceedings{DR:TMA-21,
  author       = {Bovenzi, Giampaolo and Yang, Lixuan and Finamore, Alessandro and Aceto, Giuseppe and Ciuonzo, Domenico and Pescape, Antonio and Rossi, Dario},
  title        = {A First Look at Class Incremental Learning in Deep Learning Mobile Traffic Classification},
  booktitle    = {IFIP Traffic Monitoring and Analysis (TMA)},
  year         = {2021},
  month        = sep,
  topic        = {tc-train},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21tma.pdf}
}
The recent popularity growth of Deep Learning (DL) re-ignited the interest towards traffic classification, with several studies demonstrating the accuracy of DL-based classifiers to identify Internet applications’ traffic. Even with the aid of hardware accelerators (GPUs, TPUs), DL model training remains expensive, and limits the ability to operate frequent model updates necessary to fit to the ever evolving nature of Internet traffic, and mobile traffic in particular. To address this pain point, in this work we explore Incremental Learning (IL) techniques to add new classes to models without a full retraining, hence speeding up model’s updates cycle. We consider iCarl, a state of the art IL method, and MIRAGE-2019, a public dataset with traffic from 40 Android apps, aiming to understand if there is a case for incremental learning in traffic classification. By dissecting iCarl internals, we discuss ways to improve its design, contributing a revised version, namely iCarl+. Despite our analysis reveals their infancy, IL techniques are a promising research area on the roadmap towards automated DL-based traffic analysis systems.
[PATENT-PCT/EP2021/074381]
Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario,
"Root cause analysis of anomalous events in a computer network" , Patent PCT/EP2021/074381
sep.
2021,
Patent
@misc{DR:PATENT-PCT/EP2021/074381,
  author = {Huet, Alexis and Navarro, Jose Manuel and Rossi, Dario},
  title  = {Root cause analysis of anomalous events in a computer network},
  year   = {2021},
  month  = sep,
  note   = {Patent PCT/EP2021/074381},
  patent = {True}
}
[ICML-UDL-21]
Yang, Lixuan and Rossi, Dario,
"Thinkback: Task Specific Out-of-Distribution Detection"
International Conference on Machine Learning (ICML) workshop on Uncertainty and Robustness in Deep Learning (UDL) 2021
jun.
2021,
Conference
@inproceedings{DR:ICML-UDL-21,
  author       = {{Yang}, Lixuan and {Rossi}, Dario},
  title        = {Thinkback: Task Specific Out-of-Distribution Detection},
  booktitle    = {International Conference on Machine Learning (ICML) workshop on Uncertainty and Robustness in Deep Learning (UDL) 2021},
  year         = {2021},
  month        = jun,
  topic        = {tc-algo},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21icml-udl.pdf}
}
The increased success of Deep Learning (DL) has
recently sparked large-scale deployment of DL
models in many diverse industry segments. Yet,
a crucial weakness of supervised model is the
inherent difficulty in handling out-of-distribution
samples, i.e., samples belonging to classes that
were not presented to the model at training time.
We propose in this paper a novel way to formulate
the out-of-distribution detection problem, tailored
for DL models. Our method does not require fine
tuning process on training data, yet is significantly
more accurate than the state of the art for out-of-distribution
detection.
@inproceedings{DR:NETWORKING-21,
  author       = {Iacoboaiea, Ovidiu and Krolikowski, Jonatan and {Ben Houidi}, Zied and Rossi, Dario},
  title        = {{Real-Time} Channel Management in {WLANs:} Deep Reinforcement Learning Versus Heuristics},
  booktitle    = {IFIP Networking},
  address      = {Helsinki, Finland},
  year         = {2021},
  month        = jun,
  topic        = {wlan},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21networking.pdf}
}
Today’s WLANs rely on a centralized Access Controller (AC) entity for
managing distributed wireless Access Points (APs) to which user devices
connect. The availability of real-time analytics at the AC opens the
possibility to automate the allocation of scarce radio resources,
continuously adapting to changes in traffic demands. Often, the allocation
problem is formulated in terms of weighted graph coloring, which is
NP-hard, and custom heuristics are used to find satisfactory solutions. In
this paper, we contrast solutions that are based on (and even improve)
state of the art heuristics to a data-driven solution that leverages Deep
Reinforcement Learning (DRL). Based on both simulation results as well as
experiments in a real deployment, we show that our DRL-based scheme not
only learns to solve the complex combinatorial problem in bounded time,
outperforming heuristics, but it also exhibits appealing generalization
properties, e.g. to different network sizes and densities.
@inproceedings{DR:INFOCOM-21,
  author       = {Krolikowski, Jonatan and Iacoboaiea, Ovidiu and {Ben Houidi}, Zied and Rossi, Dario},
  title        = {WiFi Dynoscope: Interpretable Real-Time WLAN Optimization},
  booktitle    = {IEEE INFOCOM, Demo session},
  year         = {2021},
  month        = may,
  topic        = {wlan},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21infocom.pdf}
}
Today’s Wireless Local Area Networks (WLANs) rely on a centralized Access Controller (AC) entity for managing a fleet of Access Points (APs). Real-time analytics enable the AC to optimize the radio resource allocation (i.e. channels) on-line in response to sudden traffic shifts. Deep Reinforcement Learning (DRL) relieves the pressure of finding good optimization heuristics by learning a policy through interactions with the environment. However, it is not granted that DRL will behave well in unseen conditions. Tools such as the WiFi Dynoscope introduced here are necessary to gain this trust. In a nutshell, this demo dissects the dynamics of WLAN networks, both simulated and from real large-scale deployments, by (i) comparatively analyzing the performance of different algorithms on the same deployment at high level and (ii) getting low-level details and insights into algorithmic behaviour.
@techreport{DR:TECHREP-21-DRL,
  author       = {Iacoboaiea, Ovidiu and Krolikowski, Jonatan and {Ben Houidi}, Zied and Rossi, Dario},
  title        = {The long and winding road to Deep Autonomous Networks: Lessons from real WLAN deployment},
  year         = {2021},
  month        = apr,
  topic        = {wlan-algo},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21techrep_DRL_deploy.pdf}
}
Deep Reinforcement Learning (DRL) techniques have recently gathered much attention for their ability to learn taking complex decisions in different fields: as such, they are an appealing candidate for network Operation and Management (O&M). In particular, DRL can become a fundamental item in the toolbox of the so called “self-driving networks”, especially for tasks such as dynamic resource allocation, that is generally formulated and solved as complex optimization problems. Yet, training and deployment of DRL agents in real-world scenarios face important challenges, that we illustrate in this article using Wireless LANs as a relevant deployment example.
[TNSM-21a]
Huet, Alexis and Saverimoutou, Antoine and Houidi, Zied Ben and Shi, Hao and Cai, Shengming and Xu, Jinchun and Mathieu, Bertrand and Rossi, Dario,
"Deployable models for approximating web QoE metrics from encrypted traffic"
In IEEE Transactions on Network and Service Management,
pp.839-854,
mar.
2021,
DOI 10.1109/TNSM.2020.3037019
Journal
@article{DR:TNSM-21a,
  author       = {Huet, Alexis and Saverimoutou, Antoine and {Ben Houidi}, Zied and Shi, Hao and Cai, Shengming and Xu, Jinchun and Mathieu, Bertrand and Rossi, Dario},
  title        = {Deployable models for approximating web QoE metrics from encrypted traffic},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {18},
  number       = {1},
  pages        = {839--854},
  year         = {2021},
  month        = mar,
  doi          = {10.1109/TNSM.2020.3037019},
  topic        = {qoe-web},
  note         = {project=huawei},
  howpublished = {https://nonsns.github.io/paper/rossi21tnsm-a.pdf}
}
Being on endpoints, Content Providers can easily evaluate end users’ web browsing quality of experience (web QoE) by accessing in-browser computed application-level metrics. Because of end-to-end traffic encryption, it is becoming considerably harder for Internet Service Providers (ISPs) to evaluate the web QoE of their customers, which is important for management purposes. In this paper, we propose data-driven machine learning techniques and exact flow-level algorithmic methods to infer well-known application-level web performance metrics (such as SpeedIndex and Page Load Time) from raw encrypted streams of network traffic. We prove the efficiency of our approach taking as input a unique dataset of more than 200,000 experiments, targeting a large set of popular pages (Alexa top-500), from probes from several ISPs networks, with different browsers (Chrome, Firefox) and viewport combinations. Results show that our data-driven models are not only accurate for several web performance metrics, but also feature the ability to generalize to previously unseen conditions. Furthermore, we discuss how our extremely lightweight flow-level method has a provable accuracy on a specific metric, and is thus of particular appeal from a deployment viewpoint
[PATENT-PCT/EP2021/050902]
Finamore, Alessandro and Gallo, Massimo and Simon, Gwendal and Rossi, Dario,
"Controller For A Cache And Method For Controlling A Cache " , Patent PCT/EP2021/050902
jan.
2021,
Patent
@misc{DR:PATENT-PCT/EP2021/050902,
  author = {Finamore, Alessandro and Gallo, Massimo and Simon, Gwendal and Rossi, Dario},
  title  = {Controller For A Cache And Method For Controlling A Cache},
  year   = {2021},
  month  = jan,
  note   = {Patent PCT/EP2021/050902},
  patent = {True}
}
[PATENT-PCT/EP2021/076753]
Houidi, Zied Ben and and Dario Rossi and del Sordo, Lorenzo and Milan, Giulia and Vassio, Luca and Mellia, Marco and Drago, Idilio,
"Honeypot entity and method of operating the same" , Patent PCT/EP2021/076753
2021,
Patent
@misc{DR:PATENT-PCT/EP2021/076753,
  author = {{Ben Houidi}, Zied and Rossi, Dario and del Sordo, Lorenzo and Milan, Giulia and Vassio, Luca and Mellia, Marco and Drago, Idilio},
  title  = {Honeypot entity and method of operating the same},
  year   = {2021},
  note   = {Patent PCT/EP2021/076753},
  patent = {True}
}
[PATENT-PCT/EP2021/057212]
Yang, Lixuan and Jun, Feng and Rossi, Dario,
"Device and method for identifying unknown classes for artificial neural networks" , Patent PCT/EP2021/057212
2021,
Patent
@misc{DR:PATENT-PCT/EP2021/057212,
  author = {Yang, Lixuan and Jun, Feng and Rossi, Dario},
  title  = {Device and method for identifying unknown classes for artificial neural networks},
  year   = {2021},
  note   = {Patent PCT/EP2021/057212},
  patent = {True}
}
[PATENT-PCT/FR2021/000024]
Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario and Putina, Andrian and Sozio, Mauro,
"System and method for combining anomaly detection algorithms" , Patent PCT/FR2021/000024
2021,
Patent
@misc{DR:PATENT-PCT/FR2021/000024,
  author = {Navarro, Jose Manuel and Huet, Alexis and Rossi, Dario and Putina, Andrian and Sozio, Mauro},
  title  = {System and method for combining anomaly detection algorithms},
  year   = {2021},
  topic  = {ad-algo},
  note   = {Patent PCT/FR2021/000024},
  patent = {True}
}
@inproceedings{DR:ICDM-20,
  author       = {Putina, Andrian and Sozio, Mauro and Navarro, Jose M. and Rossi, Dario},
  title        = {Random Histogram Forest for Unsupervised Anomaly Detection},
  booktitle    = {20th IEEE International Conference on Data Mining (ICDM)},
  year         = {2020},
  month        = nov,
  topic        = {ad-algo},
  note         = {keyword=anomaly, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20icdm.pdf}
}
Roughly speaking, anomaly detection consists of identifying instances whose features significantly deviate from the rest of input data. It is one of the most widely studied problems in unsupervised machine learning, boasting applications in network intrusion detection, fraud detection, healthcare and many others. Several methods have been developed in recent years, however, a satisfactory solution is still missing to the best of our knowledge. We present Random Histogram Forest, an effective approach for unsupervised anomaly detection. Our approach is probabilistic, which has been proved to be effective in identifying anomalies. Moreover, it employs the fourth central moment (aka kurtosis), so as to identify potential anomalous instances. We conduct an extensive experimental evaluation on 38 datasets including all benchmarks for anomaly detection, as well as the most successful algorithms for unsupervised anomaly detection, to the best of our knowledge. Moreover, we provide some novel datasets that are made publicly available. We evaluate all the approaches in terms of the average precision of the area under the precision-recall curve (AP) as well as ROC. Our evaluation shows that our approach significantly outperforms all other approaches, both in terms of AP and ROC, while boasting linear running time.
@inproceedings{DR:CNSM-20a,
  author       = {Wassermann, Sarah and Casas, Pedro and {Ben Houidi}, Zied and Huet, Alexis and Seufert, Michael and Wehner, Nikolas and Schuler, Joshua and Cai, Sheng-Ming and Shi, Hao and Xu, Jinchun and Hossfeld, Tobias and Rossi, Dario},
  title        = {Are you on Mobile or Desktop? On the Impact of End-User Device on Web QoE Inference from Encrypted Traffic},
  booktitle    = {IEEE International Conference on Network and Service Management (CNSM)},
  year         = {2020},
  month        = nov,
  topic        = {web-qoe},
  note         = {keyword=qoe, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20cnsm-a.pdf}
}
Web browsing is one of the key applications of the Internet, if not the most important one. We address the problem of Web Quality-of-Experience (QoE) monitoring from the ISP perspective, relying on in-network, passive measurements. As a proxy to Web QoE, we focus on the analysis of the well-known SpeedIndex (SI) metric. Given the lack of application-level-data visibility introduced by the wide adoption of end-to-end encryption, we resort to machine-learning models to infer the SI and the QoE level of individual web-page loading sessions, using as input only packet- and flow-level data. In this paper, we study the impact of different end-user device types (e.g., smartphone, desktop, tablet) on the performance of such models. Empirical evaluations on a large, multi-device, heterogeneous corpus of Web-QoE measurements for the most popular websites demonstrate that the proposed solution can infer the SI as well as estimate QoE ranges with high accuracy, using either packet-level or flow-level measurements. In addition, we show that the device type adds a strong bias to the feasibility of these Web-QoE models, putting into question the applicability of previously conceived approaches on single-device measurements. To improve the state of the art, we conceive cross-device generalizable models operating at both packet and flow levels, offering a feasible solution for Web-QoE monitoring in operational, multi-device networks. To the best of our knowledge, this is the first study tackling the analysis of Web QoE from encrypted network traffic in multi-device scenarios.
@article{DR:TNSM-20b,
  author       = {Putina, Andrian and Rossi, Dario},
  title        = {Online anomaly detection leveraging stream-based clustering and real-time telemetry},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {18},
  number       = {1},
  year         = {2020},
  month        = nov,
  doi          = {10.1109/TNSM.2020.3037019},
  topic        = {ad-algo},
  note         = {keyword=anomaly, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20tnsm-b.pdf}
}
Recent technology evolution allows network equipment to continuously stream a wealth of “telemetry” information, which pertains to multiple protocols and layers of the stack, at a very fine spatial-grain and high-frequency. This deluge of telemetry data clearly offers new opportunities for network control and troubleshooting, but also poses a serious challenge for what concerns its real-time processing. We tackle this challenge by applying streaming machine-learning techniques to the continuous flow of control and data-plane telemetry data, with the purpose of real-time detection of anomalies. In particular, we implement an anomaly detection engine that leverages DenStream, an unsupervised clustering technique, and apply it to features collected from a large-scale testbed comprising tens of routers traversed up to 3 Terabit/s worth of real application traffic. We contrast DenStream with offline algorithms such as DBScan and Local Outlier Factor (LOF), as well as online algorithms such as the windowed version of DBScan, ExactSTORM, Continuous Outlier Detection (COD) and Robust Random Cut Forest (RRCF). Our experimental campaign compares these seven algorithms under both accuracy and computational complexity viewpoints: results testify that DenStream (i) achieves detection results on par with RRCF, the best performing algorithm and (ii) is significantly faster than other approaches, notably over two orders of magnitude faster than RRCF. In spirit with the recent trend toward reproducibility of results, we make our code available as open source to the scientific community.
@inproceedings{DR:CNSM-20b,
  author       = {Huet, Alexis and {Ben Houidi}, Zied and Mathieu, Bertrand and Rossi, Dario},
  title        = {Detecting Degradation of Web Browsing Quality of Experience (QoE)},
  booktitle    = {IEEE International Conference on Network and Service Management (CNSM)},
  year         = {2020},
  month        = nov,
  topic        = {web-qoe},
  note         = {keyword=qoe, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20cnsm-b.pdf}
}
Quality of Experience (QoE) inference, and particularly the detection of its degradation, is an important management tool for ISPs. Yet, this task is made difficult due to widespread use of encryption on the data-plane on the one hand so that measuring QoE is hard, and to the ephemeral properties of the web content on the other hand so that changes in QoE indicators may be rooted in changes in properties of the content itself, more than being caused by network-related events. In this paper, we phrase the QoE degradation detection issue as a change point detection problem, that we tackle by leveraging a unique dataset consisting of several hundred thousand browsing sessions spanning multiple months. Our results, beyond showing feasibility, warn about the exclusive use of QoE indicators that are very close to content, as changes in the content space can lead to false alarms that are not tied to network-related problems.
[PATENT-PCT/EP2020/080037]
Roberts, James and Rossi, Dario,
"Method Of Managing Data Transmission For Ensuring Per-Flow Fair Bandwidth Sharing " , Patent PCT/EP2020/080037
oct.
2020,
Patent
@misc{DR:PATENT-PCT/EP2020/080037,
  author = {Roberts, James and Rossi, Dario},
  title  = {Method Of Managing Data Transmission For Ensuring Per-Flow Fair Bandwidth Sharing},
  year   = {2020},
  month  = oct,
  note   = {Patent PCT/EP2020/080037},
  patent = {True}
}
[PATENT-PCT/EP2020/077766]
Alexis Huet, Dario Rossi,
"Detecting A Network-Induced Contribution To A Quality Of Experience Degradation " , Patent PCT/EP2020/077766
oct.
2020,
Patent
@misc{DR:PATENT-PCT/EP2020/077766,
  author = {Huet, Alexis and Rossi, Dario},
  title  = {Detecting A Network-Induced Contribution To A Quality Of Experience Degradation},
  year   = {2020},
  month  = oct,
  note   = {Patent PCT/EP2020/077766},
  patent = {True}
}
[IJCAIFL-20]
Yang, Lixuan and Beliard, Cedric and Rossi, Dario,
"Heterogeneous Data-Aware Federated Learning"
International Joint Conference on Artificial Intelligence (IJCAI), Workshop on Federated Learning
sep.
2020,
DOI
Conference
@inproceedings{DR:IJCAIFL-20,
  author       = {Yang, Lixuan and Beliard, Cedric and Rossi, Dario},
  title        = {Heterogeneous Data-Aware Federated Learning},
  booktitle    = {International Joint Conference on Artificial Intelligence (IJCAI), Workshop on Federated Learning},
  year         = {2020},
  month        = sep,
  topic        = {tc-train},
  note         = {keyword=deeplearning, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20ijcai-fl.pdf}
}
Federated learning (FL) is an appealing concept to perform distributed training of Neural Networks (NN) while keeping data private. With the industrialization of the FL framework, we identify several problems hampering its successful deployment, such as presence of non i.i.d data, disjoint classes, signal multi-modality across datasets. In this work, we address these problems by proposing a novel method that not only (1) aggregates generic model parameters (e.g. a common set of task generic NN layers) on server (e.g. in traditional FL), but also (2) keeps a set of parameters (e.g., a set of task specific NN layers) specific to each client. We validate our method on the traditionally used public benchmarks (e.g., Femnist) as well as on our proprietary collected dataset (i.e., traffic classification). Results show the benefit of our method, with significant advantage on extreme cases.
@inproceedings{DR:IJCAI-NETAML-20,
  author       = {Yang, Lixuan and Finamore, Alessandro and Rossi, Dario},
  title        = {Deep Learning and Traffic Classification: A critical review with novel insights},
  booktitle    = {International Joint Conference on Artificial Intelligence (IJCAI), Workshop on Network Traffic Analytics using Machine Learning},
  year         = {2020},
  month        = sep,
  note         = {keyword=deeplearning,classification, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20ijcai-netaml.pdf}
}
The increased success of Convolutional Neural Networks (CNNs) has recently re-sparked interest towards traffic classification (TC). New literature shows the potential of reaching perfect classification accuracy, yet at the core of those works still reside the same limitations surfaced in the 1st wave of TC techniques, started in early 2000s and culminated with the application of Machine Learning (ML) for "early classification". To better highlight and discuss such issues, in this paper we report on novel insights based on a commercial-grade traffic classification engine. We aim to perform a critical review of the state of the art, introducing the research community to problems arising at a commercial scale (30× the largest number of classes in academic literature), discussing the pitfalls and how to avoid them. Leveraging a dataset comprising millions of flows and thousands of labels, we perform a fair comparison of classic Machine Learning (ML) and novel Deep Learning (DL) architectures based on the same inputs. We additionally put emphasis that comparing techniques on the mere raw performance makes no sense unless they are fairly compared in terms of their computational requirements, an often forgotten key aspect affecting deployment.
@inproceedings{DR:ITC-20,
  author       = {Navarro, Jose M. and Rossi, Dario},
  title        = {HURRA! Human readable router anomaly detection},
  booktitle    = {International Teletraffic Congress (ITC32)},
  year         = {2020},
  month        = sep,
  topic        = {ad-fs},
  note         = {keyword=anomaly,bestpaperaward, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20itc.pdf}
}
This paper presents HURRA, a system that aims to reduce the time spent by human operators in the process
of network troubleshooting. To do so, it comprises two modules that are plugged after any anomaly detection algorithm: (i) a
first attention mechanism, that ranks the present features in terms of their relation with the anomaly and (ii) a second
module able to incorporate previous expert knowledge seamlessly, without any need of human interaction nor decisions. We show
the efficacy of these simple processes on a collection of real router datasets obtained from tens of ISPs which exhibit a
rich variety of anomalies and very heterogeneous set of KPIs, on which we gather manually annotated ground truth by the
operator solving the troubleshooting ticket. Our experimental evaluation shows that (i) the proposed system is effective in
achieving high levels of agreement with the expert, that (ii) even a simple statistical approach is able to extract useful information
from expert knowledge gained in past cases to further improve performance and finally that (iii) the main difficulty in live
deployment concerns the automated selection of the anomaly detection algorithm and the tuning of its hyper-parameters.
@inproceedings{DR:SIGCOMM-20,
  author       = {Gallo, Massimo and Finamore, Alessandro and Simon, Gwendal and Rossi, Dario},
  title        = {Real-time Deep Learning based Traffic Analytics},
  booktitle    = {ACM SIGCOMM, Demo session},
  year         = {2020},
  month        = aug,
  topic        = {tc-system},
  note         = {keyword=deeplearning,system,classification, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20sigcomm.pdf}
}
The increased interest towards Deep Learning (DL) technologies has led to the development of a new generation of specialized hardware accelerators such as Graphic Processing Unit (GPU) and Tensor Processing Unit (TPU). The integration of such components in network routers is however not trivial. Indeed, routers typically aim to minimize the overhead of per-packet processing, and design choices to integrate a new accelerator need to factor in these key requirements. The literature and benchmarks on DL hardware accelerators have overlooked specific router constraints (e.g., strict latency) and focused instead on cloud deployment and image processing. Likewise, there is limited literature regarding DL application on traffic processing at line-rate. Among all hardware accelerators, we are interested in edge TPUs. Since their design focuses on DL inference, edge TPUs match the vision of operators, who consider running pre-trained DL models in routers with low power drain. Edge TPUs are expected to limit the amount of computational resources for inference and to yield a higher ratio of operations-per-watt footprint than GPUs. This demo aims to investigate the operational points at which edge TPUs become a viable option, using traffic classification as a use case. We sketch the design of a real-time DL traffic classification system, and compare inference speed (i.e., number of classifications per second) of a state-of-the-art Convolutional Neural Network (CNN) model running on different hardware (CPU, GPU, TPU). To contrast their performance, we run stress tests based on synthetic traffic and under different conditions. We collect the results into a dashboard which enables network operators and system designers to both explore the stress test results with regards to their considered operational points, as well as triggering synthetic live tests on top of Ascend310 TPUs.
@inproceedings{DR:INFOCOM-20a,
  author       = {Navarro, Jose Manuel and Rossi, Dario},
  title        = {HURRA: Human-Readable Router Anomaly Detection},
  booktitle    = {IEEE INFOCOM, Demo session},
  month        = jul,
  year         = {2020},
  note         = {keyword=anomaly, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20infocom-a.pdf},
  topic        = {ad-fs}
}
Automated troubleshooting tools must be based on
solid and principled algorithms to be useful. However, these tools
need to be easily accessible for non-experts, thus requiring to also
be usable. This demo combines both requirements by combining
an anomaly detection engine inspired by Auto-ML principles,
that combines multiple methods to find robust solutions, with
automated ranking of results to provide an intuitive interface that
is remindful of a search engine. The net result is that HURRA!
simplifies as much as possible human operators interaction while
providing them with the most useful results first. In the demo,
we contrast manual labeling of individual features gathered from
human operators from real troubleshooting tickets with results
returned by the engine — showing an empirically good match at
a fraction of the human labor.
@inproceedings{DR:INFOCOM-20b,
  author       = {Beliard, Cedric and Finamore, Alessandro and Rossi, Dario},
  title        = {Opening the Deep Pandora Box: Explainable Traffic Classification},
  booktitle    = {IEEE INFOCOM, Demo session},
  month        = jul,
  year         = {2020},
  note         = {keyword=deeplearning,classification, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20infocom-b.pdf},
  topic        = {tc-xai}
}
Fostered by the tremendous success in the image
recognition field, recently there has been a strong push for the
adoption of Convolutional Neural Networks (CNN) in networks,
especially at the edge, assisted by low-power hardware equipment
(known as “tensor processing units”) for the acceleration of CNN-
related computations. The availability of such hardware has reignited the interest for traffic classification approaches that are
based on Deep Learning. However, unlike tree-based approaches
that are easy to interpret, CNNs are in essence represented by
a large number of weights, whose interpretation is particularly
obscure for the human operators. Since human operators will
need to deal, troubleshoot, and maintain these automatically
learned models, that will replace the more easily human-readable
heuristic rules of DPI classification engine, there is a clear need
to open the “deep pandora box”, and make it easily accessible for
network domain experts. In this demonstration, we shed light on
the inference process of a commercial-grade classification engine
dealing with hundreds of classes, enriching the classification
workflow with tools to enable better understanding of the inner
mechanics of both the traffic and the models.
@inproceedings{DR:INFOCOM-20c,
  author       = {Sviridov, German and Beliard, Cedric and Simon, Gwendal and Bianco, Andrea and Giaccone, Paolo and Rossi, Dario},
  title        = {Leveraging AI players for QoE estimation in cloud gaming},
  booktitle    = {IEEE INFOCOM, Demo session},
  month        = jul,
  year         = {2020},
  note         = {keyword=qoe,deeplearning, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20infocom-c.pdf},
  topic        = {game-qoe}
}
Quality of Experience (QoE) assessment in video
games is notorious for its burdensomeness. Employing human
subjects to understand network impact on the perceived gaming
QoE presents major drawbacks in terms of resources requirement, results interpretability and poor transferability across
different games.
To overcome these shortcomings, we propose to substitute
human players with artificial agents trained with state-of-the-art
Deep Reinforcement Learning techniques. Equivalently to traditional QoE assessment, we measure the in-game score achieved
by an artificial agent for the game of Doom for varying network
parameters. Our results show that the proposed methodology
can be applied to understand fine-grained impact of network
conditions on gaming experience while opening a lot of new
opportunities for network operators and game developers.
@inproceedings{DR:INFOCOM-20d,
  author       = {Sviridov, German and Beliard, Cedric and Bianco, Andrea and Giaccone, Paolo and Rossi, Dario},
  title        = {Removing human players from the loop: AI-assisted assessment of Gaming QoE},
  booktitle    = {IEEE INFOCOM, Workshop on Network Intelligence},
  month        = jul,
  year         = {2020},
  note         = {keyword=qoe,deeplearning, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20infocom-d.pdf},
  topic        = {game-qoe}
}
Quality of Experience (QoE) assessment for video
games is known for being a heavy-weight process, typically
requiring the active involvement of several human players and
bringing limited transferability across games. Clearly, to some
extent, QoE is correlated with the achieved in-game score, as
player frustration will arise whenever realized performance is far
from what is expected due to conditions beyond player control
such as network congestion in the increasingly prevalent case
of networked games. To disrupt the status quo, we propose
to remove human players from the loop and instead exploit
Deep Reinforcement Learning (DRL) agents to play games under
varying network conditions. We apply our framework to a set of
Atari games with different types of interaction, showing that the
score degradation observed with DRL agents can be exploited
in networking devices (e.g., by prioritizing scheduling decisions),
reinforcing fairness across games, and thus enhancing the overall
quality of gaming experience.
@inproceedings{DR:MedComNet-20,
  author       = {Salutari, Flavia and Varvello, Matteo and Teixeira, Renata and Christophides, Vassilis and Rossi, Dario and Da Hora, Diego},
  title        = {Implications of User Perceived Page Load Time Multi-Modality on Web QoE Measurement},
  booktitle    = {MedComNet},
  month        = jun,
  year         = {2020},
  note         = {keyword=qoe, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20medcomnet.pdf},
  topic        = {web-qoe}
}
Web browsing is one of the most popular applications for both desktop and mobile users. A lot of effort has been devoted to speed up the Web, as well as in designing metrics that can accurately tell whether a webpage loaded fast or not. An often implicit assumption made by industrial and academic research communities is that a single metric is sufficient to assess whether a webpage loaded fast. In this paper we collect and make publicly available a unique dataset which contains webpage features (e.g., number and type of embedded objects) along with both objective and subjective Web quality metrics. This dataset was collected by crawling over 100 websites — representative of the top 1,000,000 websites in the Web — while crowdsourcing 6,000 user opinions on user perceived page load time (uPLT). We show that the uPLT distribution is often multi-modal and that, in practice, no more than three modes are present. The main conclusion drawn from our analysis is that, for complex webpages, each of the different objective QoE metrics proposed in the literature (such as AFT, TTI, PLT, etc.) is suited to approximate one of the different uPLT modes.
[NETWORKING-20]
Huet, Alexis and Saverimoutou, Antoine and Houidi, Zied Ben and Shi, Hao and Cai, Shengming and Xu, Jinchun and Mathieu, Bertrand and Rossi, Dario,
"Revealing QoE of Web Users from Encrypted Network Traffic"
IFIP Networking
jun.
2020,
Conference
@inproceedings{DR:NETWORKING-20,
  title        = {Revealing QoE of Web Users from Encrypted Network Traffic},
  author       = {Huet, Alexis and Saverimoutou, Antoine and Houidi, Zied Ben and Shi, Hao and Cai, Shengming and Xu, Jinchun and Mathieu, Bertrand and Rossi, Dario},
  year         = {2020},
  booktitle    = {IFIP Networking},
  venue        = {Paris},
  month        = jun,
  note         = {keyword=qoe, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20networking.pdf},
  topic        = {web-qoe}
}
Internet Service Providers (ISPs) have a lot to gain
from estimating the Web browsing quality of their customers.
However, unlike Content Providers (CPs) who can easily access
in-browser computed application-level metrics to estimate Web
browsing quality, ISPs come short mainly because of traffic
encryption. In this paper, we use exact methods and machine
learning to estimate well-known application-level Web browsing
QoS metrics (such as SpeedIndex and Page Load Time) from raw
encrypted streams of network traffic. Particularly, we present and
open-source a unique dataset targeting a large set of popular
pages (Alexa top-500), from probes from several ISPs networks,
browsers software (Chrome, Firefox) and viewport combinations,
for over 200,000 experiments. Our results show our models to be
accurate, and we particularly focus on their ability to generalize
to previously unseen conditions, giving guidance concerning their
retraining.
@article{DR:TNSM-20a,
  title        = {Analyzing Wikipedia Users Perceived Quality Of Experience: A Large-Scale Study},
  author       = {{Salutari}, F. and {Da Hora}, D. and {Dubuc}, G. and {Rossi}, D.},
  journal      = {IEEE Transactions on Network and Service Management},
  volume       = {17},
  number       = {2},
  pages        = {1082--1095},
  month        = jun,
  year         = {2020},
  doi          = {10.1109/TNSM.2020.2978685},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi20tnsm-a.pdf},
  note         = {keyword=qoe, project=huawei},
  topic        = {web-qoe}
}
The Web is one of the most successful Internet applications. Yet, the quality of Web users’ experience is still largely impenetrable. Whereas Web performance is typically studied with controlled experiments, in this work we perform a large-scale study of a real site, Wikipedia, explicitly asking (a small fraction of its) users for feedback on the browsing experience. The analysis of the collected feedback reveals that 85% of users are satisfied, along with both expected (e.g., the impact of browser and network connectivity) and surprising findings (e.g., absence of day/night, weekday/weekend seasonality) that we detail in this paper. Also, we leverage user responses to build supervised data-driven models to predict user satisfaction which, despite including state-of-the art quality of experience metrics, are still far from achieving accurate results (0.62 recall of negative answers). Finally, we make our dataset publicly available, hopefully contributing in enriching and refining the scientific community knowledge on Web users’ QoE.
[PATENT-PCT/EP2020/061440]
Yang, Lixuan and Beliard, Cedric and Rossi, Dario,
"Devices, Methods, And System For Heterogeneous Data- Adaptive Federated Learning " , Patent PCT/EP2020/061440
apr.
2020,
Patent
@misc{DR:PATENT-PCT/EP2020/061440,
  author = {Yang, Lixuan and Beliard, Cedric and Rossi, Dario},
  title  = {Devices, Methods, And System For Heterogeneous Data-Adaptive Federated Learning},
  month  = apr,
  note   = {Patent PCT/EP2020/061440},
  year   = {2020},
  patent = {True}
}
[PATENT-PCT/EP2020/050319]
Huet, Alexis and Rossi, Dario,
"Devices And Methods For Web Quality Evaluation " , Patent PCT/EP2020/050319
jan.
2020,
Patent
@misc{DR:PATENT-PCT/EP2020/050319,
  author = {Huet, Alexis and Rossi, Dario},
  title  = {Devices And Methods For Web Quality Evaluation},
  month  = jan,
  note   = {Patent PCT/EP2020/050319},
  year   = {2020},
  patent = {True}
}
[PATENT-PCT/EP2020/052332]
Navarro, Jose Manuel and Rossi, Dario,
"Device For Monitoring A Computer Network System " , Patent PCT/EP2020/052332
jan.
2020,
Patent
@misc{DR:PATENT-PCT/EP2020/052332,
  author = {Navarro, Jose Manuel and Rossi, Dario},
  title  = {Device For Monitoring A Computer Network System},
  month  = jan,
  note   = {Patent PCT/EP2020/052332},
  year   = {2020},
  patent = {True}
}
[PATENT-US10721295]
MP Enguehard, G Carofiglio, D Rossi,
"Popularity-based load-balancing for fog-cloud placement" , Patent US10721295 patent, keyword=ccn
2020,
Patent
@misc{DR:PATENT-US10721295,
  author       = {Enguehard, M. P. and Carofiglio, G. and Rossi, D.},
  title        = {Popularity-based load-balancing for fog-cloud placement},
  howpublished = {Patent US10721295},
  note         = {patent, keyword=ccn},
  year         = {2020},
  patent       = {True}
}
@inproceedings{DR:WWW-19,
  title        = {{A large-scale study of Wikipedia users' Quality of Experience}},
  author       = {Salutari, Flavia and Da Hora, Diego and Dubuc, Gilles and Rossi, Dario},
  booktitle    = {The Web Conference (WWW'19)},
  month        = may,
  year         = {2019},
  note         = {keyword=qoe, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19www.pdf},
  dataseturl   = {https://webqoe.telecom-paristech.fr/data/}
}
The Web is one of the most successful Internet applications. Yet,
the quality of Web users’ experience is still largely impenetrable.
Whereas Web performances are typically gathered with controlled experiments,
in this work we perform a large-scale study of one of the most popular Web sites, namely Wikipedia, explicitly asking (a small fraction of its) users for feedback on the browsing experience.
We leverage user survey responses to build a data-driven model of user satisfaction which, despite including state-of-the art quality of experience metrics, is still far from achieving accurate results, and discuss directions to move forward. Finally, we aim at making our dataset publicly available, which hopefully contributes in enriching and refining the scientific community knowledge on Web users’ quality of experience (QoE).
@inproceedings{DR:INFOCOM-19b,
  title        = {Explaining Web users' QoE with Factorization Machines},
  author       = {Huet, Alexis and Rossi, Dario},
  booktitle    = {IEEE INFOCOM},
  location     = {Paris, France},
  month        = apr,
  year         = {2019},
  howpublished = {https://nonsns.github.io/paper/rossi19infocom-b.pdf},
  demourl      = {https://huet.shinyapps.io/webqoe/},
  note         = {keyword=qoe,ai, project=huawei}
}
Whereas most of the literature employs classic machine learning techniques (such as C4.5 trees, Random Forest and Support Vector Machines) to improve forecast accuracy of QoE models, in this demo we explore the use of an information filtering system (Factorization Machine) to get fundamental insights and explain the relationship between QoE and different features.
[IEEEPROC-19]
Linguaglossa, L. and Lange, S. and Pontarelli, S. and Retvari, G. and Rossi, D. and Zinner, T. and Bifulco, R. and Jarschel, M. and Bianchi, G.,
"Survey of Performance Acceleration Techniques for Network Function Virtualization"
In Proceedings of the IEEE,
Vol. 107,
No. 4,
pp.746-764,
apr.
2019,
DOI 10.1109/JPROC.2019.2896848
Journal
@article{DR:IEEEPROC-19,
  author       = {{Linguaglossa}, L. and {Lange}, S. and {Pontarelli}, S. and {Retvari}, G. and {Rossi}, D. and {Zinner}, T. and {Bifulco}, R. and {Jarschel}, M. and {Bianchi}, G.},
  journal      = {Proceedings of the IEEE},
  title        = {Survey of Performance Acceleration Techniques for Network Function Virtualization},
  year         = {2019},
  volume       = {107},
  number       = {4},
  pages        = {746--764},
  keywords     = {Acceleration;Performance evaluation;Ecosystems;Network function virtualization;Virtualization;Communication networks;Fast packet processing;network function virtualization (NFV);offloading;performance acceleration;virtualization},
  doi          = {10.1109/JPROC.2019.2896848},
  issn         = {0018-9219},
  month        = apr,
  note         = {keyword=vpp,highspeed, project=newnet},
  howpublished = {https://nonsns.github.io/paper/rossi19ieeeproc.pdf}
}
The ongoing network softwarization trend holds the promise to revolutionize network infrastructures by making them more flexible, reconfigurable, portable, and more adaptive than ever. Still, the migration from hard-coded/hard-wired network functions toward their software-programmable counterparts comes along with the need for tailored optimizations and acceleration techniques so as to avoid or at least mitigate the throughput/latency performance degradation with respect to fixed function network elements. The contribution of this paper is twofold. First, we provide a comprehensive overview of the host-based network function virtualization (NFV) ecosystem, covering a broad range of techniques, from low-level hardware acceleration and bump-in-the-wire offloading approaches to high-level software acceleration solutions, including the virtualization technique itself. Second, we derive guidelines regarding the design, development, and operation of NFV-based deployments that meet the flexibility and scalability requirements of modern communication networks.
@inproceedings{DR:INFOCOM-19a,
  title        = {Discrete-time modeling of {NFV} accelerators that exploit batched processing},
  author       = {Lange, Stanislav and Linguaglossa, Leonardo and Geissler, Stefan and Rossi, Dario and Zinner, Thomas},
  booktitle    = {IEEE INFOCOM},
  location     = {Paris, France},
  month        = apr,
  year         = {2019},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19infocom-a.pdf},
  note         = {keyword=vpp,highspeed, project=newnet}
}
Network Functions Virtualization (NFV) is among the latest network revolutions, bringing flexibility and avoiding network ossification. At the same time, all-software NFV implementations on commodity hardware raise performance issues with respect to ASIC solutions. To address these issues, numerous software acceleration frameworks for packet processing have appeared in the last few years. Common among these frameworks is the use of batching techniques. In this context, packets are processed in groups as opposed to individually, which is required at high-speed to minimize the framework overhead, reduce interrupt pressure, and leverage instruction-level cache hits. Whereas several system implementations have been proposed and experimentally benchmarked, the scientific community has so far only to a limited extent attempted to model the system dynamics of modern NFV routers exploiting batching acceleration. In this paper, we fill this gap by proposing a simple generic model for such batching-based mechanisms, which allows a very detailed prediction of highly relevant performance indicators. These include the distribution of the processed batch size as well as queue size, which can be used to identify loss-less operational regimes or quantify the packet loss probability in high-load scenarios. We contrast the model prediction with experimental results gathered in a high-speed testbed including an NFV router, showing that the model not only correctly captures system performance under simple conditions, but also in more realistic scenarios in which traffic is processed by a mixture of functions.
@article{DR:COMNET-19,
  author       = {Linguaglossa, Leonardo and Rossi, Dario and Pontarelli, Salvatore and Barach, David and Marion, Damjan and Pfister, Pierre},
  title        = {High-Speed Data Plane and Network Functions Virtualization by Vectorizing Packet Processing},
  journal      = {Elsevier Computer Networks},
  month        = feb,
  year         = {2019},
  volume       = {149},
  pages        = {187--199},
  doi          = {10.1016/j.comnet.2018.11.033},
  note         = {keyword=vpp,highspeed, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19comnet.pdf}
}
In the last decade, a number of frameworks started to appear that implement, directly in user-space with kernel-bypass
mode, high-speed software data plane functionalities on commodity hardware. Vector Packet Processor (VPP) is one
of such frameworks, representing an interesting point in the design space in that it offers: (i) in user-space networking,
(ii) the flexibility of a modular router (Click and variants) with (iii) the benefits brought by techniques such as batch
processing that have become commonplace in high-speed networking stacks (such as netmap or DPDK). Similarly to
Click, VPP lets users arrange functions as a processing graph, providing a full-blown stack of network functions. However,
unlike Click where the whole tree is traversed for each packet, in VPP each traversed node processes all packets in the
batch (called vector) before moving to the next node. This design choice enables several code optimizations that greatly
improve the achievable processing throughput. This paper introduces the main VPP concepts and architecture, and
experimentally evaluates the impact of design choices (such as batch packet processing) on its performance.
[TNSM-19]
Zhang, Tianzhu and Linguaglossa, Leonardo and Gallo, Massimo and Giaccone, Paolo and Rossi, Dario,
"FloWatcher-DPDK: Lightweight line-rate flow-level monitoring in software"
In IEEE Transactions on Networks and Service Management,
Vol. ,
No. ,
pp.,
.
2019,
DOI 10.1109/TNSM.2019.2913710
Journal
@article{DR:TNSM-19,
  author       = {Zhang, Tianzhu and Linguaglossa, Leonardo and Gallo, Massimo and Giaccone, Paolo and Rossi, Dario},
  title        = {{FloWatcher-DPDK: Lightweight line-rate flow-level monitoring in software}},
  journal      = {IEEE Transactions on Network and Service Management},
  year         = {2019},
  keywords     = {Monitoring;Tools;Software;Hardware;Generators;Loss measurement;Benchmark testing;Network traffic monitoring;high-speed packet processing;per-flow packet measurement;Intel DPDK.},
  doi          = {10.1109/TNSM.2019.2913710},
  issn         = {1932-4537},
  note         = {keyword=highspeed, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19tnsm.pdf}
}
In the last few years, several software-based solutions have been proved to be very efficient for high-speed packet processing, traffic generation and monitoring, and can be considered valid alternatives to expensive and non-flexible hardware-based solutions. In our work, we first benchmark heterogeneous design choices for software-based packet monitoring systems in terms of achievable performance and required resources (i.e., the number of CPU cores). Building on this extensive analysis we design FloWatcher-DPDK, a DPDK-based high-speed software traffic monitor we provide to the community as an open source project. In a nutshell, FloWatcher-DPDK provides tunable fine-grained statistics at packet and flow levels. Experimental results demonstrate that FloWatcher-DPDK sustains per-flow statistics with 5-nines precision at high-speed (e.g., 14.88 Mpps) using a limited amount of resources. Finally, we showcase the usage of FloWatcher-DPDK by configuring it to analyze the performance of two open source prototypes for stateful flow-level end-host and in-network packet processing.
[TON-19]
Daly, James and Bruschi, Valerio and Linguaglossa, Leonardo and Pontarelli, Salvatore and Rossi, Dario and Tollet, Jerome and Torng, Eric and Yourtchenko, Andrew,
" TupleMerge: Fast Software Packet Processing for Online Packet Classification"
In IEEE Transactions on Networking,
Vol. ,
No. ,
pp.,
2019,
DOI 10.1109/TNET.2019.2920718
Journal
@article{DR:TON-19,
  title        = {TupleMerge: Fast Software Packet Processing for Online Packet Classification},
  doi          = {10.1109/TNET.2019.2920718},
  journal      = {IEEE/ACM Transactions on Networking},
  author       = {Daly, James and Bruschi, Valerio and Linguaglossa, Leonardo and Pontarelli, Salvatore and Rossi, Dario and Tollet, Jerome and Torng, Eric and Yourtchenko, Andrew},
  year         = {2019},
  note         = {keyword=vpp,highspeed, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19ton.pdf}
}
Packet classification is an important part of many
networking devices, such as routers and firewalls. Software
Defined Networking (SDN) heavily relies on online packet classification which must efficiently process two different streams:
incoming packets to classify and rules to update. This rules out
many offline packet classification algorithms that do not support
fast updates. We propose a novel online classification algorithm,
TupleMerge (TM), derived from Tuple Space Search (TSS), the
packet classifier used by Open vSwitch (OVS). TM improves
upon TSS by combining hash tables which contain rules with
similar characteristics. This greatly reduces classification time
preserving similar performance in updates.
We validate the effectiveness of TM using both simulation
and deployment in a full-fledged software router, specifically
within Vector Packet Processor (VPP). In our simulation results,
which focus solely on the efficiency of the classification algorithm,
we demonstrate that TM outperforms all other state of the art
methods including TSS, PartitionSort (PS), and SAX-PAC. For
example, TM is 34% faster at classifying packets and 30% faster
at updating rules than PS. We then evaluate experimentally TM
deployed within the VPP framework comparing TM against linear search and TSS, and also against TSS within the OVS framework. This validation of deployed implementations is important
as SDN frameworks have several optimizations such as caches
that may minimize the influence of a classification algorithm. Our
experimental results clearly validate the effectiveness of TM. VPP
TM classifies packets nearly two orders of magnitude faster than
VPP TSS and at least one order of magnitude faster than OVS
TSS.
@inproceedings{DR:SIGCOMM-19,
  author       = {Huet, Alexis and Houidi, Zied Ben and Cai, Shengming and Shi, Hao and Xu, Jinchun and Rossi, Dario},
  title        = {{Web Quality of Experience from Encrypted Packets}},
  booktitle    = {{ACM SIGCOMM, Demo Session}},
  year         = {2019},
  note         = {keyword=qoe,ai, project=huawei},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi19sigcomm.pdf},
  demourl      = {https://huawei-webqoe.shinyapps.io/sigcommdemo/},
  videourl     = {https://perso.telecom-paristech.fr/drossi/data/videos/ACM-SIGCOMM-19-webqoe-demo.mp4}
}
Pervasive encryption makes it hard for ISPs to manage their
network. Yet, to avoid user churn at times of shrinking revenues, ISPs must be able to assess the quality of experience
they are delivering to their customers. The case of the Web
is particularly complex, with a plethora of recently proposed
in-browser metrics that aim at capturing the page visual rendering quality (e.g. Above the Fold and SpeedIndex). In this
demo, we showcase that such metrics can be estimated quite
accurately just from streams of encrypted packets, using
classic supervised learning techniques.
2018 # 21
[COMMAG-18]
Barach, David and Linguaglossa, Leonardo and Marion, Damjan and Pfister, Pierre and Pontarelli, Salvatore and Rossi, Dario,
"High-speed Software Data Plane via Vectorized Packet Processing"
In IEEE Communication Magazine,
Vol. 56,
No. 12,
pp.97-103,
dec.
2018,
DOI 10.1109/MCOM.2018.1800069
Journal
@article{DR:COMMAG-18,
  author       = {Barach, David and Linguaglossa, Leonardo and Marion, Damjan and Pfister, Pierre and Pontarelli, Salvatore and Rossi, Dario},
  journal      = {IEEE Communications Magazine},
  title        = {High-speed Software Data Plane via Vectorized Packet Processing},
  year         = {2018},
  volume       = {56},
  number       = {12},
  pages        = {97--103},
  keywords     = {Vector Packet Processing; High-speed networking; System design},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18commag.pdf},
  doi          = {10.1109/MCOM.2018.1800069},
  issn         = {0163-6804},
  month        = dec
}
In the last decade, a number of frameworks started to appear that implement, directly in user-space with kernel-bypass mode, high-speed software data plane functionalities on commodity hardware.
Vector Packet Processor (VPP) is one of such frameworks, representing an interesting point in the design space in that it offers: (i) in user-space networking, (ii) the flexibility of a modular router (Click and variants) with (iii) the benefits brought by techniques such as batch processing that have become commonplace in high-speed networking stacks (such as netmap or DPDK).
Similarly to Click, VPP lets users arrange functions as a processing graph, providing a full-blown stack of network functions. However, unlike Click where the whole tree is traversed for each packet, in VPP each traversed node processes all packets in the batch (called \textit{vector}) before moving to the next node.
This design choice enables several code optimizations that greatly improve the achievable processing throughput. This paper introduces the main VPP concepts and architecture, and experimentally evaluates the impact of design choices (such as batch packet processing) on performance.
@article{DR:TGCN-18,
  author       = {Enguehard, Marcel and Droms, Ralph and Rossi, Dario},
  journal      = {IEEE Transactions on Green Communications and Networking},
  title        = {On the cost of geographic forwarding for information-centric things},
  keywords     = {Cryptography;Data models;Architecture;Protocols;Internet of Things;Topology},
  year         = {2018},
  month        = dec,
  volume       = {2},
  number       = {4},
  pages        = {1150--1163},
  doi          = {10.1109/TGCN.2018.2867267},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18tgcn.pdf},
  issn         = {2473-2400}
}
Recent research has identified Information-Centric Networking (ICN) as a good fit for Internet of Things (IoT) deployments. However, most studies have focused on ICN as an application enabler, disregarding the behaviour from a network viewpoint. In this paper, we address this by studying the most important properties of an ICN-IoT deployment and contrast the operational costs between geographic-based forwarding and name-based forwarding schemes. We aim to understand if, and under which IoT deployment characteristics, geographic forwarding constitutes an advantage over name-based schemes, in terms of feasibility (i.e., memory footprint and computational capability of the devices) and performance (which we analyze as the overall energy cost of operating an ICN-IoT network under either forwarding paradigm). To achieve this goal, we employ a mixture of (i) modelling, (ii) simulative and (iii) experimental methodologies, which are useful to respectively (i) state the problem in a principled way, (ii) gather information about topological properties that are instrumental to the model and (iii) gather physical properties of the devices to feed the model with realistic data. In a nutshell, our results show that geographic forwarding (i) halves the memory footprint on our reference deployments and (ii) yields significant energy savings, especially for dynamic topologies.
@inproceedings{DR:ITC-18,
  title        = {A popularity-based approach for effective Cloud offload in Fog clusters},
  author       = {Enguehard, Marcel and Carofiglio, Giovanna and Rossi, Dario},
  booktitle    = {30th International Teletraffic Congress (ITC30)},
  location     = {Vienna, Austria},
  month        = sep,
  year         = {2018},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18itc.pdf},
  note         = {keyword=icn, project=newnet}
}
Recent research has put forward the concept of Fog
computing, a deported intelligence for IoT networks. Fog clusters
are meant to complement current cloud deployments, providing
compute and storage resources directly in the access network –
which is particularly useful for low-latency applications. However,
Fog deployments are expected to be less elastic than cloud
platforms, since elasticity in Cloud platforms comes from the
scale of the data-centers. Thus, a Fog node dimensioned for
the average traffic load of a given application will not be able
to handle sudden bursts of traffic. In this paper, we explore
such a use-case, where a Fog-based latency-sensitive application
must offload some of its processing to the Cloud. We build an
analytical queueing model for deriving the statistical response
time of a Fog deployment under different request Load Balancing
(LB) strategies, contrasting a naive, an ideal (LFU-LB, assuming
a priori knowledge of the request popularity) and a practical
(LRU-LB, based on online learning of the popularity with an
LRU filter) scheme. Using our model, and confirming the results
through simulation, we show that our LRU-LB proposal achieves
close-to-ideal performance, with high savings on Cloud offload
cost with respect to a request-oblivious strategy in the explored
scenarios.
@inproceedings{DR:SIGCOMM-18a,
  title        = {A practical method for measuring Web above-the-fold time},
  author       = {da Hora, Diego Neves and Rossi, Dario and Christophides, Vassilis and Teixeira, Renata},
  booktitle    = {ACM SIGCOMM, Demo Session},
  address      = {Budapest, Hungary},
  month        = aug,
  year         = {2018},
  note         = {keyword=webqoe, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18sigcomm-a.pdf}
}
Page load time (PLT) is still the most common application Quality of Service (QoS) metric to estimate the Quality of Experience (QoE) of Web users. Yet, recent literature abounds with interesting proposals for alternative metrics (e.g., Above The Fold, SpeedIndex and variants) that aim at closely capturing how users perceive the Webpage rendering process. However, these novel metrics are typically computationally expensive, as they require monitoring and post-processing videos of the rendering process, and have failed to be widely deployed. In this demo, we show our implementation of an open-source Chrome extension that implements a practical and lightweight method to measure the approximated Above-the-Fold (AATF) time, as well as other Web performance metrics. The idea is, instead of accurately monitoring the rendering output, to track the download time of the last visible object on screen (i.e., “above the fold”). Our plugin also has options to save detailed reports for later analysis, a functionality ideally suited for researchers wanting to gather data from Web experiments.
@article{DR:JSAC-18,
  title        = {Parallel Simulation of Very Large-Scale General Cache Networks},
  author       = {Tortelli, Michele and Rossi, Dario and Leonardi, Emilio},
  journal      = {IEEE Journal on Selected Areas in Communications (JSAC)},
  volume       = {36},
  number       = {8},
  pages        = {1871--1886},
  month        = aug,
  year         = {2018},
  doi          = {10.1109/JSAC.2018.2844938},
  note         = {keyword=ccn, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18jsac.pdf}
}
In this paper we propose a methodology for the
study of general cache networks, which is intrinsically scalable
and amenable to parallel execution. We contrast two techniques:
one that slices the network, and another that slices the content
catalog. In the former, each core simulates requests for the whole
catalog on a subgraph of the original topology, whereas in the
latter each core simulates requests for a portion of the original
catalog on a replica of the whole network. Interestingly, we find
out that when the number of cores increases (and so the split
ratio of the network topology), the overhead of message passing
required to keep consistency among nodes actually offsets
any benefit from the parallelization: this is strictly due to the
correlation among neighboring caches, meaning that requests
arriving at one cache allocated on one core may depend on
the status of one or more caches allocated on different cores.
Even more interestingly, we find out that the newly proposed
catalog slicing, on the contrary, achieves an ideal speedup in the
number of cores. Overall, our system, which we make available as
open source software, enables performance assessment of large-scale general cache networks, i.e., comprising hundreds of nodes,
trillions of contents, and complex routing and caching algorithms,
in minutes of CPU time and with exiguous amounts of memory.
@inproceedings{DR:SIGCOMM-18c,
  author       = {Addanki, Vamsi and Linguaglossa, Leonardo and Roberts, James and Rossi, Dario},
  title        = {Fair dropping for multi-resource fairness in software routers},
  booktitle    = {ACM SIGCOMM, Demo Session},
  address      = {Budapest, Hungary},
  month        = aug,
  year         = {2018},
  note         = {keyword=vpp,dc, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18sigcomm-c.pdf}
}
We demonstrate that fair dropping is an effective means to realize fair sharing of bandwidth and CPU in a software router. Analysis underpinning the effectiveness of the proposed approach is presented in an IFIP Networking 2018 paper [1]
@inproceedings{DR:SIGCOMM-18b,
  author       = {Zhang, Tianzhu and Linguaglossa, Leonardo and Gallo, Massimo and Giaccone, Paolo and Rossi, Dario},
  title        = {FlowMon-DPDK: Parsimonious per-flow software monitoring at line rate},
  booktitle    = {ACM SIGCOMM, Demo Session},
  address      = {Budapest, Hungary},
  month        = aug,
  year         = {2018},
  note         = {keyword=highspeed, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18sigcomm-b.pdf}
}
Software packet processing is an intriguing approach due to its tremendous flexibility and cost reduction compared with hardware solutions, which have long dominated software performance. However, the emergence of fast packet I/O frameworks challenges hardware supremacy, as software solutions based on commodity hardware manage to process packets at high speed (10-40 Gbps).
Whereas novel packet processing applications based on these new frameworks proliferate, fine-grained traffic monitoring at high speed has received comparatively less attention. In this demonstration, we showcase FlowMon-DPDK, a novel software traffic monitor recently published at TMA 2018, based on the Intel DPDK I/O framework. Our monitor is capable of providing runtime statistics at both packet- and flow-levels at 10 Gbit/s using a minimal amount of CPU resources, with packet losses that are orders of magnitude smaller than state-of-the-art software. A video showing the demonstration is available at \url{https://youtu.be/B8uaw9UgMm0}. For further details, please refer to our TMA 2018 paper.
[BIGDAMA-18]
Putina, Andrian and Rossi, Dario and Bifet, Albert and Barth, Steven and Pletcher, Drew and Precup, Cristina and Nivaggioli, Patrice,
"Telemetry-based stream-learning of BGP anomalies"
ACM SIGCOMM Workshop on Big Data Analytics and Machine Learning for Data Communication Networks (Big-DAMA’18)
aug.
2018,
Conference
@inproceedings{DR:BIGDAMA-18,
  title        = {Telemetry-based stream-learning of BGP anomalies},
  author       = {Putina, Andrian and Rossi, Dario and Bifet, Albert and Barth, Steven and Pletcher, Drew and Precup, Cristina and Nivaggioli, Patrice},
  booktitle    = {ACM SIGCOMM Workshop on Big Data Analytics and Machine Learning for Data Communication Networks (Big-DAMA'18)},
  location     = {Budapest, Hungary},
  year         = {2018},
  month        = aug,
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18bigdama.pdf},
  note         = {keyword=measurement,ml, project=newnet}
}
Recent technology evolution allows network equipment to continuously stream a wealth of "telemetry" information, which pertains to multiple protocols and layers of the stack, at a very fine spatial-grain and high-frequency. Processing this deluge of telemetry data in real-time clearly offers new opportunities for network control and troubleshooting, but also poses serious challenges. We tackle this challenge by applying streaming machine-learning techniques to the continuous flow of control and data-plane telemetry data, with the purpose of real-time detection of BGP anomalies. In particular, we implement an anomaly detection engine that leverages DenStream, an unsupervised clustering technique, and apply it to features collected from a large-scale testbed comprising tens of routers traversed by 1 Terabit/s worth of real application traffic. In spirit with the recent trend toward reproducibility of research results, we make our code and datasets available as open source to the scientific community.
@inproceedings{DR:QOMEX-18,
  title        = {Speed Index: Relating the Industrial Standard for User Perceived Web Performance to Web QoE},
  author       = {Hossfeld, Tobias and Metzger, Florian and Rossi, Dario},
  booktitle    = {10th International Conference on Quality of Multimedia Experience (QoMEX 2018)},
  location     = {Sardinia, Italy},
  year         = {2018},
  month        = jun,
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18qomex.pdf},
  note         = {keyword=webqoe, project=newnet}
}
In 2012, Google introduced the Speed Index (SI) metric to quantify the speed of the Web page visual completeness
for the actually displayed above-the-fold (ATF) portion of a Web page. In Web browsing a page might appear to the user to be already fully rendered, even though further content may still be retrieved, resulting in the Page Load Time (PLT). This happens due to the browser progressively rendering all objects, part of which can also be located below the browser window’s current viewport. The SI metric (and variants thereof) have since established themselves as a de facto standard in Web page and browser testing. While SI is a step in the direction of including the user experience into Web metrics, the actual meaning of the metric and especially the relationship between Speed Index and Web QoE is however far from being clear. The contributions of this paper are thus to first develop an understanding of the SI based on a theoretical analysis and second, to analyze the interdependency between SI and MOS values from an existing public dataset. Specifically, our analysis is based on two well established models that map the user waiting time to a user ACR-rating of the QoE. The analysis shows that ATF-based metrics are more appropriate than pure PLT as input to Web QoE models.
@inproceedings{DR:NOSSDAV-18,
  author       = {Samain, Jacques and Carofiglio, Giovanna and Tortelli, Michele and Rossi, Dario},
  title        = {A simple yet effective network-assisted signal for enhanced DASH quality of experience},
  booktitle    = {28th ACM SIGMM Workshop on Network and Operating Systems Support for Digital Audio and Video (NOSSDAV'18)},
  month        = jun,
  year         = {2018},
  note         = {keyword=icn,icndash,bestpaperaward, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18nossdav.pdf}
}
We propose and evaluate simple signals coming from in-network telemetry that are effective to enhance the quality of DASH streaming. Specifically, in-network caching is known to positively affect DASH streaming quality but at the same time negatively affect the controller stability, increasing the quality switch ratio. Our contributions are to first (i) consider the broad spectrum of interaction between the network and the application, and then (ii) to devise how to effectively exploit in a DASH controller a very simple signal
(i.e., per-quality hit ratio) that can be exported by frameworks such as Server and Network Assisted DASH (SAND) at fairly low rate (i.e., a timescale of 10s of seconds). Our thorough experimental campaign confirms the soundness of the approach (that significantly ameliorates performance with respect to network-blind DASH), as well as its robustness (i.e., tuning is not critical) and practical appeal (i.e., due to its simplicity and compatibility with SAND).
@inproceedings{DR:TMA-18,
  author       = {Zhang, Tianzhu and Linguaglossa, Leonardo and Gallo, Massimo and Giaccone, Paolo and Rossi, Dario},
  title        = {FlowMon-DPDK: Parsimonious per-flow software monitoring at line rate},
  booktitle    = {Network Traffic Measurement and Analysis Conference (TMA'18)},
  month        = jun,
  year         = {2018},
  location     = {Vienna, Austria},
  note         = {keyword=highspeed, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18tma.pdf}
}
Testing experimental network devices requires deep
performance analysis, which is usually performed with expensive,
not flexible, hardware equipment. With the advent of high-
speed packet I/O frameworks, general purpose equipments have
narrowed the performance gap in respect of dedicated hardware
and a variety of software-based solutions have emerged for
handling traffic at very high speed. While the literature abounds
with software traffic generators, existing monitoring solutions do
not target worst-case scenarios (i.e., 64B packets at line rate) that
are particularly relevant for stress-testing high-speed network
functions, or occupy too many resources.
In this paper we first analyze the design space for high-speed
traffic monitoring that leads us to specific choices characterizing
FlowMon-DPDK, a DPDK-based software traffic monitor that
we make available as open source software. In a nutshell,
FlowMon-DPDK provides tunable fine-grained statistics at both
packet and flow levels. Experimental results demonstrate that our
traffic monitor is able to provide per-flow statistics with 5-nines
precision at high-speed (14.88 Mpps) using an exiguous amount of
resources. Finally, we showcase FlowMon-DPDK usage by testing
two open source prototypes for stateful flow-level end-host and
in-network packet processing.
[RIPE-76]
da Hora, Diego Neves and Christophides, Vassilis and Teixeira, Renata and Rossi, Dario,
"Perceptual evaluation of web-browsing"
Talk at the RIPE76, Measurement and Tools (MAT) Working Group
may.
2018,
Conference
@inproceedings{DR:RIPE-76,
  author    = {da Hora, Diego Neves and Christophides, Vassilis and Teixeira, Renata and Rossi, Dario},
  title     = {Perceptual evaluation of web-browsing},
  booktitle = {Talk at the RIPE76, Measurement and Tools (MAT) Working Group},
  address   = {Marseille, France},
  year      = {2018},
  month     = may,
  note      = {keyword=webqoe, project=newnet}
}
@inproceedings{DR:INFOCOM-18a,
  author       = {Putina, Andrian and Rossi, Dario and Bifet, Albert and Barth, Steven and Pletcher, Drew and Precup, Cristina and Nivaggioli, Patrice},
  title        = {Unsupervised real-time detection of BGP anomalies leveraging high-rate and fine-grained telemetry data},
  booktitle    = {IEEE INFOCOM, Demo Session},
  month        = apr,
  year         = {2018},
  location     = {Honolulu, Hawaii},
  note         = {keyword=measurement,ml, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18infocom-a.pdf}
}
Recent technology evolution allows network equipment to continuously stream a wealth of information, pertaining to multiple protocols and layers of the stack, at a very fine spatial-grain and, furthermore, at high frequency. Processing this deluge of telemetry data in real-time clearly offers new opportunities for network control and troubleshooting, but also poses serious challenges. In this demonstration, we tackle this challenge by applying streaming machine-learning techniques to the continuous flow of control and data-plane telemetry data, with the purpose of real-time detection of BGP anomalies. In particular, we implement an anomaly detection engine that leverages DenStream, an unsupervised clustering technique, and apply it to telemetry features collected from a large-scale testbed comprising tens of routers traversed by 1 Terabit/s worth of real application traffic.
@inproceedings{DR:INFOCOM-18b,
  title        = {Batched packet processing for high-speed software data plane functions},
  author       = {Barach, David and Linguaglossa, Leonardo and Marion, Damjan and Pfister, Pierre and Pontarelli, Salvatore and Rossi, Dario and Tollet, Jerome},
  booktitle    = {IEEE INFOCOM, Demo Session},
  location     = {Honolulu, Hawaii},
  year         = {2018},
  month        = apr,
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18infocom-b.pdf},
  note         = {keyword=vpp, project=newnet}
}
In the last decade, a number of frameworks started to appear that implement, directly in user-space with kernel-bypass mode, high-speed software data plane functionalities on commodity hardware. Vector Packet Processor (VPP) is one of such frameworks, representing an interesting point in the design space in that it offers: (i) in user-space networking, (ii) the flexibility of a modular router (Click and variants) with (iii) the benefits brought by techniques such as batch processing that have become commonplace in lower-level building blocks of high-speed networking stacks (such as netmap or DPDK). Similarly to Click, VPP lets users arrange functions as a processing graph, providing a full-blown stack of network functions. However, unlike Click where the whole tree is traversed for each packet, in VPP each traversed node processes all packets in the batch before moving to the next node. This design choice enables several code optimizations that greatly improve the achievable processing throughput: the purpose of this demonstration is to introduce the main VPP concepts and architecture, as well as experimentally showing the impact of design choices –and especially of batch packet processing–, on the achievable packet forwarding performance.
[PAM-18b]
da Hora, Diego Neves and Asrese, Alemnew Sheferaw and Christophides, Vassilis and Teixeira, Renata and Rossi, Dario,
"Narrowing the gap between QoS metrics and Web QoE using Above-the-fold metrics"
International Conference on Passive and Active Network Measurement (PAM), Recipient of the Best dataset award
mar.
2018,
Conference Award
@inproceedings{DR:PAM-18b,
  title        = {Narrowing the gap between QoS metrics and Web QoE using Above-the-fold metrics},
  author       = {da Hora, Diego Neves and Asrese, Alemnew Sheferaw and Christophides, Vassilis and Teixeira, Renata and Rossi, Dario},
  booktitle    = {International Conference on Passive and Active Network Measurement (PAM), Recipient of the Best dataset award},
  address      = {Berlin, Germany},
  month        = mar,
  year         = {2018},
  note         = {keyword=webqoe,bestpaperaward, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18pam-b.pdf}
}
Page load time (PLT) is still the most common application Quality of Service (QoS) metric to estimate the Quality of Experience (QoE) of Web users. Yet, recent literature abounds with proposals for alternative metrics (e.g., Above The Fold, SpeedIndex and their variants) that aim at better estimating user QoE. The main purpose of this work is thus to thoroughly investigate a mapping between established and recently proposed objective metrics and user QoE. We obtain ground truth QoE via user experiments where we collect and analyze 3,400 Web accesses annotated with QoS metrics and explicit user ratings in a scale of 1 to 5, which we make available to the community. In particular, we contrast domain expert models (such as ITU-T and IQX) fed with a single QoS metric, to models trained using our ground-truth dataset over multiple QoS metrics as features. Results of our experiments show that, albeit very simple, expert models have a comparable accuracy to machine learning approaches. Furthermore, the model accuracy improves considerably when building per-page QoE models, which may raise scalability concerns as we discuss.
@inproceedings{DR:PAM-18c,
  title        = {Leveraging Inter-domain Stability for {BGP} Dynamics Analysis},
  author       = {Green, Thomas and Lambert, Anthony and Pelsser, Cristel and Rossi, Dario},
  booktitle    = {International Conference on Passive and Active Network Measurement (PAM)},
  address      = {Berlin, Germany},
  month        = mar,
  year         = {2018},
  note         = {keyword=measurement,bgp},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18pam-c.pdf}
}
In the Internet, Autonomous Systems continuously exchange routing information via the BGP protocol: the large number of networks involved and the verbosity of BGP result in a huge stream of updates. Making sense of all those messages remains a challenge today. In this paper, we leverage the notion of "primary path" (i.e., the most used inter-domain path of a BGP router toward a destination prefix for a given time period), reinterpreting updates by grouping them in terms of primary paths unavailability periods, and illustrate how BGP dynamics analysis would benefit from working with primary paths. Our contributions are as follows. First, through measurements, we validate the existence of primary paths: by analyzing BGP updates announced at the LINX RIS route collector spanning a three months period, we show that primary paths are consistently in use during the observation period. Second, we quantify the benefits of primary paths for BGP dynamics analysis on two use cases : Internet tomography and anomaly detection. For the latter, using three months of anomalous BGP events documented by BGPmon as reference, we show that primary paths could be used for detecting such events (hijacks and outages), testifying of the increased semantic they provide.
[PAM-18a]
Salutari, Flavia and Cicalese, Danilo and Rossi, Dario,
"A closer look at IP-ID behavior in the Wild"
International Conference on Passive and Active Network Measurement (PAM)
mar.
2018,
Conference
@inproceedings{DR:PAM-18a,
  author       = {Salutari, Flavia and Cicalese, Danilo and Rossi, Dario},
  title        = {A closer look at IP-ID behavior in the Wild},
  booktitle    = {International Conference on Passive and Active Network Measurement (PAM)},
  address      = {Berlin, Germany},
  month        = mar,
  year         = {2018},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18pam-a.pdf},
  note         = {keyword=measurement, project=newnet}
}
Originally used to assist network-layer fragmentation and reassembly, the IP identification field (IP-ID) has been used and abused for a range of tasks, from counting hosts behind NAT, to detect router aliases and, lately, to assist detection of censorship in the Internet at large. These inferences have been possible since, in the past, the IP-ID was mostly implemented as a simple packet counter: however, this behavior has been discouraged for security reasons and other policies, such as random values, have been suggested. In this study, we propose a framework to classify the different IP-ID behaviors using active probing from a single host. Despite being only minimally intrusive, our technique is significantly accurate (99% true positive classification), robust against packet losses (up to 20%) and lightweight (a few packets suffice to discriminate all IP-ID behaviors). We then apply our technique to an Internet-wide census, where we actively probe one alive target per each routable /24 subnet: we find that the majority of hosts adopt constant IP-IDs (39%) or a local counter (34%), that the fraction of global counters (18%) significantly diminished, that a non marginal number of hosts have an odd behavior (7%) and that random IP-IDs are still an exception (2%).
[HPSR-18]
Gong, YiXi and Roberts, James W. and Rossi, Dario,
"Per-Flow Fairness in the Datacenter Network"
IEEE International Conference on High Performance Switching and Routing (HPSR’18)
jun.
2018,
Conference
@inproceedings{DR:HPSR-18,
  author       = {Gong, YiXi and Roberts, James W. and Rossi, Dario},
  title        = {Per-Flow Fairness in the Datacenter Network},
  booktitle    = {IEEE International Conference on High Performance Switching and Routing (HPSR'18)},
  month        = jun,
  year         = {2018},
  location     = {Bucharest, Romania},
  note         = {keyword=dc, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18hpsr.pdf}
}
Datacenter network (DCN) design has been actively researched for over a decade. Solutions proposed range from end-to-end transport protocol redesign to more intricate, monolithic and cross-layer architectures. Despite this intense activity, to date we remark the absence of DCN proposals based on simple fair scheduling strategies. In this paper, we evaluate the effectiveness of FQ-CoDel in the DCN environment. Our results show, (i) that average throughput is greater than that attained with DCN tailored protocols like DCTCP, and (ii) the completion time of short flows is close to that of state-of-art DCN proposals like pFabric. Good enough performance and striking simplicity make FQ-CoDel a serious contender in the DCN arena.
@article{DR:TON-18,
  title        = {Caching Encrypted Content via Stochastic Cache Partitioning},
  author       = {Araldo, Andrea and Dan, Gyorgy and Rossi, Dario},
  journal      = {IEEE/ACM Transactions on Networking},
  volume       = {26},
  number       = {1},
  year         = {2018},
  doi          = {10.1109/TNET.2018.2793892},
  note         = {keyword=ccmSim,ccn, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18ton.pdf}
}
In-network caching is an appealing solution to cope with the increasing bandwidth demand of video, audio and data transfer over the Internet. Nonetheless, in order to protect consumer privacy and their own business, Content Providers (CPs) increasingly deliver encrypted content, thereby preventing Internet Service Providers (ISPs) from employing traditional caching strategies, which require the knowledge of the objects being transmitted. To overcome this emerging tussle between security and efficiency, in this paper we propose an architecture in which the ISP partitions the cache space into slices, assigns each slice to a different CP, and lets the CPs remotely manage their slices. This architecture enables transparent caching of encrypted content, and can be deployed in the very edge of the ISP’s network (i.e., base stations, femtocells), while allowing CPs to maintain exclusive control over their content. We propose an algorithm, called SDCP, for partitioning the cache storage into slices so as to maximize the bandwidth savings provided by the cache. A distinctive feature of our algorithm is that ISPs only need to measure the aggregated miss rates of each CP, but they need not know of the individual objects that are requested. We prove that the SDCP algorithm converges to a partitioning that is close to the optimal, and we bound its optimality gap. We use simulations to evaluate SDCP’s convergence rate under stationary and non-stationary content popularity. Finally, we show that SDCP significantly outperforms traditional reactive caching techniques, considering both CPs with perfect and with imperfect knowledge of their content popularity.
@article{DR:CCR-18,
  title        = {A longitudinal study of IP Anycast},
  author       = {Cicalese, Danilo and Rossi, Dario},
  journal      = {ACM Computer Communication Review},
  volume       = {48},
  number       = {1},
  year         = {2018},
  note         = {keyword=measurement,anycast, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18ccr.pdf}
}
IP anycast is a commonly used technique to share the load of a variety of global services. For more than one year, leveraging a lightweight technique for IP anycast detection, enumeration and geolocation, we perform regular IP monthly censuses. This paper provides a brief longitudinal study of the anycast ecosystem, and we additionally make all our datasets (raw measurements from PlanetLab and RIPE Atlas), results (monthly geolocated anycast replicas for all IP/24) and code available to the community.
@inproceedings{DR:NETWORKING-18,
  author       = {Addanki, Vamsi and Linguaglossa, Leonardo and Roberts, James and Rossi, Dario},
  title        = {Controlling software router resource sharing by fair packet dropping},
  booktitle    = {IFIP Networking},
  month        = may,
  location     = {Zurich, Switzerland},
  year         = {2018},
  note         = {keyword=vpp,dc, project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi18networking.pdf}
}
The paper discusses resource sharing in a software router where both bandwidth and CPU may be bottlenecks. We propose a novel fair dropping algorithm to realize per-flow max-min fair sharing of these resources. The algorithm is compatible with features like batch I/O and batch processing that tend to make classical scheduling impractical. We describe
an implementation using Vector Packet Processing, part of the Linux Foundation FD.io project. Preliminary experimental results prove the efficiency of the algorithm in controlling bandwidth and CPU sharing at high speed. Performance in dynamic traffic is evaluated using analysis and simulation, demonstrating that the proposed approach is both effective and scalable.
2017 # 10
[IMC-17]
Green, Thomas and Lambert, Anthony and Pelsser, Cristel and Rossi, Dario,
"Leveraging interdomain stability for squeezed and juicy BGP dynamics"
ACM Internet Measurement Conference (IMC), Poster session
nov.
2017,
Conference
@inproceedings{DR:IMC-17,
  title     = {Leveraging interdomain stability for squeezed and juicy BGP dynamics},
  author    = {Green, Thomas and Lambert, Anthony and Pelsser, Cristel and Rossi, Dario},
  year      = {2017},
  month     = nov,
  booktitle = {ACM Internet Measurement Conference (IMC), Poster session},
  topic     = {internetmeasurement,measurement,bgp},
  note      = {keyword=measurement,bgp}
}
@article{DR:TMM-17,
  title        = {Dynamic Adaptive Video Streaming: Towards a systematic comparison of ICN and TCP/IP},
  author       = {Samain, Jacques and Carofiglio, Giovanna and Muscariello, Luca and Papalini, Michele and Sardara, Mauro and Tortelli, Michele and Rossi, Dario},
  journal      = {IEEE Transactions on Multimedia},
  volume       = {19},
  number       = {10},
  pages        = {2166--2181},
  month        = oct,
  year         = {2017},
  doi          = {10.1109/TMM.2017.2733340},
  note         = {keyword=newnet,ccn},
  topic        = {qoe,icn,streaming},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17tmm.pdf}
}
Streaming of video contents over the Internet is experiencing an unprecedented growth. While video permeates every application, it also puts tremendous pressure in the network – to support users having heterogeneous accesses and expecting high quality of experience, in a furthermore cost-effective manner. In this context, Future Internet (FI) paradigms, such as Information Centric Networking (ICN), are particularly well suited to not only enhance video delivery at the client (as in the DASH approach), but to also naturally and seamlessly extend video support deeper in the network functions. In this paper, we contrast ICN and TCP/IP with an experimental approach, where we employ several state-of-the-art DASH controllers (PANDA, AdapTech, and BOLA) on an ICN vs TCP/IP network stack. Our campaign, based on tools which we developed and made available as open-source software, includes multiple clients (homogeneous vs heterogeneous mixture, synchronous vs asynchronous arrivals), videos (up to 4K resolution), channels (e.g., DASH profiles, emulated WiFi and LTE, real 3G/4G traces), and levels of integration with an ICN network (i.e., vanilla NDN, wireless loss detection and recovery at the access point, load balancing). Our results clearly illustrate, as well as quantitatively assess, benefits of ICN-based streaming, warning about potential pitfalls that are however easy to avoid.
@techreport{DR:ATF-17,
  author       = {da Hora, Diego Neves and Asrese, Alemnew Sheferaw and Christophides, Vassilis and Teixeira, Renata and Rossi, Dario},
  title        = {Narrowing the gap between QoS metrics and Web QoE using Above-the-fold metrics},
  institution  = {Telecom ParisTech},
  year         = {2017},
  month        = oct,
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17atf.pdf},
  topic        = {qoe,measurement},
  note         = {keyword=newnet,webqoe}
}
@article{DR:COMNET-17b,
  title        = {Exploiting Parallelism in Hierarchical Content Stores for High-speed ICN Routers},
  author       = {Mansilha, Rodrigo and Barcellos, Marinho and Leonardi, Emilio and Rossi, Dario},
  journal      = {Elsevier Computer Networks},
  volume       = {125},
  pages        = {132--145},
  month        = sep,
  year         = {2017},
  doi          = {10.1016/j.comnet.2017.04.041},
  note         = {keyword=newnet,ccn},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17comnet-b.pdf}
}
Information-centric network (ICN) is a novel architecture identifying data as a first class citizen, and caching as a prominent low-level feature. Yet, efficiently using large storage (e.g., 1 TB) at line rate (e.g., 10 Gbps) is not trivial: in our previous work, we proposed an ICN router design equipped with hierarchical caches, that exploits peculiarities of the ICN traffic arrival process. In this paper, we implement such proposal in the NDN Forwarding Daemon (NFD), and carry on a thorough experimental evaluation of its performance with an emulation methodology on common off the shelf hardware. Our study testifies the interest and feasibility of the approach.
@article{DR:COMNET-17a,
  author       = {Tortelli, Michele and Rossi, Dario and Leonardi, Emilio},
  title        = {A Hybrid Methodology for the Performance Evaluation of Internet-scale Cache Networks},
  journal      = {Elsevier Computer Networks},
  volume       = {125},
  pages        = {146--159},
  month        = sep,
  year         = {2017},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17comnet-a.pdf},
  note         = {keyword=newnet,ccn}
}
Two concurrent factors challenge the evaluation of large-scale cache networks: complex algorithmic interactions, which are hardly represented by analytical models, and catalog/network size, which limits the scalability of event-driven simulations. To solve these limitations, we propose a new hybrid technique, that we colloquially refer to as ModelGraft, which combines elements of stochastic analysis within a simulative Monte-Carlo approach. In ModelGraft, large scenarios are mapped to a downscaled counterpart built upon Time-To-Live (TTL) caches, to achieve CPU and memory scalability. Additionally, a feedback loop ensures convergence to a consistent state, whose performance accurately represents that of the original system. Finally, the technique also retains simulation simplicity and flexibility, as it can be seamlessly applied to numerous forwarding, meta-caching, and replacement algorithms. We implement and make ModelGraft available as an alternative simulation engine of ccnSim. Performance evaluation shows that, with respect to classic event-driven simulation, ModelGraft gains over two orders of magnitude in both CPU time and memory complexity, while limiting accuracy loss below 2%. Ultimately, ModelGraft pushes the boundaries of the performance evaluation well beyond the limits achieved in the current state of the art, enabling the study of Internet-scale scenarios with content catalogs comprising hundreds of billions of objects.
@inproceedings{DR:PAM-17,
  author       = {Bocchi, Enrico and De Cicco, Luca and Mellia, Marco and Rossi, Dario},
  title        = {The Web, the Users, and the MOS: Influence of HTTP/2 on User Experience},
  booktitle    = {Passive and Active Measurements},
  month        = apr,
  year         = {2017},
  halid        = {hal-01613491},
  note         = {keyword=newnet,webqoe},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17pam.pdf}
}
This work focuses on the evaluation of Web quality of experience as perceived by actual users and in particular on the impact of HTTP/1 vs HTTP/2. We adopt an experimental methodology that uses real web pages served through a realistic testbed where we control network, protocol, and application configuration. Users are asked to browse such pages and provide their subjective feedback, which we leverage to obtain the Mean Opinion Score (MOS), while the testbed records objective metrics. The collected dataset comprises over 4,000 grades that we explore to tackle the question whether HTTP/2 improves users experience, to what extent, and in which conditions. Findings show that users report marginal differences, with 22%, 52%, 26% of HTTP/2 MOS being better, identical, or worse than HTTP/1, respectively. Even in scenarios that favor HTTP/2, results are not as sharp as expected. This is in contrast with objective metrics, which instead record a positive impact with HTTP/2 usage. This shows the complexity of understanding the web experience and the need to involve actual users in the quality assessment process.
@article{DR:COMMAG-17,
  author       = {Trevisan, Martino and Finamore, Alessandro and Mellia, Marco and Munafo, Maurizio and Rossi, Dario},
  title        = {{Traffic Analysis with Off-the-Shelf Hardware: Challenges and Lessons Learned}},
  journal      = {IEEE Communications Magazine},
  volume       = {55},
  issue        = {3},
  pages        = {163--169},
  month        = mar,
  year         = {2017},
  doi          = {10.1109/MCOM.2017.1600756CM},
  note         = {keyword=newnet,highspeed},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi17commag.pdf}
}
In recent years, the progress in both hardware and software allows user-space applications to capture packets at 10 Gb/s line rate or more, with cheap COTS hardware. However, processing packets at such rates with software is still far from being trivial. In the literature, this challenge has been extensively studied for network intrusion detection systems, where per-packet operations are easy to parallelize with support of hardware acceleration. Conversely, the scalability of statistical traffic analyzers (STAs) is intrinsically complicated by the need to track per-flow state to collect statistics. This challenge has received less attention so far, and it is the focus of this work. We present and discuss design choices to enable a STA to collect hundreds of per-flow metrics at a multi-10-Gb/s line rate. We leverage a handful of hardware advancements proposed over the last years (e.g., RSS queues, NUMA architecture), and we provide insights on the trade-offs they imply when combined with state-of-the-art packet capture libraries and the multi-process paradigm. We outline the principles to design an optimized STA, and we implement them to engineer DPDKStat, a solution combining the Intel DPDK framework with the traffic analyzer Tstat. Using traces collected from real networks, we demonstrate that DPDKStat achieves 40 Gb/s of aggregated rate with a single COTS PC.
2016 # 18
[ITC28a]
Araldo, Andrea and Dan, Gyorgy and Rossi, Dario,
"Stochastic Dynamic Cache Partitioning for Encrypted Content Delivery"
ITC28, Runner-up for best paper award and recipient of the IEEE ComSoc/ISOC Internet Technical Committee Best paper award 2016-2017
sep.
2016,
Conference Award
@inproceedings{DR:ITC28a,
  author       = {Araldo, Andrea and Dan, Gyorgy and Rossi, Dario},
  title        = {Stochastic Dynamic Cache Partitioning for Encrypted Content Delivery},
  booktitle    = {ITC28, Runner-up for best paper award and recipient of the IEEE ComSoc/ISOC Internet Technical Committee Best paper award 2016-2017},
  month        = sep,
  year         = {2016},
  topic        = {icn,optimization,streaming},
  note         = {keyword=newnet,ccn,bestpaperaward},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16itc28-a.pdf}
}
In-network caching is an appealing solution to cope with the increasing bandwidth demand of video, audio and data transfer over the Internet. Nonetheless, an increasing share of content delivery services adopt encryption through HTTPS, which is not compatible with traditional ISP-managed approaches like transparent and proxy caching. This raises the need for solutions involving both Internet Service Providers (ISP) and Content Providers (CP): by design, the solution should preserve business-critical CP information (e.g., content popularity, user preferences) on the one hand, while allowing for a deeper integration of caches in the ISP architecture (e.g., in 5G femto-cells) on the other hand. In this paper we address this issue by considering a content-oblivious ISP-operated cache. The ISP allocates the cache storage to various content providers so as to maximize the bandwidth savings provided by the cache: the main novelty lies in the fact that, to protect business-critical information, ISPs only need to measure the aggregated miss rates of the individual CPs and do not need to be aware of the objects that are requested, as in classic caching. We propose a cache allocation algorithm based on a perturbed stochastic subgradient method, and prove that the algorithm converges close to the allocation that maximizes the overall cache hit rate. We use extensive simulations to validate the algorithm and to assess its convergence rate under stationary and non-stationary content popularity. Our results (i) testify the feasibility of content-oblivious caches and (ii) show that the proposed algorithm can achieve within 10% from the global optimum in our evaluation.
@inproceedings{DR:ITC28b,
  author       = {Tortelli, Michele and Rossi, Dario and Leonardi, Emilio},
  title        = {ModelGraft: Accurate, Scalable, and Flexible Performance Evaluation of General Cache Networks},
  booktitle    = {ITC28},
  month        = sep,
  year         = {2016},
  topic        = {icn,modeling,scaling},
  note         = {keyword=newnet,ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16itc28-b.pdf}
}
Large scale deployments of general cache networks, such as Content Delivery Networks or Information Centric Networking architectures, raise new challenges regarding their performance evaluation for network planning. On the one hand, analytical models can hardly represent in detail all the interactions of complex replacement, replication, and routing policies on arbitrary topologies. On the other hand, the sheer size of networks and content catalogs makes event-driven simulation techniques inherently non-scalable. We propose a new technique for the performance evaluation of large-scale caching systems that intelligently integrates elements of stochastic analysis within a Monte-Carlo simulative approach, that we colloquially refer to as ModelGraft. Our approach (i) leverages the intuition that complex scenarios can be mapped to a simpler equivalent scenario that builds upon Time-To-Live (TTL) caches; it (ii) significantly downscales the scenario to lower computation and memory complexity, while, at the same time, preserving its properties to limit accuracy loss; finally, it (iii) is simple to use and robust, as it autonomously converges to a consistent state through a feedback-loop control system, regardless of the initial state. Performance evaluation shows that, with respect to classic event-driven simulation, ModelGraft gains over two orders of magnitude in both CPU time and memory complexity, while limiting accuracy loss below 2%. In addition, we show that ModelGraft extends performance evaluation well beyond the boundaries of classic approaches, by enabling study of Internet-scale scenarios with content catalogs comprising hundreds of billions of objects.
@article{DR:TOMPECS-16,
  author       = {De Cicco, Luca and Gong, Yixi and Rossi, Dario and Leonardi, Emilio},
  title        = {A control theoretic analysis of low-priority congestion control reprioritization under AQM},
  journal      = {ACM Transactions on Modeling and Performance Evaluation of Computer Systems},
  volume       = {1},
  issue        = {4},
  pages        = {17:1--17:33},
  month        = sep,
  year         = {2016},
  topic        = {ledbat,ledbat+aqm},
  note         = {keyword=newnet,ledbat },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16tompecs.pdf}
}
Recently, a negative interplay has been shown to arise when scheduling/Active Queue Management (AQM) techniques and low-priority congestion control protocols are used together; namely, AQM resets the relative level of priority among congestion control protocols. This work explores this issue by carrying out a control-theoretic analysis of the dynamical system to prove some fundamental properties that fully characterize the reprioritization phenomenon. In particular, (i) we provide the closed-form solution of the equilibrium in the open loop (i.e., fixing a target loss probability p); (ii) we provide a stability analysis and a characterization of the reprioritization phenomenon when closing the loop with AQM (i.e., that dynamically adjusts the system loss probability). Our results are important as the characterization of the reprioritization phenomenon is not only quantitatively accurate for the specific protocols and AQM considered but also qualitatively accurate for a broader range of congestion control protocol and AQM combinations. Finally, while we find a sufficient condition to avoid the reprioritization phenomenon, we also show, at the same time, such conditions to be likely impractical: Therefore, we propose a simple and practical system-level solution that is able to reinstate priorities among protocols.
@inproceedings{DR:ICN-16a,
  title        = {Running ModelGraft to Evaluate Internet-scale ICN},
  author       = {Tortelli, M. and Rossi, D. and Leonardi, E.},
  booktitle    = {ACM ICN, Demo session},
  address      = {Kyoto, Japan},
  pages        = {213--214},
  month        = sep,
  year         = {2016},
  topic        = {icn,ccnsim,modeling,scaling},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16icn-a.pdf}
}
The analysis of Internet-scale Information-centric networks, and of cache networks in general, poses scalability issues like CPU and memory requirements, which cannot be easily targeted by either state-of-the-art analytical models or well-designed event-driven simulators. This demo focuses on showcasing performance of our new hybrid methodology, named ModelGraft, which we release as a simulation engine of the open-source ccnSim simulator: being able to seamlessly use a classic event-driven or the novel hybrid engine dramatically improves the flexibility and scalability of current simulative and analytical tools. In particular, ModelGraft combines elements and intuitions of stochastic analysis into a Monte-Carlo simulative approach, offering a reduction of over two orders of magnitude in both CPU time and memory occupancy, with respect to the purely event-driven version of ccnSim, notably one of the most scalable simulators for Information-centric networks. This demo consists in gamifying the aforementioned comparison: we represent ModelGraft vs event-driven simulation as two athletes running a 100-meter competition using sprite-based animations. Differences between the two approaches in terms of CPU time, memory occupancy, and results accuracy, are highlighted in the score-board.
@inproceedings{DR:ICN-16c,
  title        = {SLICT: Secure Localized Information Centric Things},
  author       = {Enguehard, M. and Droms, R. and Rossi, D.},
  booktitle    = {ACM ICN, IC5G workshop},
  address      = {Kyoto, Japan},
  pages        = {255--260},
  month        = sep,
  year         = {2016},
  topic        = {icn,iot},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16icn-c.pdf}
}
While the potential advantages of geographic forwarding in wireless sensor networks (WSN) have been demonstrated for a while now, research in applying Information Centric Networking (ICN) has only gained momentum in the last few years. In this paper, we bridge these two worlds by proposing an ICN-compliant and secure implementation of geographic forwarding for ICN. We implement as a proof of concept the Greedy Perimeter Stateless Routing (GPSR) algorithm and compare its performance to that of vanilla ICN forwarding. We also evaluate the cost of security in 802.15.4 networks in terms of energy, memory and CPU footprint. We show that in sparse but large networks, GPSR outperforms vanilla ICN forwarding in both memory footprint and CPU consumption. However, GPSR is more energy intensive because of the cost of communications.
@inproceedings{DR:ICN-16b,
  title        = {On the Cost of Secure Association of Information Centric Things},
  author       = {Enguehard, M. and Droms, R. and Rossi, D.},
  booktitle    = {ACM ICN, Poster session},
  address      = {Kyoto, Japan},
  pages        = {207--208},
  month        = sep,
  year         = {2016},
  topic        = {icn,iot},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16icn-b.pdf}
}
Information Centric Networking (ICN) paradigms nicely fit the world of wireless sensors, whose devices have tight constraints. In this poster, we compare two alternative designs for secure association of new IoT devices in existing ICN deployments, which are based on asymmetric and symmetric cryptography respectively. While the security properties of both approaches are equivalent, an interesting trade-off arises between properties of the protocol vs properties of its implementation in current IoT boards. Indeed, while the asymmetric-keys based approach incurs a lower traffic overhead (of about 30%), we find that its implementation is significantly more energy- and time-consuming due to the cost of cryptographic operations (it requires up to 41x more energy and 8x more time).
[SIGCOMM-QoE-16]
Bocchi, Enrico and De Cicco, Luca and Rossi, Dario,
"Measuring the Quality of Experience of Web users"
ACM SIGCOMM Workshop on QoE-based Analysis and Management of Data Communication Networks (Internet-QoE 2016), selected as best paper in the workshop for reprint in ACM SIGCOMM Comput. Commun. Rev.
aug.
2016,
Conference Award
@inproceedings{DR:SIGCOMM-QoE-16,
  author       = {Bocchi, Enrico and De Cicco, Luca and Rossi, Dario},
  title        = {Measuring the Quality of Experience of Web users},
  booktitle    = {ACM SIGCOMM Workshop on QoE-based Analysis and Management of Data Communication Networks (Internet-QoE 2016), selected as best paper in the workshop for reprint in ACM SIGCOMM Comput. Commun. Rev.},
  month        = aug,
  year         = {2016},
  topic        = {internetmeasurement,qoe},
  note         = {keyword=newnet,webqoe,bestpaperaward project=newnet},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16internet-qoe.pdf}
}
[JSAC-16]
Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur,
"Latency-Based Anycast Geolocalization: Algorithms, Software and Datasets"
In IEEE Journal on Selected Areas in Communications, Special issue on Measuring and Troubleshooting the Internet,
Vol. 34,
pp.1889–1903,
jun.
2016,
Journal
@article{DR:JSAC-16,
  author       = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur},
  title        = {Latency-Based Anycast Geolocalization: Algorithms, Software and Datasets},
  journal      = {IEEE Journal on Selected Areas in Communications, Special issue on Measuring and Troubleshooting the Internet},
  volume       = {34},
  issue        = {6},
  pages        = {1889--1903},
  month        = jun,
  year         = {2016},
  topic        = {internetmeasurement,anycast},
  note         = {keyword=mplane,anycast,gfra},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16jsac.pdf}
}
Use of IP-layer anycast has increased in the last few years beyond the DNS realm. Yet, existing measurement techniques to identify and enumerate anycast replicas exploit specifics of the DNS protocol, which limits their applicability to this particular service. With this paper, we not only propose and thoroughly validate a protocol-agnostic technique for anycast replicas discovery and geolocation, but also provide the community with open source software and datasets to replicate our experimental results, as well as facilitating the development of new techniques such as ours. In particular, our proposed method achieves thorough enumeration and city-level geolocalization of anycast instances from a set of known vantage points. The algorithm features an iterative workflow, pipelining enumeration (an optimization problem using latency as input) and geolocalization (a classification problem using side channel information such as city population) of anycast replicas. Results of a thorough validation campaign show our algorithm to be robust to measurement noise, and very lightweight as it requires only a handful of latency measurements.
@article{DR:COMNET-16a,
  author       = {Espinet, Francois and Joumblatt, Diana and Rossi, Dario},
  title        = {Framework, Models and Controlled Experiments of Network Troubleshooting},
  journal      = {Elsevier Computer Networks},
  volume       = {107},
  pages        = {36--54},
  month        = jun,
  year         = {2016},
  topic        = {troubleshooting},
  note         = {keyword=measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16comnet-a.pdf}
}
Growing network complexity mandates automated tools and methodologies for troubleshooting. In this paper, we follow a crowd-sourcing trend and argue for the need to deploy measurement probes at the edge of the network, which can be either under the control of the users (e.g., end-user devices) or the ISP (e.g., home gateways), and that raises an interesting tradeoff. Our first contribution consists in the definition of a framework for network troubleshooting, and its implementation as open source software named NetProbes. In data mining terms, depending on the amount of information available to the probes (e.g., ISP topology), we formalize the network troubleshooting task as either a clustering or a classification problem. In networking terms, these algorithms allow respectively end-users to assess the severity of the network performance degradation, and ISPs to precisely identify the faulty link. We solve both problems with an algorithm that achieves perfect classification under the assumption of a strategic selection of probes (e.g., assisted by an ISP), and assess its performance degradation under a naive random selection. Our algorithm is generic, as it is agnostic to the network performance metrics; scalable, as it requires firing only few measurement events and simple processing; flexible, as clustering and classification stages are pipelined, so that the execution naturally adapts to the information available at the vantage point where the probe is deployed; and reliable, as it produces results that match the expectations of simple analytical models. Our second contribution consists in a careful evaluation of the framework. Previous work on network troubleshooting has so far tackled the problem with either more theoretical or more practical approaches: inherently, evaluation methodologies lack either realism or control. 
In this paper, we counter this problem by conducting controlled experiments with a rigorous and reproducible methodology that contrasts expectations yielded by analytical models to the experimental results gathered running our NetProbes software in the Mininet emulator. As integral part of our methodology, we perform a thorough calibration of the measurement tools employed by NetProbes to measure two example metrics of interest, namely delay and bandwidth: we show this step to be crucial, as otherwise significant biases in the measurement techniques could lead to wrong assessment of algorithmic performance. Albeit our NetProbes software is far from being a carrier-grade solution for network troubleshooting (since it considers neither multiple contemporary measurements nor multiple failures, and given that we experiment with a limited number of metrics), our controlled study allows making several interesting observations that help in designing such an automated troubleshooting system.
@article{DR:COMNET-16b,
  author       = {Bocchi, Enrico and Safari Khatouni, Ali and Traverso, Stefano and Finamore, Alessandro and Munafo, Maurizio and Mellia, Marco and Rossi, Dario},
  title        = {Statistical Network Monitoring: Methodology and Application to Carrier-Grade NAT},
  journal      = {Elsevier Computer Networks},
  volume       = {107},
  month        = jun,
  year         = {2016},
  topic        = {internetmeasurement,passivemeasurement},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16comnet-b.pdf}
}
When considering to passively collect and then process network traffic traces, the need to analyze raw data at several Gbps and to extract higher level indexes from the stream of packets poses typical BigData-like challenges. In this paper, we engineer a methodology to extract, collect and process passive traffic traces. In particular, we design and implement analytics that, based on a filtering process and on the building of empirical distributions, enable the comparison between two generic collections, e.g., data gathered from two different vantage points, from different populations, or at different times. The ultimate goal is to highlight statistically significant differences that could be useful to flag incidents to the network manager. After introducing the methodology, we apply it to assess the impact of Carrier-Grade NAT (CGN), a technology that Internet Service Providers (ISPs) deploy to limit the usage of expensive public IP addresses. Since CGN may introduce connectivity issues and performance degradation, we process a large dataset of passive measurements collected from an ISP using CGN for part of its customers. We first extract detailed per-flow information by processing packets from live links. Then, we derive higher level statistics that are significant for the end-users, e.g., TCP connection setup time, HTTP response time, or BitTorrent average download throughput. At last, we contrast figures of customers being offered public or private addresses, and look for statistically significant differences. Results show that CGN does not impair quality of service in the analyzed ISP deployment. In addition, we use the collected data to derive useful figures for the proper dimensioning of the CGN and the configuration of its parameters in order to avoid impairments on end-user experience.
@inproceedings{DR:NETWORKING-16,
  author       = {Araldo, Andrea and Martignon, Fabio and Rossi, Dario},
  title        = {Representation Selection Problem: Optimizing Video Delivery through Caching},
  booktitle    = {IFIP Networking},
  pages        = {323--331},
  month        = may,
  year         = {2016},
  topic        = {icn,optimization,streaming},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16networking.pdf}
}
To cope with Internet video explosion, recent work proposes to deploy caches to absorb part of the traffic related to popular videos. Nonetheless, caching literature has mainly focused on network-centric metrics, while the quality of users’ video streaming experience should be the key performance index to optimize. Additionally, the general assumption is that each user request can be satisfied by a single object, which does not hold when multiple representations at different quality levels are available for the same video. Our contribution in this paper is to extend the classic object placement problem (which object to cache and where) by further considering the representation selection problem (i.e., which quality representation to cache), employing two methodologies to tackle this challenge. First, we employ a Mixed Integer Linear Programming (MILP) formulation to obtain the centralized optimal solution, as well as bounds to natural policies that are readily obtained as additional constraints of the MILP. Second, from the structure of the optimal solution, we learn guidelines that assist the design of distributed caching strategies: namely, we devise a simple yet effective distributed strategy that incrementally improves the quality of cached objects. Via simulation over large scale scenarios comprising up to hundred nodes and hundred million objects, we show our proposal to be effective in balancing user perceived utility vs bandwidth usage.
[SIMPAT-16]
Tortelli, Michele and Rossi, Dario and Boggia, Gennaro and Grieco, Luigi Alfredo,
"ICN software tools: survey and cross-comparison"
In Elsevier Simulation Modelling Practice and Theory,
Vol. 63,
pp.23–46,
apr.
2016,
Journal
@article{DR:SIMPAT-16,
  author       = {Tortelli, Michele and Rossi, Dario and Boggia, Gennaro and Grieco, Luigi Alfredo},
  title        = {ICN software tools: survey and cross-comparison},
  journal      = {Elsevier Simulation Modelling Practice and Theory},
  volume       = {63},
  pages        = {23--46},
  month        = apr,
  year         = {2016},
  topic        = {icn,ccnsim},
  note         = {keyword=mplane },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16simpat.pdf}
}
Research interest on Information Centric Networking (ICN) has been sharply growing. Although new architectures, algorithms, and analytical models have been proposed, their evaluation remains often isolated and not rigorously verified by the research community. This paper initially portrays the composition of open source software tools available for ICN, certifying the predominance of Content Centric Networking (CCN)/Named Data Networking (NDN) simulators. Then, inspired by similar works related to the P2P field, it surveys related research papers to qualify the ICN literature produced so far, finding that a large fraction of contributions either uses custom, proprietary, and unavailable software, or even plainly fails to mention any information in this regard. By adopting a rigorous methodology, in the second part of the paper four simulators, namely ndnSIM, ccnSim, CCNPL-Sim, and Icarus, are cross-compared under several key aspects. Our findings confirm both their accuracy with respect to reference theoretical models in simple settings, and their consistency in more complex scenario. Additionally, our analysis can both assist researchers in the choice of the tool that best fits their needs, and provide guidelines to avoid common pitfalls in the ICN performance evaluation.
[TMA-16]
Giordano, Danilo and Cicalese, Danilo and Finamore, Alessandro and Mellia, Marco and Munafo, Maurizio and Joumblatt, Diana and Rossi, Dario,
"A First Characterization of Anycast Traffic from Passive Traces"
IFIP workshop on Traffic Monitoring and Analysis (TMA),
apr.
2016,
Conference
@inproceedings{DR:TMA-16,
  author       = {Giordano, Danilo and Cicalese, Danilo and Finamore, Alessandro and Mellia, Marco and Munafo, Maurizio and Joumblatt, Diana and Rossi, Dario},
  title        = {A First Characterization of Anycast Traffic from Passive Traces},
  booktitle    = {IFIP workshop on Traffic Monitoring and Analysis (TMA)},
  pages        = {30--38},
  month        = apr,
  year         = {2016},
  topic        = {anycast,internetmeasurement,passivemeasurement},
  note         = {keyword=mplane,anycast,gfra },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16tma.pdf}
}
IP anycast routes packets to the topologically nearest server according to BGP proximity. In the last years, new players have started adopting this technology to serve web content via Anycast-enabled CDNs (A-CDN). To the best of our knowledge, in the literature, there are studies that focus on a specific A-CDN deployment, but little is known about the users and the services that A-CDNs are serving in the Internet at large. This prompted us to perform a passive characterization study, bringing out the principal A-CDN actors in our monitored setup, the services they offer, their penetration, etc. Results show a very heterogeneous picture, with A-CDN empowered services that are very popular (e.g., Twitter or Bing), serve a lot of different contents (e.g., Wordpress or adult content), and even include audio/video streaming (e.g., Soundcloud, or Vine). Our measurements show that the A-CDN technology is quite mature and popular, with more than 50% of web users that access content served by an A-CDN during peak time.
[INFOCOM-IC-16]
Bocchi, Enrico and De Cicco, Luca and Rossi, Dario,
"Web QoE: Moving beyond Google’s SpeedIndex"
Finalist at the IEEE INFOCOM Innovation Challenge,
apr.
2016,
Conference Runner-up
@inproceedings{DR:INFOCOM-IC-16,
  author       = {Bocchi, Enrico and De Cicco, Luca and Rossi, Dario},
  title        = {Web QoE: Moving beyond Google's SpeedIndex},
  booktitle    = {Finalist at the IEEE INFOCOM Innovation Challenge},
  month        = apr,
  year         = {2016},
  topic        = {qoe,internetmeasurement},
  note         = {keyword=webqoe,bestpaperrunnerup project=webqoe},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi16infocom-innovation-challenge.pdf}
}
@inproceedings{DR:DIRECTORSCUT-16,
  author       = {Bocchi, Enrico and De Cicco, Luca and Mellia, Marco and Rossi, Dario},
  title        = {HTTP/2 vs the Users: The Good, The Bad and The Ugly (Director's Cut)},
  booktitle    = {Technical Report},
  month        = apr,
  year         = {2016},
  topic        = {qoe,internetmeasurement},
  note         = {keyword=webqoe project=webqoe},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/director.pdf}
}
@inproceedings{DR:TECHREP-16,
  author       = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur},
  title        = {Latency-Based Anycast Geolocalization: Algorithms, Software and Datasets (Extended Technical Report)},
  booktitle    = {Tech. Rep.},
  year         = {2016},
  topic        = {anycast,internetmeasurement},
  note         = {keyword=mplane,anycast,gfra },
  howpublished = {http://www.enst.fr/~drossi/dataset/anycast/anycast-techrep.pdf}
}
2015 # 12
[CoNEXT-15]
Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Friedman, Timur and Rossi, Dario,
"Characterizing IPv4 Anycast Adoption and Deployment"
ACM CoNEXT, awarded the IRTF Applied Network Research Prize at IETF96
dec.
2015,
Conference Award
@inproceedings{DR:CoNEXT-15,
  author       = {Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Friedman, Timur and Rossi, Dario},
  title        = {Characterizing IPv4 Anycast Adoption and Deployment},
  booktitle    = {ACM CoNEXT, awarded the IRTF Applied Network Research Prize at IETF96},
  address      = {Heidelberg},
  month        = dec,
  year         = {2015},
  topic        = {anycast,internetmeasurement},
  note         = {keyword=mplane,anycast,bestpaperaward category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15conext.pdf}
}
This paper provides a comprehensive picture of IP-layer anycast adoption in the current Internet. We carry on multiple IPv4 anycast censuses, relying on latency measurement from PlanetLab. Next, we leverage our novel technique for anycast detection, enumeration, and geolocation to quantify anycast adoption in the Internet. Our technique is scalable and, unlike previous efforts that are bound to exploiting DNS, is protocol-agnostic. Our results show that major Internet companies (including tier-1 ISPs, over-the-top operators, Cloud providers and equipment vendors) use anycast: we find that a broad range of TCP services are offered over anycast, the most popular of which include HTTP and HTTPS by anycast CDNs that serve websites from the top-100k Alexa list. Additionally, we complement our characterization of IPv4 anycast with a description of the challenges we faced to collect and analyze large-scale delay measurements, and the lessons learned.
@inproceedings{DR:P2P-15,
  author       = {Schaub, Alexander and Rossi, Dario},
  title        = {Design and Analysis of an Improved BitMessage Anti-spam Mechanism},
  booktitle    = {IEEE P2P'XIV},
  month        = sep,
  year         = {2015},
  topic        = {p2p},
  note         = {keyword=inf570 category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15p2p.pdf}
}
The BitMessage protocol offers privacy to its anonymous users. It is a completely decentralized messaging system, enabling users to exchange messages preventing accidental eavesdropping – a nice feature in the Post-Snowden Internet Era. Not only are messages sent to every node on the network (making it impossible to understand the intended recipient), but their content is encrypted with the intended recipient public key (so that s/he only can decipher it). As these two properties combined might facilitate spamming, a proof-of-work (PoW) mechanism has been designed to mitigate this threat: only messages exhibiting properties of the PoW are forwarded on the network: since PoW is based on computationally heavy cryptographic functions, this slows down the rate at which spammers can introduce unsolicited messages in the network on the one hand, but also makes it harder to send legitimate messages for regular users on the other hand. In this paper, we (i) carry on an analysis of the current PoW mechanism, (ii) propose a very simple, yet very effective, generalization of the formula that decouples spammers vs legitimate users penalty showing that (iii) at the optimum, our proposal halves the harm spammers can do, avoiding by definition any impact for legitimate users.
@inproceedings{DR:ICN-15,
  author       = {Mansilha, R. and Saino, L. and Barcellos, M. and Gallo, M. and Leonardi, E. and Perino, D. and Rossi, D.},
  title        = {Hierarchical Content Stores in High-speed ICN Routers: Emulation and Prototype Implementation},
  booktitle    = {ACM SIGCOMM Conference on Information-Centric Networking (ICN'15)},
  pages        = {147--156},
  address      = {San Francisco, CA},
  month        = sep,
  year         = {2015},
  topic        = {system,icn},
  note         = {keyword=ccn category=article state=published project=diego},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15icn.pdf}
}
Recent work motivates the design of Information-centric routers that make use of hierarchies of memory to jointly scale in the size and speed of content stores. The present paper advances this understanding by (i) instantiating a general purpose two-layer packet-level caching system, (ii) investigating the solution design space via emulation, and (iii) introducing a proof-of-concept prototype. The emulation-based study reveals insights about the broad design space, the expected impact of workload, and gains due to multi-threaded execution. The full-blown system prototype experimentally confirms that, by exploiting both DRAM and SSD memory technologies, ICN routers can sustain cache operations in excess of 10Gbps running on off-the-shelf hardware.
[TRAC-15]
Bocchi, Enrico and Safari, Ali and Traverso, Stefano and Finamore, Alessandro and Di Gennaro, Valeria and Mellia, Marco and Munafo, Maurizio and Rossi, Dario,
"Impact of Carrier-Grade NAT on Web Browsing"
6th International Workshop on TRaffic Analysis and Characterization (TRAC), Best paper award
aug.
2015,
Conference Award
@inproceedings{DR:TRAC-15,
  author       = {Bocchi, Enrico and Safari, Ali and Traverso, Stefano and Finamore, Alessandro and Di Gennaro, Valeria and Mellia, Marco and Munafo, Maurizio and Rossi, Dario},
  title        = {Impact of Carrier-Grade NAT on Web Browsing},
  booktitle    = {6th International Workshop on TRaffic Analysis and Characterization (TRAC), Best paper award},
  month        = aug,
  year         = {2015},
  topic        = {internetmeasurement,passivemeasurement},
  note         = {keyword=mplane,bestpaperaward category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15trac.pdf}
}
Public IPv4 addresses are a scarce resource. While IPv6 adoption is lagging, Network Address Translation (NAT) technologies have been deployed over the last years to alleviate IPv4 exiguity and their high rental cost. In particular, Carrier-Grade NAT (CGN) is a well known solution to mask a whole ISP network behind a limited amount of public IP addresses, significantly reducing expenses. Despite its economical benefits, CGN can introduce connectivity issues which have sprouted a considerable effort in research, development and standardization. However, to the best of our knowledge, little effort has been dedicated to investigate the impact that CGN deployment may have on users traffic. This paper fills the gap. We leverage passive measurements from an ISP network deploying CGN and, by means of the Jensen-Shannon divergence, we contrast several performance metrics considering customers being offered public or private addresses. In particular, we gauge the impact of CGN presence on users web browsing experience. Our results testify that CGN is a mature and stable technology as, if properly deployed, it does not harm users web browsing experience. Indeed, while our analysis lets emerge expected stochastic differences of certain indexes (e.g., the difference in the path hop count), the measurements related to the quality of users browsing are otherwise unperturbed. Interestingly, we also observe that CGN
@inproceedings{DR:ICC-15,
  author       = {Spina, Michele and Rossi, Dario and Sozio, Mauro and Maniu, Silviu and Cautis, Bogdan},
  title        = {Snooping Wikipedia Vandals with MapReduce},
  booktitle    = {IEEE ICC},
  address      = {London, UK},
  month        = jun,
  year         = {2015},
  topic        = {wikipedia,bigdata},
  note         = {keyword=measurement category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15icc.pdf}
}
In this paper, we present and validate an algorithm able to accurately identify anomalous behaviors on online and collaborative social networks, based on their interaction with other fellows. We focus on Wikipedia, where accurate ground truth for the classification of vandals can be reliably gathered by manual inspection of the page edit history. We develop a distributed crawler and classifier tasks, both implemented in MapReduce, with which we are able to explore a very large dataset, consisting of over 5 million articles collaboratively edited by 14 million authors, resulting in over 8 billion pairwise interactions. We represent Wikipedia as a signed network, where positive arcs imply constructive interaction between editors. We then isolate a set of high reputation editors (i.e., nodes having many positive incoming links) and classify the remaining ones based on their interactions with high reputation editors. We demonstrate our approach not only to be practically relevant (due to the size of our dataset), but also feasible (as it requires few MapReduce iterations) and accurate (over 95% true positive rate). At the same time, we are able to classify only about half of the dataset editors (recall of 50%) for which we outline some solutions under study.
[INFOCOM-15b]
Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur,
"A Lightweight Anycast Enumeration and Geolocation"
IEEE INFOCOM, Demo Session
apr.
2015,
Conference
@inproceedings{DR:INFOCOM-15b,
  author       = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur},
  title        = {A Lightweight Anycast Enumeration and Geolocation},
  booktitle    = {IEEE INFOCOM, Demo Session},
  address      = {Hong Kong, China},
  month        = apr,
  year         = {2015},
  topic        = {anycast,internetmeasurement},
  note         = {keyword=anycast,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15infocom-b.pdf}
}
Several Internet services such as CDNs, DNS name servers, and sinkholes use IP-layer anycast to reduce user response times and increase robustness with respect to network failures and denial of service attacks. However, current geolocation tools fail with anycast IP addresses. In our recent work, we remedy this by developing an anycast detection, enumeration, and geolocation technique based on a set of delay measurements from a handful of geographically distributed vantage points. The technique (i) detects if an IP is anycast, (ii) enumerates replicas by finding the maximum set of non-overlapping disks (i.e., areas centered around vantage points), and (iii) geolocates the replicas by solving a classification problem and assigning the server location to the most likely city. We propose to demo this technique. In particular, we visually show how to detect an anycast IP, enumerate its replicas, and geolocate them on a map. The demo allows browsing previously geolocated services, as well as exploring new targets on demand.
@inproceedings{DR:INFOCOM-15a,
  author       = {Cicalese, Danilo and Joumblatt, Diana and Rossi, Dario and Buob, Marc-Olivier and Auge, Jordan and Friedman, Timur},
  title        = {A Fistful of Pings: Accurate and Lightweight Anycast Enumeration and Geolocation},
  booktitle    = {IEEE INFOCOM},
  address      = {Hong Kong, China},
  month        = apr,
  year         = {2015},
  topic        = {anycast,internetmeasurement},
  note         = {keyword=anycast,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15infocom.pdf}
}
Use of IP-layer anycast has increased in the last few years: once relegated to DNS root and top-level domain servers, anycast is now commonly used to assist distribution of general purpose content by CDN providers. Yet, the measurement techniques for discovering anycast replicas have been designed around DNS, limiting their usefulness to this particular service. This raises the need for protocol agnostic methodologies, that should additionally be as lightweight as possible in order to scale up anycast service discovery. This is precisely the aim of this paper, which proposes a new method for exhaustive and accurate enumeration and city-level geolocation of anycast instances, requiring only a handful of latency measurements from a set of known vantage points. Our method exploits an iterative workflow that enumerates (an optimization problem) and geolocates (a classification problem) anycast replicas. We thoroughly validate our methodology on available ground truth (several DNS root servers), using multiple measurement infrastructures (PlanetLab, RIPE), obtaining extremely accurate results (even with simple algorithms, that we compare with the global optimum), that we make available to the scientific community. Compared to the state of the art work that appeared in INFOCOM 2013 and IMC 2013, our technique (i) is not bound to a specific protocol, (ii) requires 1000 times fewer vantage points, not only (iii) achieves over 50% recall but also (iv) accurately identifies the city-level geolocation for over 78% of the enumerated servers, with (v) a mean geolocation error of 361 km for all enumerated servers.
@inproceedings{DR:TMA-15,
  author       = {Espinet, Francois and Joumblatt, Diana and Rossi, Dario},
  title        = {Zen and the art of network troubleshooting: a hands on experimental study},
  booktitle    = {Traffic Monitoring and Analysis (TMA'15)},
  address      = {Barcelona, Spain},
  month        = apr,
  year         = {2015},
  topic        = {troubleshooting},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15tma.pdf}
}
Growing network complexity necessitates tools and methodologies to automate network troubleshooting. In this paper, we follow a crowd-sourcing trend, and argue for the need to deploy measurement probes at end-user devices and gateways, which can be under the control of the users or the ISP. Depending on the amount of information available to the probes (e.g., ISP topology), we formalize the network troubleshooting task as either a clustering or a classification problem, that we solve with an algorithm that (i) achieves perfect classification under the assumption of a strategic selection of probes (e.g., assisted by an ISP) and (ii) operates blindly with respect to the network performance metrics, of which we consider delay and bandwidth in this paper. While previous work on network troubleshooting privileges a more theoretical vs practical approaches, our workflow balances both aspects as (i) we conduct a set of controlled experiments with a rigorous and reproducible methodology, (ii) on an emulator that we thoroughly calibrate, (iii) contrasting experimental results affected by real-world noise with expected results from a probabilistic model.
[DRCN-15]
Mazloum, Riad and Auge, Jordan and Rossi, Dario and Friedman, Timur,
"Errors Announcing 32-bit ASNs in BGP Routes"
11th International Conference on Design of Reliable Communication Networks (DRCN’15), Extended Abstract
mar.
2015,
Conference
@inproceedings{DR:DRCN-15,
  author       = {Mazloum, Riad and Auge, Jordan and Rossi, Dario and Friedman, Timur},
  title        = {Errors Announcing 32-bit ASNs in BGP Routes},
  booktitle    = {11th International Conference on Design of Reliable Communication Networks (DRCN'15), Extended Abstract},
  address      = {Kansas City, USA},
  month        = mar,
  year         = {2015},
  topic        = {bgp,internetmeasurement},
  note         = {keyword=measurement category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15drcn.pdf}
}
We present evidence of a phenomenon where there is large scale misconfigurations of BGP routers starting in 2007 and continuing to the present date. This has to do with the change from 16-bit to 32-bit autonomous system (AS) numbering and the employment of the fictitious AS, AS23456. This error strongly biases estimations of the number of ASes performing a type of inter-domain routing called multi-exit routing. In data from 2010, over half of apparent cases are in fact false. We show how to detect this error and obtain a truer picture of the extent of multi-exit routing.
[AIMS-15]
Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Rossi, Dario and Friedman, Timur,
"Anycast census and geolocation"
7th Workshop on Active Internet Measurements (AIMS 2015)
mar.
2015,
Conference
@inproceedings{DR:AIMS-15,
  author       = {Cicalese, Danilo and Auge, Jordan and Joumblatt, Diana and Rossi, Dario and Friedman, Timur},
  title        = {Anycast census and geolocation},
  booktitle    = {7th Workshop on Active Internet Measurements (AIMS 2015)},
  address      = {San Diego, CA},
  month        = mar,
  year         = {2015},
  topic        = {anycast,internetmeasurement},
  note         = {keyword=anycast,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15aims.pdf}
}
@article{DR:TPDPS-15,
  author       = {Araldo, Andrea and Rossi, Dario and Martignon, Fabio},
  title        = {Cost-aware caching: Caching more (costly items) for less (ISPs operational expenditures)},
  journal      = {IEEE Transactions on Parallel and Distributed Systems},
  year         = {2015},
  keywords     = {Approximation methods;Catalogs;Computational modeling;Economics;Internet;Load modeling;Proposals},
  doi          = {10.1109/TPDS.2015.2433296},
  issn         = {1045-9219},
  topic        = {icn,optimization},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi15tpdps.pdf}
}
Albeit an important goal of caching is traffic reduction, a perhaps even more important aspect follows from the above achievement: the reduction of Internet Service Provider (ISP) operational costs that comes as a consequence of the reduced load on transit and provider links. Surprisingly, to date this crucial aspect has not been properly taken into account in cache design. In this paper, we show that the classic caching efficiency indicator, i.e. the hit ratio, conflicts with cost. We therefore propose a mechanism whose goal is the reduction of cost and, in particular, we design a Cost-Aware (CoA) cache decision policy that, leveraging price heterogeneity among external links, tends to store with more probability the objects that the ISP has to retrieve through the most expensive links. We provide a model of our mechanism, based on Che’s approximation, and, by means of a thorough simulation campaign, we contrast it with traditional cost-blind schemes, showing that CoA yields a significant cost saving, that is furthermore consistent over a wide range of scenarios. We show that CoA is easy to implement and robust, making the proposal of practical relevance.
Caching is frequently used by Internet Service Providers as a viable technique to reduce the latency perceived by end users, while jointly offloading network traffic. While the cache hit-ratio is generally considered in the literature as the dominant performance metric for such type of systems, in this paper we argue that a critical missing piece has so far been neglected. Adopting a radically different perspective, in this paper we explicitly account for the cost of content retrieval, i.e. the cost associated to the external bandwidth needed by an ISP to retrieve the contents requested by its customers. Interestingly, we discover that classical cache provisioning techniques that maximize cache efficiency (i.e., the hit-ratio), lead to suboptimal solutions with higher overall cost. To show this mismatch, we propose two optimization models that either minimize the overall costs or maximize the hit-ratio, jointly providing cache sizing, object placement and path selection. We formulate a polynomial-time greedy algorithm to solve the two problems and analytically prove its optimality. We provide numerical results and show that significant cost savings are attainable via a cost-aware design.
@inproceedings{DR:ICN-14a,
  author       = {Araldo, Andrea and Rossi, Dario and Martignon, Fabio},
  title        = {Design and Evaluation of Cost-aware Information Centric Routers},
  booktitle    = {1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014)},
  pages        = {147--156},
  address      = {Paris, France},
  month        = sep,
  year         = {2014},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14icn-a.pdf}
}
Albeit an important goal of Information Centric Networking (ICNs) is traffic reduction, a perhaps even more important aspect follows from the above achievement: the reduction of ISP operational costs that comes as a consequence of the reduced load on transit and provider links. Surprisingly, to date this crucial aspect has not been properly taken into account, neither in the architectural design, nor in the operation and management of ICN proposals. In this work, we instead design a distributed cost-aware scheme that explicitly considers the cost heterogeneity among different links. We contrast our scheme with both traditional cost-blind schemes and optimal results. We further propose an architectural design to let multiple schemes be interoperable, and finally assess whether overlooking implementation details could hamper the practical relevance of our design. Numerical results show that our cost-aware scheme can yield significant cost savings, that are furthermore consistent over a wide range of scenarios.
@inproceedings{DR:ICN-14b,
  author       = {Rossini, Giuseppe and Rossi, Dario},
  title        = {Coupling caching and forwarding: Benefits, analysis, and implementation},
  booktitle    = {1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014)},
  pages        = {127--136},
  address      = {Paris, France},
  month        = sep,
  year         = {2014},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14icn-b.pdf}
}
A recent debate revolves around the usefulness of pervasive caching, i.e., adding caching capabilities to possibly every router of the future Internet. Recent research argues against it, on the ground that it provides only very limited gain with respect to the current CDN scenario, where caching only happens at the network edge. In this paper, we instead show that advantages of ubiquitous caching appear only when meta-caching (i.e., whether or not cache the incoming object) and forwarding (i.e., where to direct requests in case of cache miss) decisions are tightly coupled. Summarizing our contributions, we (i) show that gains can be obtained provided that ideal Nearest Replica Routing (iNRR) forwarding and Leave a Copy Down (LCD) meta-caching are jointly in use, (ii) model the iNRR forwarding policy, (iii) provide two alternative implementations that arbitrarily closely approximate iNRR behavior, and (iv) promote cross-comparison by making our code available to the community.
[ICN-14e]
Tortelli, Michele and Rossi, Dario and Boggia, Gennaro and Grieco, Luigi Alfredo,
"CCN Simulators: Analysis and Cross-Comparison"
1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014), Demo Session
sep.
2014,
Conference
@inproceedings{DR:ICN-14e,
  author       = {Tortelli, Michele and Rossi, Dario and Boggia, Gennaro and Grieco, Luigi Alfredo},
  title        = {CCN Simulators: Analysis and Cross-Comparison},
  booktitle    = {1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014), Demo Session},
  pages        = {197--198},
  address      = {Paris, France},
  month        = sep,
  year         = {2014},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14icn-e.pdf}
}
@inproceedings{DR:ICN-14d,
  author       = {Imbrenda, Claudio and Muscariello, Luca and Rossi, Dario},
  title        = {Analyzing Cacheability in the Access Network with HACkSAw},
  booktitle    = {1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014), Demo Session},
  pages        = {201--202},
  address      = {Paris, France},
  month        = sep,
  year         = {2014},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14icn-d.pdf}
}
Web traffic is growing, and the need for accurate traces of HTTP traffic is therefore also rising, both for operators and researchers, as accurate HTTP traffic traces allow to analyse and characterize the traffic and the clients, and to analyse the performance of the network and the perceived quality of service for the final users. Since most ICN proposals also advocate for pervasive caching, it is imperative to measure the cacheability of traffic to assess the impact and/or the potential benefits of such solutions. This demonstration will show both a tool to collect HTTP traces that is fast and accurate and that overcomes the limitations of existing tools, and a set of important statistics that can be computed in post processing, like aggregate/demultiplexed cacheability figures.
@inproceedings{DR:ICN-14c,
  author       = {Imbrenda, Claudio and Muscariello, Luca and Rossi, Dario},
  title        = {Analyzing Cacheable Traffic in ISP Access Networks for Micro CDN applications via Content-Centric Networking},
  booktitle    = {1st ACM SIGCOMM Conference on Information-Centric Networking (ICN-2014)},
  pages        = {57--66},
  address      = {Paris, France},
  month        = sep,
  year         = {2014},
  note         = {keyword=ccn,measurement,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14icn-c.pdf}
}
Web content coming from outside the ISP is today skyrocketing, causing significant additional infrastructure costs to network operators. The reduced marginal revenues left to ISPs, whose business is almost entirely based on declining flat rate subscriptions, call for significant innovation within the network infrastructure, to support new service delivery. In this paper, we suggest the use of micro CDNs in ISP access and back-haul networks to reduce redundant web traffic within the ISP infrastructure while improving user’s QoS. With micro CDN we refer to a content delivery system composed of (i) a high speed caching substrate, (ii) a content based routing protocol and (iii) a set of data transfer mechanisms made available by content-centric networking. The contribution of this paper is twofold. First, we extensively analyze more than one month of web traffic via continuous monitoring between the access and back-haul network of Orange in France. Second, we characterize key properties of monitored traffic, such as content popularity and request cacheability, to infer potential traffic reduction enabled by the introduction of micro CDNs. Based on these findings, we then perform micro CDN dimensioning in terms of memory requirements and provide guidelines on design choices
@inproceedings{DR:QICN-14,
  author       = {Tortelli, Michele and Rossi, Dario and Boggia, Gennaro and Grieco, Luigi Alfredo},
  title        = {Pedestrian Crossing: The Long and Winding Road toward Fair Cross-comparison of ICN Quality},
  booktitle    = {International Workshop on Quality, Reliability, and Security in Information-Centric Networking (Q-ICN)},
  address      = {Rhodes, Greece},
  month        = aug,
  year         = {2014},
  note         = {keyword=ccn },
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14qicn.pdf}
}
While numerous Information Centric Networking (ICN) architectures have been proposed over the last years, the community has so far only timidly attempted at a quantitative assessment of the relative quality of service level that users are expected to enjoy in each of them. This paper starts a journey toward the cross comparison of ICN alternatives, making several contributions along this road. Specifically, a census of 20 ICN software tools reveals that about 10 are dedicated to a specific architecture, about half of which are simulators. Second, we survey ICN research papers using simulation to gather information concerning the used simulator, finding that a large fraction either uses custom proprietary and unavailable software, or even plainly fails to mention any information on this regard, which is deceiving. Third, we cross-compare some of the available simulators, finding that they achieve consistent results, which is instead encouraging. Fourth, we propose a methodology to increase and promote cross-comparison, which is within reach but requires community-wide agreement, promotion and enforcement.
@inproceedings{DR:TRAC-14,
  author       = {Araldo, Andrea and Rossi, Dario},
  title        = {A per-Application Account of Bufferbloat: Causes and Impact on Users},
  booktitle    = {5th International Workshop on TRaffic Analysis and Characterization (TRAC), Best paper award},
  address      = {Nicosia, Cyprus},
  month        = aug,
  year         = {2014},
  note         = {keyword=measurement,bufferbloat,bestpaperaward category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14trac.pdf}
}
We propose a methodology to gauge the extent of queueing delay (aka bufferbloat) in the Internet, based on purely passive measurement of TCP traffic. We implement our methodology in Tstat and make it available as open source software. We leverage Deep Packet Inspection (DPI) and behavioral classification of Tstat to breakdown the queueing delay across different applications, in order to evaluate the impact of bufferbloat on user experience. We show that there is no correlation between the ISP traffic load and the queueing delay, thus confirming that bufferbloat is related only to the traffic of each single user (or household). Finally, we use frequent itemset mining techniques to associate the amount of queueing delay seen by each host with the set of its active applications, with the goal of investigating the root cause of bufferbloat.
[COMMAG-14]
Trammell, Brian and Casas, Pedro and Rossi, Dario and Bar, Arian and Ben-Houidi, Zied and Leontiadis, Ilias and Szemethy, Tivadar and Mellia, Marco,
"mPlane: an Intelligent Measurement Plane for the Internet"
In IEEE Communications Magazine, Special Issue on Monitoring and Troubleshooting Multi-domain Networks using Measurement Federations,
pp.148-156,
may.
2014,
Journal
@article{DR:COMMAG-14,
  author       = {Trammell, Brian and Casas, Pedro and Rossi, Dario and Bar, Arian and Ben-Houidi, Zied and Leontiadis, Ilias and Szemethy, Tivadar and Mellia, Marco},
  title        = {mPlane: an Intelligent Measurement Plane for the Internet},
  journal      = {IEEE Communications Magazine, Special Issue on Monitoring and Troubleshooting Multi-domain Networks using Measurement Federations},
  volume       = {52},
  number       = {5},
  pages        = {148--156},
  month        = may,
  year         = {2014},
  note         = {keyword=measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14commag.pdf}
}
The Internet universality is based on its decentralization and diversity. However, its distributed nature leads to operational brittleness and difficulty in identifying the root causes of performance and availability issues, especially when the involved systems span multiple administrative domains. The first step to address this fragmentation is coordinated measurement: we propose to complement the current Internet data and control planes with a measurement plane, or mPlane for short. mPlane distributed measurement infrastructure collects and analyzes traffic measurements at a wide variety of scales to monitor the network status. Its architecture is centered on a flexible control interface, allowing the incorporation of existing measurement tools through lightweight mPlane proxy components, and offering dynamic support for new capabilities. A focus on automated, iterative measurement makes the platform well-suited to troubleshooting support. This is supported by a reasoning system, which applies machine learning algorithms to learn from success and failure in drilling down to the root cause of a problem. This article describes the mPlane architecture and shows its applicability to several distributed measurement problems involving content delivery networks and Internet service providers. A first case study presents the tracking and iterative analysis of cache selection policies in Akamai, while a second example focuses on the cooperation between Internet service providers and content delivery networks to better orchestrate their traffic engineering decisions and jointly improve their performance.
@inproceedings{DR:INFOCOM-14,
  author       = {Rossini, G. and Rossi, D. and Garetto, M. and Leonardi, E.},
  title        = {Multi-Terabyte and Multi-Gbps Information Centric Routers},
  booktitle    = {IEEE INFOCOM},
  address      = {Toronto, Canada},
  month        = apr,
  year         = {2014},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14infocom.pdf}
}
One of the main research directions along which the future Internet is evolving can be identified in the paradigmatic shift from a network of hosts toward a network of caches. Yet, several questions remain concerning the scalability of individual algorithms (e.g., name based lookup and routing) and components (e.g., caches) of these novel Information Centric Networking (ICN) architectures. Exploiting a peculiar characteristic of ICN (i.e., the fact that contents are split in chunks), and the nature of video streaming (which dominates Internet traffic), this paper proposes a novel two-layer caching scheme that allows multi-Terabyte caches to sustain content streaming at multi-Gbps speed. We model the system as an extension, to the case of chunked contents, of the well known Che approximation, that has the advantage of being very simple and accurate at the same time. Simulations under synthetic and realistic trace-driven traffic confirm the accuracy of the analysis and the feasibility of the proposed architecture.
@inproceedings{DR:TMA-14a,
  author       = {Colabrese, S. and Rossi, D. and Mellia, M.},
  title        = {Aggregation of statistical data from passive probes: Techniques and best practices},
  booktitle    = {Traffic Measurement and Analysis (TMA)},
  pages        = {38--50},
  address      = {London, UK},
  month        = apr,
  year         = {2014},
  note         = {keyword=measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14tma-a.pdf}
}
Passive probes continuously generate statistics on large number of metrics, that are possibly represented as probability mass functions (pmf). The need for consolidation of several pmfs arises in two contexts, namely: (i) whenever a central point collects and aggregates measurement of multiple disjoint vantage points, and (ii) whenever a local measurement processed at a single vantage point needs to be distributed over multiple cores of the same physical probe, in order to cope with growing link capacity. In this work, we take an experimental approach and study both cases using, whenever possible, open source software and datasets. Considering different consolidation strategies, we assess their accuracy in estimating pmf deciles (from the 10th to the 90th) of diverse metrics, obtaining general design and tuning guidelines. In our dataset, we find that Monotonic Spline Interpolation over a larger set of percentiles (e.g., adding 5th, 10th, 15th, and so on) allow fairly accurate pmf consolidation in both the multiple vantage points (median error is about 1%, maximum 30%) and local processes (median 0.1%, maximum 1%) cases.
@inproceedings{DR:TMA-14c,
  author       = {Rossi, Dario and Pujol, Guilhem and Wang, Xiao and Mathieu, Fabien},
  title        = {Peeking Through the BitTorrent Seedbox Hosting Ecosystem},
  booktitle    = {Traffic Measurement and Analysis (TMA)},
  month        = apr,
  year         = {2014},
  note         = {keyword=measurement,inf570 category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14tma-c.pdf}
}
In this paper, we propose a lightweight method for detecting and classifying BitTorrent content providers with a minimal amount of resources. While heavy methodologies are typically used (which require long term observation and data exchange with peers of the swarm and/or a semantic analysis of torrent websites), we instead argue that such complexity can be avoided by analyzing the correlations between peers and torrents. We apply our methodology to study over 50K torrents injected in ThePirateBay during one month, collecting more than 400K IP addresses. In short, we find that exploiting the correlations not only enhances the classification accuracy keeping the technique lightweight (our methodology reliably identifies about 150 seedboxes), but also uncovers seeding behaviors that were not previously noticed (e.g., multi-port and multi-host seeding). Finally, we correlate the popularity of seedbox hosting in our dataset to criteria (e.g., cost, storage space, Web popularity) that can bias the selection process of BitTorrent content providers.
[TMA-14b]
Nassopulos, Georges and Rossi, Dario and Gringoli, Francesco and Nava, Lorenzo and Dusi, Maurizio and del Rio, Pedro Maria Santiago,
"Flow management at multi-Gbps: tradeoffs and lessons learned"
Traffic Measurement and Analysis (TMA)
apr.
2014,
Conference
@inproceedings{DR:TMA-14b,
  author       = {Nassopulos, Georges and Rossi, Dario and Gringoli, Francesco and Nava, Lorenzo and Dusi, Maurizio and del Rio, Pedro Maria Santiago},
  title        = {Flow management at multi-Gbps: tradeoffs and lessons learned},
  booktitle    = {Traffic Measurement and Analysis (TMA)},
  pages        = {1--14},
  month        = apr,
  year         = {2014},
  note         = {keyword=measurement,10Gbps+ category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14tma-b.pdf}
}
While the ultimate goal of kernel-level network stacks is to manage individual packets at line rate, the goal of user-level network monitoring applications is instead to match packets with the flow they belong to, and take actions accordingly. With current improvements in Network Interface Cards hardware and network software stacks, traffic monitors and traffic analyzers are fed with multi-Gbps streams of packets – which de facto pushes bottlenecks from kernel-level networking stack up to user-level applications. In this paper, we argue that flow management is a crucial module for any user-application that needs to process traffic at multiple Gbps, and we study the performance impact of different design choices of the flow management module by adopting a trace-driven emulation approach. While our results do not show a single “best” system setting under all circumstances, they highlight several tradeoffs, in terms of, e.g., the kind of structure, its size, and the computational complexity, that may affect system performance in a non-trivial way. We further make our software tools available to the scientific community to promote sharing of best practices.
@inproceedings{DR:PAM-14b,
  author       = {Casoria, P. and Rossi, D. and Auge, J. and Buob, Marc-Olivier and Friedman, T. and Pescape, A.},
  title        = {Distributed active measurement of Internet queuing delays},
  booktitle    = {Passive and Active Measurement (PAM), Extended Abstract},
  address      = {Los Angeles, USA},
  month        = mar,
  year         = {2014},
  note         = {keyword=bufferbloat,mplane,measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14pam-b.pdf}
}
Despite growing link capacities, over-dimensioned buffers are still causing, in the Internet of the second decade of the third millennium, hosts to suffer from severe queuing delays (or bufferbloat). While maximum bufferbloat possibly exceeds a few seconds, it is far less clear how often this maximum is hit in practice. This paper reports on our ongoing work to build a spatial and temporal map of Internet bufferbloat, describing a system based on distributed agents running on PlanetLab that aims at providing a quantitative answer to the above question.
[PAM-14c]
Mazloum, R. and Buob, M.-O. and Auge, J. and Baynat, B. and Friedman, T. and Rossi, D.,
"Violation of Interdomain Routing Assumptions"
Passive and Active Measurement (PAM),
mar.
2014,
Conference
@inproceedings{DR:PAM-14c,
  author       = {Mazloum, R. and Buob, M.-O. and Auge, J. and Baynat, B. and Friedman, T. and Rossi, D.},
  title        = {Violation of Interdomain Routing Assumptions},
  booktitle    = {Passive and Active Measurement (PAM)},
  address      = {Los Angeles, USA},
  month        = mar,
  year         = {2014},
  note         = {keyword=bgp,mplane,measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14pam-c.pdf}
}
@inproceedings{DR:PAM-14a,
  author       = {Colabrese, S. and Rossi, D. and Mellia, M.},
  title        = {Scalable accurate consolidation of passively measured statistical data},
  booktitle    = {Passive and Active Measurement (PAM), Extended Abstract},
  pages        = {262--264},
  address      = {Los Angeles, USA},
  month        = mar,
  year         = {2014},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14pam-a.pdf}
}
Passive probes continuously collect a significant amount of traffic volume, and autonomously generate statistics on a large number of metrics. A common statistical output of passive probe is represented by probability mass functions (pmf). The need for consolidation of several pmfs arises in two contexts, namely: (i) whenever a central point collects and aggregates measurement of multiple disjoint vantage points, and (ii) whenever a local measurement processed at a single vantage point needs to be distributed over multiple cores of the same physical probe, in order to cope with growing link capacity. Taking an experimental approach, we study both cases assessing the impact of different consolidation strategies, obtaining general design and tuning guidelines.
BitTorrent, one of the most widespread file-sharing P2P applications, recently introduced LEDBAT, a novel congestion control protocol aiming at (i) limiting the additional delay due to queuing, to reduce interference with the rest of user traffic (e.g., Web, VoIP and gaming) sharing the same access bottleneck, and (ii) efficiently using the available link capacity, to provide users with good BitTorrent performance at the same time. In this work, we adopt two complementary perspectives: namely, a flow viewpoint to assess the Quality of Service (QoS) as in classic congestion control studies, and a BitTorrent swarm viewpoint to assess peer-to-peer users Quality of Experience (QoE). We additionally point out that congestion control literature is rich of protocols, such as VEGAS, LP, and NICE sharing similarities with LEDBAT, that is therefore mandatory to consider in the analysis. Hence, adopting the above viewpoints we both (i) contrast LEDBAT to the other protocols and (ii) provide deep understanding of the novel protocol and its implication on QoS and QoE. Our simulation based investigation yields several insights. At flow-level, we gather LEDBAT to be lowest priority among all protocols, which follows from its design that strives to explicitly bound the queuing delay at the bottleneck link to a maximum target value. At the same time, we see that this very same protocol parameter can be exploited by adversaries, that can set a higher target to gain an unfair advantage over competitors. Interestingly, swarm-level performance exhibit an opposite trade-off, with smaller targets being more advantageous for QoE of BitTorrent users. This can be explained with the fact that larger delay targets slow down BitTorrent signaling task, with possibly negative effect on the swarming protocol efficiency. 
Additionally, we see that for the above reason, in heterogeneous swarms, any delay-based protocol (i.e., not only LEDBAT but also VEGAS or NICE) can yield a competitive QoE advantage over loss-based TCP. Overall this tension between swarm and flow-levels suggests that, at least in current ADSL/cable access bottleneck scenarios, a safe LEDBAT operational point may be used in practice. At the same time, our results also point out that benefits similar to LEDBAT can also be gathered with other delay-based protocols such as VEGAS or NICE.
@article{DR:COMNET-14b,
  author       = {Gong, YiXi and Rossi, Dario and Testa, Claudio and Valenti, Silvio and Taht, Dave},
  title        = {{Fighting the bufferbloat: on the coexistence of AQM and low priority congestion control (extended version)}},
  journal      = {Computer Networks},
  volume       = {65},
  pages        = {255--267},
  year         = {2014},
  issn         = {1389-1286},
  note         = {keyword=measurement,ledbat,tengineer,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi14comnet-b.pdf}
}
Nowadays, due to excessive queuing, delays on the Internet can grow longer than the round trip time between the Moon and the Earth – for which the “bufferbloat” term was recently coined. Some point to active queue management (AQM) as the solution. Others propose end-to-end low-priority congestion control techniques (LPCC). Under both approaches, promising advances have been made in recent times: notable examples are CoDel for AQM, and LEDBAT for LPCC. In this paper, we warn of a potentially fateful interaction when AQM and LPCC techniques are combined: namely, AQM resets the relative level of priority between best-effort and low-priority congestion control protocols. We validate the generality of our findings by an extended set of experiments with packet-level ns2 simulation, considering 5 AQM techniques and 3 LPCC protocols, and carry on a thorough sensitivity analysis varying several parameters of the networking scenario. We complete the simulation via an experimental campaign conducted on both controlled testbeds and on the Internet, confirming the reprioritization issue to hold in the real world at least under all combinations of AQM policies and LPCC protocols available in the Linux kernel. To promote cross-comparison, we make our scripts and dataset available to the research community.
[PATENT-US10530893B2]
Rossi, D. and Rossini, G.,
"Method for managing packets in a network of Information Centric Networking (ICN) nodes" , Patent EPO14305866.7, US10530893B2 patent, keyword=ccn
2014,
Patent
@misc{DR:PATENT-US10530893B2,
  author       = {Rossi, D. and Rossini, G.},
  title        = {Method for managing packets in a network of Information Centric Networking (ICN) nodes},
  howpublished = {Patent EPO14305866.7, US10530893B2 },
  year         = {2014},
  note         = { patent, keyword=ccn},
  patent       = {True}
}
[PATENT-EP2940950B1]
Rossini, Giuseppe and Rossi, Dario and Garetto, Michele and Leonardi, Emilio,
"Information Centric Networking (ICN) router" , Patent EPO14305639.8, WO EP US JP EP2940950B1 patent, keyword=ccn
2014,
Patent
@misc{DR:PATENT-EP2940950B1,
  author       = {Rossini, Giuseppe and Rossi, Dario and Garetto, Michele and Leonardi, Emilio},
  title        = {Information Centric Networking (ICN) router},
  howpublished = {Patent EPO14305639.8, WO EP US JP EP2940950B1},
  year         = {2014},
  note         = { patent, keyword=ccn},
  patent       = {True}
}
@inproceedings{DR:GLOBECOM-13,
  author       = {Chirichella, C. and Rossi, D. and Testa, C. and Friedman, T. and Pescape, A.},
  title        = {Passive bufferbloat measurement exploiting transport layer information},
  booktitle    = {IEEE GLOBECOM},
  month        = dec,
  year         = {2013},
  note         = {keyword=traffic,ledbat category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13globecom.pdf}
}
@inproceedings{DR:CONEXT-13,
  author       = {Araldo, A. and Rossi, D.},
  title        = {Dissecting Bufferbloat: Measurement and Per-Application Breakdown of Queueing Delay},
  booktitle    = {ACM CoNEXT'13, Student Workshop},
  pages        = {25--26},
  month        = dec,
  year         = {2013},
  note         = {keyword=bufferbloat,measurement category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13conext.pdf}
}
We propose a passive methodology to estimate the queueing delay incurred by TCP traffic, and additionally leverage DPI classification to break down the delay across different applications. Ultimately, we correlate the queueing delay to the performance perceived by the users of those applications, depending on their delay-sensitivity. We implement our methodology in Tstat, and make it available as open source software to the community. We validate and tune the tool, and run a preliminary measurement campaign based on a real ISP traffic trace, showing interesting yet partly counter-intuitive results.
@inproceedings{DR:ITC-13,
  author       = {Gong, YiXi and Rossi, Dario and Leonardi, Emilio},
  title        = {Modeling the interdependency of low-priority congestion control and active queue management},
  booktitle    = {The 25th International Teletraffic Congress (ITC25), Runner-up for Best Paper Award},
  month        = sep,
  year         = {2013},
  note         = {keyword=ledbat,bestpaperrunnerup category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13itc.pdf}
}
Recently, a negative interplay has been shown to arise when scheduling/AQM techniques and low-priority congestion control protocols are used together: namely, AQM resets the relative level of priority among congestion control protocols. This work explores this issue by (i) studying a fluid model that describes system dynamics of heterogeneous congestion control protocols competing on a bottleneck link governed by AQM and (ii) proposing a system level solution able to reinstate priorities among protocols.
@inproceedings{DR:CAMAD-13,
  author       = {Bellante, Walter and Vilardi, Rosa and Rossi, Dario},
  title        = {On Netflix catalog dynamics and caching performance},
  booktitle    = {IEEE CAMAD},
  institution  = {Telecom ParisTech},
  address      = {Berlin, Germany},
  month        = sep,
  year         = {2013},
  note         = {keyword=traffic category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13camad.pdf}
}
Multimedia streaming applications have substantially changed the market policy of an increasing number of content providers that offer streaming services to the users. The need for effective video content delivery re-fueled interest for caching: since the Web-like workloads of the 90s are no longer fit to describe the new Web of videos, in this work we investigate the suitability of the publicly available Netflix dataset for caching studies. Our analysis shows that, as the dataset continuously evolves (i) a steady state description is not statistically meaningful and (ii) although the cache hit ratio decreases due to the growth of active movies in the catalog, simple caching replacement approaches are close to the optimum given the growing skew in the popularity distribution over the time. Additionally, we point out that, since the dataset reports logs of movie ratings, anomalies arise when ratings are considered to be movie views. At the same time, we show anomalies yield conservative caching results, that reinforces the soundness of our study.
@inproceedings{DR:P2P-13b,
  author       = {Rossi, D. and Nicolas, Y. and Wolff, D. and Finamore, A.},
  title        = {I tube, YouTube, P2PTube: assessing ISP benefits of peer-assisted caching of YouTube content},
  booktitle    = {IEEE P2P'XIII},
  institution  = {Telecom ParisTech},
  address      = {Trento, Italy},
  month        = sep,
  year         = {2013},
  note         = {keyword=traffic,inf570 category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13p2p-b.pdf}
}
This paper proposes P2PTube, a very simple yet effective set-top-box system to assist diffusion of YouTube videos. We argue that, due to the spatial and temporal nature of video requests, the simplest design already provides non marginal gains. Our trace driven evaluation shows that, with moderate cache size (100MB) and nominal upload rates (500Kbps), about half of the video requests could be served by P2PTube. Interestingly, we also see that non marginal gains are already achievable with tiny caches – which is tied to the presence of advertisement prior to the actual video requested by the user.
@inproceedings{DR:P2P-13a,
  author       = {Testa, C. and Rossi, D. and Rao, A. and Legout, A.},
  title        = {Data Plane Throughput vs Control Plane Delay: Experimental Study of BitTorrent Performance},
  booktitle    = {IEEE P2P'XIII},
  address      = {Trento, Italy},
  month        = sep,
  year         = {2013},
  note         = {keyword=ledbat,bittorrent category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13p2p-a.pdf}
}
In this paper, we address the trade-off between the data plane efficiency and the control plane timeliness for the BitTorrent performance. We argue that loss-based congestion control protocols can fill large buffers, leading to a higher end-to-end delay, unlike low-priority or delay-based congestion control protocols. We perform experiments for both the uTorrent and mainline BitTorrent clients, and we study the impact of uTP (a novel transport protocol proposed by BitTorrent) and several TCP congestion control algorithms (Cubic, New Reno, LP, Vegas and Nice) on the download completion time. Briefly, in case peers in the swarm all use the same congestion control algorithm, we observe that the specific algorithm has only a limited impact on the swarm performance. Conversely, when a mix of TCP congestion control algorithms coexists, peers employing a delay-based low-priority algorithm exhibit shorter completion time.
@article{DR:COMNET-13,
  author       = {},
  title        = {Rethinking low extra delay background transport protocols},
  journal      = {Elsevier Computer Networks},
  volume       = {57},
  pages        = {1838--1852},
  month        = jun,
  year         = {2013},
  note         = {keyword=ledbat category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13comnet.pdf}
}
BitTorrent has recently introduced LEDBAT, a novel application-layer congestion control protocol for data exchange. The protocol design assumes that network bottlenecks are at the access of the network, and that thus user traffic competes creating self-induced congestion. To relieve this phenomenon, LEDBAT is designed to quickly infer when self-induced congestion is approaching (by detecting relative changes of the one-way delay in the transmission path), and to react promptly by reducing the sending rate prior to the congestion occurrence. Previous work has however shown LEDBAT to be affected by a latecomer advantage, where newly arriving connections can starve already existing flows. In this work, we propose modifications to the congestion window update mechanism of LEDBAT that solve this issue, thus guaranteeing intra-protocol fairness and efficiency. Closed-form expressions for the stationary throughput and queue occupancy are provided via a fluid model, whose accuracy is confirmed by means of ns2 packet level simulations. Our results show that the proposed change can effectively solve the latecomer issue, furthermore without affecting the other original LEDBAT goals.
[ICC-13]
Chiocchetti, Raffaele and Rossi, Dario and Rossini, Giuseppe,
"ccnSim: an Highly Scalable CCN Simulator"
IEEE International Conference on Communications (ICC)
jun.
2013,
Conference
@inproceedings{DR:ICC-13,
  author       = {Chiocchetti, Raffaele and Rossi, Dario and Rossini, Giuseppe},
  title        = {{ccnSim}: an Highly Scalable CCN Simulator},
  booktitle    = {IEEE International Conference on Communications (ICC)},
  month        = jun,
  year         = {2013},
  note         = {keyword=ccn category=article state=published project=smartuc},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13icc.pdf}
}
Research interest about Information Centric Networking (ICN) has grown at a very fast pace over the last few years, especially after the 2009 seminal paper of Van Jacobson et al. describing a Content Centric Network (CCN) architecture. While significant research effort has been produced in terms of architectures, algorithms, and models, the scientific community currently lacks common tools and scenarios to allow a fair cross-comparison among the different proposals. The situation is particularly complex as the commonly used general-purpose simulators cannot cope with the expected system scale: thus, many proposals are currently evaluated over small and unrealistic scale, especially in terms of dominant factors like catalog and cache sizes. As such, there is need of a scalable tool under which different algorithms can be tested and compared. Over the last years, we have developed and optimized ccnSim, a highly scalable chunk-level simulator especially suitable for the analysis of caching performance of CCN network. In this paper, we briefly describe the tool, and present an extensive benchmark of its performance. To give an idea of ccnSim scalability, a common off-the-shelf PC equipped with 8GB of RAM memory is able to simulate 2-hours of a 50-nodes CCN network, where each node is equipped with 10 GB caches, serving a 1 PB catalog in about 20 min CPU time.
@inproceedings{DR:TMA-13a,
  title        = {To the Moon and back: are Internet bufferbloat delays really that large},
  author       = {Chirichella, C. and Rossi, D.},
  booktitle    = {IEEE INFOCOM Workshop on Traffic Measurement and Analysis (TMA'13)},
  address      = {Turin, Italy},
  month        = apr,
  year         = {2013},
  note         = {keyword=ledbat,bufferbloat,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13tma-a.pdf}
}
Recently, the “bufferbloat” term has been coined to describe very large queuing delays (up to several seconds) experienced by Internet users. This problem has pushed protocol designers to deploy alternative (delay-based) models to the standard (loss-based) TCP best effort congestion control. In this work, we exploit timestamp information carried in the LEDBAT header, a protocol proposed by BitTorrent as replacement for TCP data transfer, to infer the queuing delay suffered by remote hosts. We conduct a thorough measurement campaign, that let us conclude that (i) LEDBAT delay-based congestion control is effective in keeping the queuing delay low for the bulk of the peers, (ii) yet about 1% of peers often experience queuing delay in excess of 1s, and (iii) not only the network access type, but also the BitTorrent client and the operating system concur in determining the bufferbloat magnitude.
@inproceedings{DR:TMA-13b,
  title        = {Fighting the bufferbloat: on the coexistence of AQM and low priority congestion control},
  author       = {Gong, Y. and Rossi, D. and Testa, C. and Valenti, S. and Taht, D.},
  booktitle    = {IEEE INFOCOM Workshop on Traffic Measurement and Analysis (TMA'13)},
  address      = {Turin, Italy},
  month        = apr,
  year         = {2013},
  note         = {keyword=ledbat,bufferbloat,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13tma-b.pdf}
}
Nowadays, due to excessive queuing, delays on the Internet can grow longer than several round trips between the Moon and the Earth, for which the "bufferbloat" term was recently coined. Some point to active queue management (AQM) as the solution. Others propose end-to-end low-priority congestion control techniques (LPCC). Under both approaches, promising advances have been made in recent times: notable examples are CoDel for AQM, and LEDBAT for LPCC. In this paper, we warn of a potentially fateful interaction when AQM and LPCC techniques are combined: namely (i) AQM resets the relative level of priority between best effort and low-priority congestion control protocols; (ii) while reprioritization generally equalizes the priority of LPCC and TCP, we also find that some AQM settings may actually lead best effort TCP to starvation. By an extended set of experiments conducted on both controlled testbeds and on the Internet, we show the problem to hold in the real world for all tested combination of AQM policies and LPCC protocols. To further validate the generality of our findings, we complement our experiments with packet-level simulation, to cover cases of other popular AQM and LPCC that are not available in the Linux kernel. To promote cross-comparison, we make our scripts and dataset available to the research community.
[PAM-13]
Chirichella, C. and Rossi, D. and Testa, C. and Friedman, T. and Pescape, A.,
"Remotely Gauging Upstream Bufferbloat Delays"
Passive and Active Measurement (PAM), Extended Abstract
mar.
2013,
Conference
@inproceedings{DR:PAM-13,
  title        = {Remotely Gauging Upstream Bufferbloat Delays},
  author       = {Chirichella, C. and Rossi, D. and Testa, C. and Friedman, T. and Pescape, A.},
  booktitle    = {Passive and Active Measurement (PAM), Extended Abstract},
  address      = {Hong Kong, China},
  month        = mar,
  year         = {2013},
  note         = {keyword=ledbat,bufferbloat,mplane category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13pam.pdf}
}
Bufferbloat is the growth in buffer size that has led Internet delays to occasionally exceed the light propagation delay from the Earth to the Moon. Manufacturers have built in large buffers to prevent losses on Wi-Fi, cable and ADSL links. But the combination of some links’ limited bandwidth with TCP’s tendency to saturate that bandwidth results in excessive queuing delays. In response, new congestion control protocols such as BitTorrent’s uTP/LEDBAT aim at explicitly limiting the delay that they add over the bottleneck link. This work proposes and validates a methodology to monitor the upstream queuing delay experienced by remote hosts, both those using LEDBAT, through LEDBAT’s native one-way delay measurements, and those using TCP (via the Timestamp Option).
@article{DR:COMNET-13b,
  author       = {Rossini, Giuseppe and Rossi, Dario and Betoule, Christophe and Clavier, Remi and Thouenon, Gilles},
  title        = {FIB Aplasia through Probabilistic Routing and Autoforwarding},
  journal      = {Elsevier Computer Networks},
  volume       = {57},
  number       = {14},
  pages        = {2802--2816},
  year         = {2013},
  note         = {keyword=tengineer category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13comnet-b.pdf}
}
[TMA-13c]
Valenti, Silvio and Rossi, Dario and Dainotti, Alberto and Pescape, Antonio and Finamore, Alessandro and Mellia, Marco,
"Reviewing Traffic Classification"
2013,
Bookch
@inbook{DR:TMA-13c,
  author       = {Valenti, Silvio and Rossi, Dario and Dainotti, Alberto and Pescape, Antonio and Finamore, Alessandro and Mellia, Marco},
  title        = {Reviewing Traffic Classification},
  editor       = {Biersack, Ernst and Callegari, Christian and Matijasevic, Maja},
  booktitle    = {Data Traffic Monitoring and Analysis: From measurement, classification and anomaly detection to Quality of Experience},
  publisher    = {Springer (LNCS 7754)},
  address      = {Heidelberg, Germany},
  chapter      = {6},
  year         = {2013},
  note         = {keyword=classification category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13tma-bookch.pdf}
}
@inproceedings{DR:ICN-13,
  author       = {},
  title        = {{INFORM: a Dynamic Interest Forwarding Mechanism for Information Centric Networking}},
  booktitle    = {ACM SIGCOMM Workshop on Information-Centric Networking (ICN)},
  address      = {Hong Kong, China},
  year         = {2013},
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13icn.pdf}
}
Information Centric Networking is a new communication paradigm where network primitives are based on named-data rather than host identifiers. In ICN, data retrieval is triggered by user requests which are forwarded towards a copy of the desired content item. Data can be retrieved either from a server that permanently provides a content item, or from a temporary item copy opportunistically cached by an in-network node. As the availability of cached items dynamically varies over time, the request forwarding scheme should be adapted accordingly. In this paper we focus on dynamic request forwarding in ICN, and develop an approach, inspired by Q-routing framework, that we show to outperform algorithms currently available in the state of the art.
@article{DR:PPNA-13,
  author       = {Rossi, Dario and Veglia, Paolo and Sammarco, Matteo and Larroca, Federico},
  title        = {ModelNet-TE: An emulation tool for the study of P2P and Traffic Engineering interaction dynamics},
  journal      = {Springer Peer-to-peer Networking and Applications (PPNA)},
  volume       = {6},
  number       = {2},
  pages        = {194--212},
  year         = {2013},
  note         = {keyword=p2p,p2p-tv,te category=article state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13ppna.pdf}
}
In the Internet, user-level performance of P2P applications may be determined by the interaction of two independent dynamics: on the one hand, by the end-to-end control policies applied at the P2P application layer (L7); on the other hand, by Traffic Engineering (TE) decisions taken at the network level (L3). Currently available tools do not allow to study L7/L3 interactions in realistic settings due to a number of limitations. Building over ModelNet, we develop a framework for the real-time emulation of TE capabilities, named ModelNet-TE, that we make available to the scientific community as open source software. ModelNet-TE allows (i) to deploy real unmodified Internet P2P applications, and to test their interaction with (ii) many TE algorithms, as its design allows to easily integrate other TE algorithms than those we already provide, (iii) in a furthermore controlled network environment. Due to these features, ModelNet-TE is a complementary tool with respect to hybrid simulation/prototyping toolkits (that constrain application development to a specific language and framework, and cannot be used with existing or proprietary applications) and to other open testbeds such as PlanetLab or Grid5000 (lacking of control or TE-capabilities respectively). ModelNet-TE can thus be useful to L7-researchers, as it allows to seamlessly and transparently test any existing P2P application without requiring any software modification. At the same time, ModelNet-TE can be useful to L3-researchers as well, since they can test their TE algorithms on the traffic generated by real applications. As a use case, in this work we carry on an experimental campaign of L7/L3 routing layers interaction through ModelNet-TE. As TE we consider the classic minimum congestion load-balancing, that we compare against standard IP routing. 
As example P2P applications, we take BitTorrent, one among the most popular file-sharing applications nowadays, and WineStreamer, an open source live-streaming application. We emulate BitTorrent and WineStreamer swarms over both realistic topologies (e.g., Abilene) and simplistic topologies that are commonly in use today (e.g., where the bottleneck is located at the network edge) under a variety of scenarios. Results of our experimental campaign show that user-level performance may be significantly affected by both the TE mechanism in use at L3 (e.g., due to interactions with TCP congestion control or P2P chunk trading logic), as well as scenario parameters that are difficult to control in the wild Internet, which thus testifies the interest for tools such as ModelNet-TE.
@article{DR:COMCOM-13,
  author       = {Rossini, Giuseppe and Rossi, Dario},
  title        = {Evaluating CCN multi-path interest forwarding strategies},
  journal      = {Elsevier Computer Communication, SI on Information Centric Networking},
  volume       = {36},
  number       = {7},
  pages        = {771--778},
  month        = apr,
  year         = {2013},
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi13comcom.pdf}
}
This work addresses the performance evaluation of Content Centric Networks (CCN). Focusing on a realistic YouTube-like catalog, we conduct a thorough simulation study of the main system performance, with a special focus on multi-path interest forwarding strategies but thoroughly analyzing the impact of several other ingredients – such as network topology, content popularity, caching decisions and replacement policies. Summarizing our main results, (i) catalog and popularity settings play by far the most crucial role (ii) the impact of the strategy layer comes next, with naive forwarding strategies playing against CCN efficiency, (iii) simple randomized caching policies perform almost as well as more complex ones, (iv) the impact of the topology is limited. Hopefully, our thorough assessment of scenario parameters can assist and promote the cross-comparison in the research community – for which we also provide our CCN simulator as open source software.
[PATENT-EP2835942B1]
Perino, D. and Carofiglio, G. and Rossi, D. and Rossini, G.,
" Dynamic Interest Forwarding Mechanism for Information Centric Networking " , Patent EPO13306124.2, EP2835942B1 patent, keyword=ccn
.
2013,
Patent
@misc{DR:PATENT-EP2835942B1,
  author       = {Perino, D. and Carofiglio, G. and Rossi, D. and Rossini, G.},
  title        = {Dynamic Interest Forwarding Mechanism for Information Centric Networking},
  howpublished = {Patent EPO13306124.2, EP2835942B1 },
  year         = {2013},
  note         = { patent, keyword=ccn},
  patent       = {True}
}
[PATENT-EP2785014A1]
Perino, D. and Carofiglio, G. and Chiocchetti, R. and Rossi, D. and Rossini, G.,
" Device and method for organizing forwarding information in nodes of a content centric networking " , Patent EPO13161714.4, EP2785014A1 patent, keyword=ccn
2013,
Patent
@misc{DR:PATENT-EP2785014A1,
  author       = {Perino, D. and Carofiglio, G. and Chiocchetti, R. and Rossi, D. and Rossini, G.},
  title        = {Device and method for organizing forwarding information in nodes of a content centric networking},
  howpublished = {Patent EPO13161714.4, EP2785014A1},
  year         = {2013},
  note         = {patent, keyword=ccn},
  patent       = {True}
}
@inproceedings{DR:CONEXT-12,
  title        = {Interaction or Interference: can AQM and Low Priority Congestion Control Successfully Collaborate},
  author       = {},
  year         = {2012},
  booktitle    = {ACM CoNEXT'12 Student Workshop},
  month        = dec,
  pages        = {25--26},
  note         = {keyword=bufferbloat,ledbat category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12conext.pdf}
}
Heterogeneity in the Internet ecosystem sometimes turns interaction into interference. Over the years, active queue management (AQM) and end-to-end low-priority congestion control (LPCC) have been proposed as alternative solutions to counter the persistently full buffer problem – that recently became popular under the "bufferbloat" term. In this work, we point out the existence of a negative interplay among AQM and LPCC techniques. Intuitively, as AQM is designed to penalize the most aggressive flows, it mainly hits best-effort TCP: it follows that LPCC is not able to maintain its low priority, thus becoming as aggressive as TCP. By an extended set of simulations on various AQM policies and LPCC protocols, including the very recent CoDel AQM and LEDBAT LPCC proposals, we point out that this interference is quite universal and deserves further attention.
@article{DR:IJNM-12,
  author       = {},
  title        = {Exploiting packet sampling measurements for traffic characterization and classification},
  journal      = {International Journal of Network Management},
  month        = nov,
  volume       = {22},
  issue        = {6},
  year         = {2012},
  pages        = {451--476},
  doi          = {10.1002/nem.1802},
  note         = {keyword=traffic category=article state=published project=},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12ijnm.pdf}
}
The use of packet sampling for traffic measurement has become mandatory for network operators to cope with the huge amount of data transmitted in nowadays networks, powered by increasingly faster transmission technologies. Therefore, many networking tasks must already deal with such reduced data, more available but less rich in information. In this work we assess the impact of packet sampling on various network monitoring activities, with a particular focus on traffic characterization and classification. We process an extremely heterogeneous dataset composed of four packet level traces (representative of different access technologies and operational environments) with a traffic monitor able to apply different sampling policies and rates to the traffic and extract several features both in aggregated and per-flow fashion, providing empirical evidences of the impact of packet sampling on both traffic measurement and traffic classification. First, we analyze feature distortion, quantified by means of two statistical metrics: most features appear already deteriorated under low sampling step, no matter the sampling policy, while only a few remain consistent under harsh sampling conditions, which may even cause some artifacts undermining the correctness of measurements. Second, we evaluate the performance of traffic classification under sampling. The information content of features, even though deteriorated, still allows a good classification accuracy, provided that the classifier is trained with data obtained at the same sampling rate of the target data. The accuracy is also due to a thoughtful choice of a smart sampling policy which biases the sampling towards packets carrying the most useful information.
@inproceedings{DR:IMC-12,
  title        = {Wire-Speed Statistical Classification of Network Traffic on Commodity Hardware},
  author       = {{Santiago del Rio}, P. M. and Rossi, D. and Gringoli, F. and Nava, L. and Salgarelli, L. and Aracil, J.},
  year         = {2012},
  pages        = {65--72},
  booktitle    = {ACM SIGCOMM Internet Measurement Conference (IMC)},
  month        = nov,
  note         = {keyword=classification,measurement,10Gbps+ category=article state=published project=mplane},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12imc.pdf}
}
In this paper we present a software-based traffic classification engine running on commodity multi-core hardware, able to process in real-time aggregates of up to 14.2 Mpps over a single 10 Gbps interface – i.e., the maximum possible packet rate over a 10 Gbps Ethernet link given the minimum frame size of 64 Bytes. This significant advance with respect to the current state of the art in terms of achieved classification rates is made possible by: (i) the use of an improved network driver, PacketShader, to efficiently move batches of packets from the NIC to the main CPU; (ii) the use of lightweight statistical classification techniques exploiting the size of the first few packets of every observed flow; (iii) a careful tuning of critical parameters of the hardware environment and the software application itself.
@inproceedings{DR:CAMAD-12,
  title        = {A dive into the caching performance of Content Centric Networking},
  author       = {Rossini, G. and Rossi, D.},
  booktitle    = {IEEE 17th International Workshop on Computer Aided Modeling and Design of Communication Links and Networks (CAMAD'12)},
  year         = {2012},
  month        = sep,
  pages        = {105--109},
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12camad.pdf}
}
Content Centric Networking (CCN) is a promising architecture for the diffusion of popular content over the Internet. While CCN system design is sound, gathering a reliable estimate of its performance in the current Internet is challenging, due to the large scale and to the lack of agreement in some critical elements of the evaluation scenario. In this work, we add a number of important pieces to the CCN puzzle by means of a chunk-level simulator that we make available to the scientific community as open source software. First, we pay special attention to the locality of the user request process, as it may be determined by user interest or language barrier. Second, we consider the existence of possibly multiple repositories for the same content, as in the current Internet, along with different CCN interest forwarding policies, exploiting either a single or multiple repositories in parallel. To widen the relevance of our findings, we consider multiple topologies, content popularity settings, caching replacement policies and CCN forwarding strategies. Summarizing our main result, though the use of multiple content repositories can be beneficial from the user point of view, it may however counter part of the benefits if the CCN strategy layer implements naive interest forwarding policies.
@inproceedings{DR:P2P-12,
  title        = {Inferring the buffering delay of remote {BitTorrent} peers under {LEDBAT} vs {TCP}},
  author       = {},
  year         = {2012},
  booktitle    = {IEEE P2P'XII},
  month        = sep,
  pages        = {77--78},
  note         = {keyword=ledbat category=article state=published project=misc},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12p2p.pdf}
}
Nowadays, due to excessive queuing, Internet delays grow sometimes as large as the propagation delay from moon to earth – for which the bufferbloat term was recently coined. Some point to active queue management (AQM) as its solution, others propose end-to-end congestion control techniques – like BitTorrent that recently replaced TCP with the LEDBAT transport protocol. In this demo, we implement a methodology to monitor the upstream queuing delay experienced by remote hosts, both those using LEDBAT, through LEDBAT’s native one-way delay measurements, and those using TCP, through the timestamp option. By actively taking part in torrent downloads as leechers, our software is able to infer (and visualize) the amount of access delay suffered by the remote peers.
@inproceedings{DR:ALGOTEL-12,
  title        = {Large scale simulation of CCN networks},
  author       = {Rossini, G. and Rossi, D.},
  booktitle    = {Algotel 2012},
  address      = {La Grande Motte, France},
  month        = may,
  year         = {2012},
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12algotel.pdf}
}
This work addresses the performance evaluation of Content Centric Networks (CCN). Focusing on a realistic YouTube-like catalog, we conduct a very thorough simulation study of the main system performance, considering several ingredients such as network topology, multi-path routing, content popularity, caching decisions and replacement policies. Summarizing our main results, we gather that (i) the impact of the topology is limited, (ii) multi-path routing may play against CCN efficiency, (iii) simple randomized policies perform almost as well as more complex ones, (iv) catalog and popularity settings play by far the most crucial role above all. Hopefully, our thorough assessment of scenario parameters can assist and promote the cross-comparison in the research community – for which we also provide our CCN simulator as open source software.
@inproceedings{DR:TMA-12,
  author       = {Testa, C. and Rossi, D. and Rao, A. and Legout, A.},
  title        = {Experimental Assessment of BitTorrent Completion Time in Heterogeneous TCP/uTP swarms},
  booktitle    = {Traffic Measurement and Analysis (TMA) Workshop at Passive and Active Measurement (PAM)},
  year         = {2012},
  pages        = {52--56},
  address      = {Vienna, Austria},
  month        = mar,
  note         = {keyword=p2p,bittorrent,ledbat category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12tma.pdf}
}
BitTorrent, one of the most widely used P2P applications for file-sharing, recently got rid of TCP by introducing an application-level congestion control protocol named uTP. The aim of this new protocol is to efficiently use the available link capacity, while minimizing its interference with the rest of user traffic (e.g., Web, VoIP and gaming) sharing the same access bottleneck. In this paper we perform an experimental study of the impact of uTP on the torrent completion time, the metric that better captures the user experience. We run BitTorrent applications in a flash crowd scenario over a dedicated cluster platform, under both homogeneous and heterogeneous swarm population. Experiments show that all-uTP swarms have shorter torrent download time with respect to all-TCP swarms. Interestingly, at the same time, we observe that even shorter completion times can be achieved under mixtures of TCP and uTP traffic, as in the default BitTorrent settings.
@inproceedings{DR:NOMEN-12,
  author       = {Rossi, D. and Rossini, G.},
  title        = {On sizing CCN content stores by exploiting topological information},
  booktitle    = {IEEE INFOCOM, NOMEN Workshop},
  year         = {2012},
  address      = {Orlando, FL},
  pages        = {280--285},
  month        = mar,
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12nomen.pdf}
}
In this work, we study the caching performance of Content Centric Networking (CCN), with special emphasis on the size of individual CCN router caches. Specifically, we consider several graph-related centrality metrics (e.g., betweenness, closeness, stress, graph, eccentricity and degree centralities) to allocate content store space heterogeneously across the CCN network, and contrast the performance to that of a homogeneous allocation. To gather relevant results, we study CCN caching performance under large cache sizes (individual content stores of 10 GB), realistic topologies (up to 60 nodes), a YouTube-like Internet catalog ($10^8$ files for 1 PB video data). A thorough simulation campaign allows us to conclude that (i) the gain brought by content store size heterogeneity is very limited, and that (ii) the simplest metric, namely degree centrality, already proves to be a “sufficiently good” allocation criterion. On the one hand, this implies rather simple rules of thumb for the content store sizing (e.g., “if you add a line card to a CCN router, add some content store space as well”). On the other hand, we point out that technological constraints, such as line-speed operation requirement, may however limit the applicability of degree-based content store allocation.
@inbook{DR:GREENBOOKCH-12,
  author       = {Bianzino, A. P. and Raju, A. K. and Rossi, D.},
  booktitle    = {Sustainable Green Computing: Practices, Methodologies and Technologies},
  title        = {Energy Consumption in the Internet Core: a Sensitivity Analysis},
  editor       = {Naima, K. and Chen, W.},
  publisher    = {IGI Global},
  address      = {Hershey, Pennsylvania (USA)},
  year         = {2012},
  note         = {keyword=green category=inbook state=toappear project=ngi group=rhd},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11greenbookch.pdf}
}
@article{DR:PPNA-12,
  author       = {Veglia, Paolo and Rossi, Dario},
  title        = {Performance evaluation of P2P-TV diffusion algorithms under realistic settings},
  journal      = {Springer Peer-to-peer Networking and Applications (PPNA)},
  year         = {2012},
  volume       = {6},
  issue        = {1},
  pages        = {26--45},
  note         = {keyword=p2p,p2p-tv category=article state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12ppna.pdf}
}
Internet video and peer-to-peer television (P2P-TV) are attracting more and more users: chances are that P2P-TV is going to be the next Internet killer application. In recent years, valuable effort has been devoted to the problems of chunk-scheduling and overlay management in P2P-TV systems. However, many interesting P2P-TV proposals have been evaluated either in rather idealistic environments, or in the wild Internet. Our work sits in between these two antipodean approaches: our aim is to compare existing systems in a controlled way, but taking special care in realistic conditions for their evaluation at the same time. We carry out a simulation analysis that considers several factors, modeling the L7 overlay (e.g., chunk scheduling, topology management, overlay topology, etc.), the L3 network (e.g., end-to-end latency models, fixed vs dynamic conditions, etc.), and the interaction of both layers (e.g., measurement errors, loss of signaling messages, etc.). To depict a comprehensive system view, results are expressed in terms of both user-centric and network-centric metrics. In a nutshell, our main finding is that P2P-TV systems are generally robust against measurement errors (e.g., propagation delay or capacity estimation), but are on the contrary deeply affected by signaling errors (e.g., loss or outdated system view), which are often overlooked without justification.
@inproceedings{DR:NTMS-12,
  author       = {},
  title        = {Adaptive Probabilistic Flooding for Multi-path Routing},
  booktitle    = {IFIP NTMS, Best paper award},
  year         = {2012},
  pages        = {1--6},
  note         = {keyword=locarn,bestpaperaward category=article state=published project=locarn},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12ntms.pdf}
}
In this work, we develop a distributed routing algorithm for topology discovery, suitable for ISP transport networks, that is however inspired by opportunistic algorithms used in ad hoc wireless networks. We propose a plug-and-play control plane, able to find multiple paths toward the same destination, and introduce a novel algorithm, called adaptive probabilistic flooding, to achieve this goal. By keeping a small amount of state in routers taking part in the discovery process, our technique significantly limits the amount of control messages exchanged with flooding – and, at the same time, it only minimally affects the quality of the discovered multiple path with respect to the optimal solution. Simple analytical bounds, confirmed by results gathered with extensive simulation on several topologies (up to 10,000 nodes), show our approach to be of high practical interest.
@article{DR:SBE-12,
  title        = {The Green-Game: Accounting for Device Criticality in Resource Consolidation for Backbone IP Networks},
  author       = {},
  journal      = {Strategic Behavior and the Environment, SI on ICT-based strategies for environmental conflicts},
  year         = {2012},
  note         = {keyword=green category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12sbe.pdf}
}
@inproceedings{DR:ICN-12,
  author       = {Chiocchetti, Raffaele and Rossi, Dario and Rossini, Giuseppe and Carofiglio, Giovanna and Perino, Diego},
  title        = {Exploit the known or explore the unknown: Hamlet-like doubts in ICN},
  booktitle    = {ACM SIGCOMM, ICN Workshop},
  year         = {2012},
  note         = {keyword=ccn category=article state=published project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi12icn.pdf}
}
@inproceedings{DR:ITC-23,
  author       = {Bianzino, Aruna Prem and Chaudet, Claude and Rossi, Dario and Rougier, Jean-Louis and Moretti, Stefano},
  title        = {The Green-Game: Striking a Balance between QoS and Energy Saving},
  booktitle    = {23rd International Teletraffic Congress (ITC23)},
  month        = sep,
  year         = {2011},
  address      = {San Francisco, USA},
  note         = {keyword=green},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11itc.pdf}
}
@article{DR:NETWORK-11,
  title        = {Experiences of Internet Traffic Monitoring with Tstat},
  author       = {},
  journal      = {IEEE Network Magazine, Special Issue on Network Traffic Monitoring and Analysis},
  month        = may,
  year         = {2011},
  note         = {keyword=measurement category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11network.pdf}
}
@article{DR:ITPRO-11,
  author       = {Raju, A. and Bianzino, A. P. and Rossi, D.},
  title        = {Greening the Internet: Measuring Web Power Consumption},
  journal      = {IEEE ITProfessional},
  year         = {2011},
  month        = jan,
  volume       = {13},
  issue        = {1},
  note         = {keyword=green category=article state=published project=},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11itpro.pdf}
}
[PER-11]
Bianzino, Aruna Prem and Raju, Anand Kishore and Rossi, Dario,
"Apples-to-apples: a framework analysis for energy-efficiency in networks"
In SIGMETRICS Perform. Eval. Rev.,
Vol. 38,
pp.81–85,
jan.
2011,
DOI http://doi.acm.org/10.1145/1925019.1925036
Journal
@article{DR:PER-11,
  author    = {Bianzino, Aruna Prem and Raju, Anand Kishore and Rossi, Dario},
  title     = {Apples-to-apples: a framework analysis for energy-efficiency in networks},
  journal   = {SIGMETRICS Perform. Eval. Rev.},
  volume    = {38},
  issue     = {3},
  month     = jan,
  year      = {2011},
  issn      = {0163-5999},
  pages     = {81--85},
  numpages  = {5},
  doi       = {10.1145/1925019.1925036},
  acmid     = {1925036},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
@inproceedings{DR:NTMS-11,
  author       = {Veglia, P. and Rossi, D.},
  title        = {Assessing the impact of signaling on the QoE of push-based P2P-TV diffusion algorithms},
  booktitle    = {4th IFIP International Conference on New Technologies, Mobility and Security (NTMS'11)},
  year         = {2011},
  address      = {Paris, France},
  month        = feb,
  eventdate    = {2011-02-07/2011-02-10},
  note         = {keyword=p2p category=article state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11ntms.pdf}
}
@inproceedings{DR:TRAC-11,
  author       = {Valenti, S. and Rossi, D.},
  title        = {Fine-grained behavioral classification in the core: the issue of flow sampling},
  booktitle    = {IEEE International Workshop on TRaffic Analysis and Classification (TRAC'11)},
  year         = {2011},
  month        = jul,
  eventdate    = {2011-07-05/2011-07-09},
  address      = {Istanbul, Turkey},
  note         = {keyword=classification,abacus category=article state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11trac.pdf}
}
@techreport{DR:CCN-TR11a,
  title        = {Caching performance of content centric networks under multi-path routing (and more)},
  author       = {Rossi, D. and Rossini, G.},
  institution  = {Telecom ParisTech},
  year         = {2011},
  note         = {keyword=ccn category=techrep, project=connect},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi11ccn-techrep1.pdf}
}
[MODELNET-TR11]
E. Alessandria, L. Muscariello, D. Rossi,
"ModelNet-TE: An emulation tool for the study of P2P and Traffic Engineering interaction dynamics"
Telecom ParisTech
2011,
Tech.Rep.
@techreport{DR:MODELNET-TR11,
  title       = {ModelNet-TE: An emulation tool for the study of P2P and Traffic Engineering interaction dynamics},
  author      = {Alessandria, E. and Muscariello, L. and Rossi, D.},
  institution = {Telecom ParisTech},
  year        = {2011}
}
2010 # 23
[GLOBECOM-10a]
Carofiglio, G. and Muscariello, L. and Rossi, D. and Valenti, S.,
"The quest for LEDBAT fairness"
IEEE Globecom’10
dec.
2010,
Conference
@inproceedings{DR:GLOBECOM-10a,
  author       = {Carofiglio, G. and Muscariello, L. and Rossi, D. and Valenti, S.},
  title        = {The quest for LEDBAT fairness},
  booktitle    = {IEEE Globecom'10},
  year         = {2010},
  month        = dec,
  address      = {Miami, FL, USA},
  note         = {keyword=ledbat,trans,congestion control category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10globecom-a.pdf}
}
@inproceedings{DR:GLOBECOM-10b,
  author       = {Finamore, Alessandro and Mellia, Marco and Meo, Michela and Rossi, Dario and Valenti, Silvio},
  title        = {Peer-to-peer traffic classification: exploiting human communication dynamics},
  booktitle    = {IEEE Globecom'10, Demo Session},
  year         = {2010},
  month        = dec,
  address      = {Miami, FL, USA},
  note         = {keyword=trans,traffic classification category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10globecom-b.pdf}
}
[HDR-10]
Rossi, Dario,
"Peer-2-peer networking: the Dawn of the Homo Interconnexus Era"
dec.
2010,
@misc{DR:HDR-10,
  title  = {Peer-2-peer networking: the Dawn of the Homo Interconnexus Era},
  author = {Rossi, Dario},
  year   = {2010},
  month  = dec,
  school = {Universite Pierre et Marie Curie (UPMC)},
  note   = {HDR Thesis},
  thesis = {True}
}
[GREENCOM-10]
Bianzino, Aruna Prem and Chaudet, Claude and Larroca, Federico and Rossi, Dario and Rougier, Jean-Louis,
"Energy-Aware Routing: a Reality Check"
IEEE Globecom Workshop on Green Communications (GreenCom’10)
dec.
2010,
Conference
@inproceedings{DR:GREENCOM-10,
  author       = {Bianzino, Aruna Prem and Chaudet, Claude and Larroca, Federico and Rossi, Dario and Rougier, Jean-Louis},
  title        = {Energy-Aware Routing: a Reality Check},
  booktitle    = {IEEE Globecom Workshop on Green Communications (GreenCom'10)},
  year         = {2010},
  month        = dec,
  address      = {Miami, FL, USA},
  note         = {keyword=tiger2,green networking category=inproceedings state=published project=tiger2},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10greencom.pdf}
}
@inproceedings{DR:LCN-10,
  author       = {Carofiglio, G. and Muscariello, L. and Rossi, D. and Testa, C.},
  title        = {A hands-on Assessment of Transport Protocols with Lower than Best Effort Priority},
  booktitle    = {35th IEEE Conference on Local Computer Networks (LCN'10)},
  year         = {2010},
  month        = oct,
  address      = {Denver, CO, USA},
  note         = {keyword=ledbat,trans,congestion control category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10lcn.pdf}
}
@inproceedings{DR:ITC22-10,
  title        = {On the impact of sampling on traffic monitoring and analysis},
  author       = {Pescape, A. and Rossi, D. and Tammaro, D. and Valenti, S.},
  booktitle    = {Proceedings of the 22nd International Teletraffic Congress (ITC22)},
  address      = {Amsterdam, The Netherlands},
  month        = sep,
  year         = {2010},
  note         = {keyword=sampling,measurement,trans category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10itc22.pdf}
}
@inproceedings{DR:ICCCN-10,
  author       = {Rossi, D. and Testa, C. and Valenti, S. and Muscariello, L.},
  title        = {LEDBAT: the new BitTorrent congestion control protocol},
  booktitle    = {International Conference on Computer Communication Networks (ICCCN'10)},
  year         = {2010},
  month        = aug,
  address      = {Zurich, Switzerland},
  note         = {keyword=ledbat,trans category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10icccn.pdf}
}
[WWIC-10]
Finamore, A. and Mellia, M. and Meo, M. and Munafo, M. and Rossi, D.,
"Live traffic monitoring with Tstat: capabilities and experiences"
8th International Conference on Wired/Wireless Communication (WWIC’10), Springer LNCS 6074, Invited Paper
jun.
2010,
Conference
@inproceedings{DR:WWIC-10,
  author       = {Finamore, A. and Mellia, M. and Meo, M. and Munafo, M. and Rossi, D.},
  title        = {Live traffic monitoring with {Tstat}: capabilities and experiences},
  booktitle    = {8th International Conference on Wired/Wireless Communication (WWIC'10), Springer LNCS 6074, Invited Paper},
  address      = {Lulea, Sweden},
  year         = {2010},
  month        = jun,
  note         = {keyword=measurement,anomaly,classification category=inproceedings state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10wwic.pdf}
}
@inproceedings{DR:GREENMETRICS-10,
  author       = {Bianzino, Aruna Prem and Raju, Anand and Rossi, Dario},
  title        = {Apple-to-Apple: A Common Framework for Energy-Efficiency in Networks},
  booktitle    = {ACM SIGMETRICS, GreenMetrics workshop},
  year         = {2010},
  month        = jun,
  address      = {New York},
  note         = {keyword=green category=inproceedings state=published project=tiger2},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10greenmetrics.pdf}
}
[ICC-10]
La Mantia, G. and Rossi, D. and Finamore, A. and Mellia, M. and Meo, M.,
"Stochastic Packet Inspection for TCP Traffic"
IEEE International Conference on Communications (ICC’10)
may.
2010,
Conference
@inproceedings{DR:ICC-10,
  author       = {La Mantia, G. and Rossi, D. and Finamore, A. and Mellia, M. and Meo, M.},
  title        = {Stochastic Packet Inspection for {TCP} Traffic},
  booktitle    = {IEEE International Conference on Communications (ICC'10)},
  address      = {Cape Town, South Africa},
  year         = {2010},
  month        = may,
  note         = {keyword=classification,measurement category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10icc.pdf}
}
@inproceedings{DR:PAM-10,
  author       = {Rossi, D. and Testa, C. and Valenti, S.},
  title        = {Yes, we {LEDBAT}: Playing with the new {BitTorrent} congestion control algorithm},
  booktitle    = {Passive and Active Measurement (PAM'10)},
  address      = {Zurich, Switzerland},
  year         = {2010},
  month        = apr,
  note         = {keyword=p2p,bittorrent,ledbat,congestion control category=inproceedings state=published project=trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10pam.pdf}
}
@inproceedings{DR:TMA-10,
  author       = {Finamore, A. and Mellia, M. and Meo, M. and Rossi, D. and Valenti, S.},
  title        = {Kiss to {Abacus}: a comparison of {P2P-TV} traffic classifiers},
  booktitle    = {Traffic Measurement and Analysis (TMA'10), LNCS},
  address      = {Zurich, Switzerland},
  year         = {2010},
  month        = apr,
  note         = {keyword=classification,measurement,abacus category=inproceedings state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10tma.pdf}
}
@article{DR:TMM-10,
  author       = {Horvath, A. and Telek, M. and Rossi, D. and Veglia, P. and Ciullo, D. and da Rocha Neta, A. G. and Leonardi, E. and Mellia, M.},
  title        = {Network Awareness of P2P Live Streaming Applications: a Measurement Study},
  journal      = {IEEE Transactions on Multimedia},
  volume       = {12},
  number       = {1},
  pages        = {54--63},
  year         = {2010},
  month        = jan,
  note         = {keyword=p2p,characterization,napa,p2pgauge category=article state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10tmm.pdf}
}
@article{DR:IJDMB-10,
  author       = {Rossi, D. and Veglia, P.},
  title        = {An Hybrid Approach to Assess the Network Awareness of P2P-TV Applications},
  journal      = {International Journal of Digital Multimedia Broadcasting, special issue on Network-Aware Peer-to-Peer (P2P) and Internet Video},
  year         = {2010},
  doi          = {10.1155/2010/826351},
  note         = {keyword=p2p,characterization,napa,p2pgauge category=article state=published project=napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10ijdmb.pdf}
}
[COMST-10]
Bianzino, A. P. and Chaudet, C. and Rougier, J. L. and Rossi, D.,
"A Survey of Green Networking Research"
In IEEE Communications Surveys & Tutorials,
Vol. 14,
2010,
Journal
@article{DR:COMST-10,
  author       = {Bianzino, A. P. and Chaudet, C. and Rougier, J. L. and Rossi, D.},
  title        = {A Survey of Green Networking Research},
  journal      = {IEEE Communications Surveys \& Tutorials},
  year         = {2010},
  volume       = {14},
  issue        = {1},
  note         = {keyword=green category=article state=published project=tiger2},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10comst.pdf}
}
@article{DR:WILEY-10,
  author       = {Rossi, D. and Fracchia, R. and Meo, M.},
  title        = {On the quality of broadcast services in vehicular ad hoc networks},
  journal      = {Wiley Security and Communication Networks Journal, Special Issue on QoS Assured Wireless and Mobile Networks},
  year         = {2010},
  note         = {keyword=vanet,wireless category=article state=published},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10scnj.pdf}
}
[IJNM-10]
Birke, R. and Mellia, M. and Petracca, M. and Rossi, D.,
"Inspecting VoIP by Measurements from a Large ISP"
In Wiley International Journal on Network Management, Special Issue on “Traffic Monitoring and Network Measurements: from Theory to Practice”,
2010,
Journal
@article{DR:IJNM-10,
  author       = {Birke, R. and Mellia, M. and Petracca, M. and Rossi, D.},
  title        = {Inspecting VoIP by Measurements from a Large ISP},
  journal      = {Wiley International Journal on Network Management, Special Issue on ``Traffic Monitoring and Network Measurements: from Theory to Practice''},
  year         = {2010},
  editor       = {Ricciato, F. and Owezarski, P. and Papadopouli, M.},
  note         = {keyword=measurement,voip},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10ijnm.pdf}
}
@article{DR:PPNA-10,
  author       = {Rossi, D. and Sottile, E. and Veglia, P.},
  title        = {Black-box analysis of Internet P2P applications},
  year         = {2010},
  journal      = {Peer-to-Peer Networking and Applications},
  publisher    = {Springer New York},
  issn         = {1936-6442},
  pages        = {1--19},
  note         = {keyword=p2p,p2pgauge,characterization,measurement category=article state=published project=ngi,trans},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi10ppna.pdf}
}
@techreport{DR:TECHREP-10,
  title        = {News from the Internet congestion control world},
  author       = {Rossi, Dario and Testa, C. and Valenti, S. and Veglia, P. and Muscariello, L.},
  institution  = {Telecom ParisTech},
  year         = {2010},
  note         = {keyword=trans,ledbat category=techrep project=trans},
  howpublished = {http://arxiv.org/pdf/0908.0812}
}
@techreport{DR:TECHREP-10b,
  title        = {Rethinking low extra delay background transport protocols},
  author       = {Carofiglio, G. and Muscariello, L. and Rossi, D. and Testa, C. and Valenti, S.},
  institution  = {Telecom ParisTech},
  year         = {2010},
  note         = {keyword=ledbat category=techrep},
  howpublished = {http://arxiv.org/pdf/1010.5623}
}
@inproceedings{DR:SIGCOMM-09,
  title        = {Gauging the network friendliness of P2P applications},
  author       = {Rossi, D. and Sottile, E. and Valenti, S. and Veglia, P.},
  booktitle    = {ACM SIGCOMM, Demo Session},
  month        = aug,
  year         = {2009},
  address      = {Barcelona, Spain},
  note         = {keyword=p2p,p2pgauge,demo,characterization,measurement,network awareness, category=inproceedings state=published project=ngi,napawine},
  howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09sigcomm.pdf}
}
[ICC-09a]
Rossi, D. and Mellia, M. and Meo, M.,
"Evidences Behind Skype Outage"
IEEE International Conference on Communications (ICC’09)
jun.
2009,
Conference
@inproceedings{DR:ICC-09a,
author = {Rossi, D. and Mellia, M. and Meo, M.},
title = {Evidences Behind {Skype} Outage},
booktitle = {IEEE International Conference on Communications (ICC'09)},
address = {Dresden, Germany},
year = {2009},
month = jun,
note = {keyword=skype,voip,p2p,characterization,measurement category=inproceedings state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09icc-a.pdf}
}
@inproceedings{DR:ICC-09b,
author = {Muscariello, L. and Perino, D. and Rossi, D.},
title = {Do Next Generation Networks need Path Diversity},
booktitle = {IEEE International Conference on Communications (ICC'09)},
address = {Dresden, Germany},
year = {2009},
month = jun,
note = {keyword=routing,modelnette,tengineering category=inproceedings state=published project=ngi,tiger},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09icc-b.pdf}
}
[HOTP2P-09]
Horvath, A. and Telek, M. and Rossi, D. and Veglia, P. and Ciullo, D. and Garcia, M. A. and Leonardi, E. and Mellia, M.,
"Network Awareness of P2P Live Streaming Applications"
HOTP2P Workshop at IEEE IPDPS’09
may.
2009,
Conference
@inproceedings{DR:HOTP2P-09,
author = {Horvath, A. and Telek, M. and Rossi, D. and Veglia, P. and Ciullo, D. and Garcia, M. A. and Leonardi, E. and Mellia, M.},
title = {Network Awareness of {P2P} Live Streaming Applications},
booktitle = {HOTP2P Workshop at IEEE IPDPS'09},
address = {Rome, Italy},
year = {2009},
month = may,
note = {keyword=voip,p2p,characterization,measurement category=inproceedings state=published project=ngi,napawine},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09hotp2p.pdf}
}
@inproceedings{DR:TMA-09a,
author = {Valenti, S. and Rossi, D. and Meo, M. and Mellia, M. and Bermolen, P.},
title = {Accurate and Fine-Grained Classification of {P2P-TV} Applications by Simply Counting Packets},
booktitle = {Traffic Measurement and Analysis (TMA), Springer-Verlag LNCS 5537},
year = {2009},
month = may,
pages = {84--92},
note = {keyword=p2p,classification,measurement,abacus category=inproceedings state=published project=ngi,napawine},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09tma-a.pdf}
}
[TMA-09b]
Finamore, A. and Mellia, M. and Meo, M. and Rossi, D.,
"KISS: Stochastic Packet Inspection"
Traffic Measurement and Analysis (TMA), Springer-Verlag LNCS 5537
may.
2009,
Conference
@inproceedings{DR:TMA-09b,
author = {Finamore, A. and Mellia, M. and Meo, M. and Rossi, D.},
title = {{KISS}: Stochastic Packet Inspection},
booktitle = {Traffic Measurement and Analysis (TMA), Springer-Verlag LNCS 5537},
year = {2009},
month = may,
note = {keyword=classification,measurement category=inproceedings state=published project=ngi,napawine},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09tma-b.pdf}
}
@inproceedings{DR:INFOCOM-09,
author = {Valenti, S. and Rossi, D. and Meo, M. and Mellia, M. and Bermolen, P.},
title = {An {Abacus} for {P2P-TV} traffic classification},
booktitle = {IEEE INFOCOM, Demo Session},
address = {Rio de Janeiro, Brazil},
year = {2009},
month = apr,
note = {keyword=p2p,demo,classification,measurement,abacus category=inproceedings state=published project=ngi,napawine},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09infocom.pdf}
}
@article{DR:COMNET-09a,
author = {Bermolen, P. and Rossi, D.},
title = {Support Vector Regression for Link Load Prediction},
journal = {Elsevier Computer Networks},
year = {2009},
month = feb,
volume = {53},
number = {2},
pages = {191--202},
note = {keyword=forecast,svm,measurement,tengineering category=article state=published project=ngi,tiger},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09comnet-a.pdf}
}
[COMNET-09b]
Rossi, D. and Mellia, M. and Meo, M.,
"Understanding Skype Signaling"
In Elsevier Computer Networks,
Vol. 53,
No. 2,
pp.130-140,
feb.
2009,
Journal
@article{DR:COMNET-09b,
author = {Rossi, D. and Mellia, M. and Meo, M.},
title = {Understanding {Skype} Signaling},
journal = {Elsevier Computer Networks},
year = {2009},
month = feb,
volume = {53},
number = {2},
pages = {130--140},
note = {keyword=skype,voip,p2p,measurement category=article state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09comnet-b.pdf}
}
[TOM-09b]
Bonfiglio, D. and Mellia, M. and Meo, M. and Rossi, D.,
"Detailed Analysis of Skype Traffic"
In IEEE Transactions on Multimedia,
Vol. 11,
No. 1,
pp.117-127,
jan.
2009,
Journal
@article{DR:TOM-09b,
author = {Bonfiglio, D. and Mellia, M. and Meo, M. and Rossi, D.},
title = {Detailed Analysis of {Skype} Traffic},
journal = {IEEE Transactions on Multimedia},
year = {2009},
month = jan,
volume = {11},
number = {1},
pages = {117--127},
note = {keyword=skype,voip,p2p,characterization,measurement category=article state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi09tmm.pdf}
}
[RECIPE-09]
Finamore, A. and Mellia, M. and Meo, M. and Rossi, D.,
"KISS: Stochastic Packet Inspection for UDP Traffic Classification"
2009,
Bookch
@inbook{DR:RECIPE-09,
author = {Finamore, A. and Mellia, M. and Meo, M. and Rossi, D.},
title = {{KISS}: Stochastic Packet Inspection for {UDP} Traffic Classification},
booktitle = {RECIPE: Robust and Efficient Traffic Classification in IP Networks},
editor = {Pescape, A. and Sansone, C.},
publisher = {Fridericiana Editrice Universitaria},
isbn = {9788883380815},
address = {Napoli, Italy},
year = {2009},
note = {keyword=classification,measurement category=inbook state=published project=ngi group=rhd}
}
2008 # 13
[FITraMEn-08]
Bermolen, P. and Rossi, D.,
"Network Forecast with Support Vector Machines"
International Workshop on Traffic Management and Traffic Engineering for the Future Internet (FITraMEn 08)
dec.
2008,
Conference
@inproceedings{DR:FITraMEn-08,
title = {Network Forecast with Support Vector Machines},
author = {Bermolen, P. and Rossi, D.},
booktitle = {International Workshop on Traffic Management and Traffic Engineering for the Future Internet (FITraMEn 08)},
address = {Porto, Portugal},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08fitramen.pdf},
month = dec,
year = {2008}
}
[COMNET-08]
Mellia, M. and Meo, M. and Muscariello, L. and Rossi, D.,
"Passive analysis of TCP anomalies"
In Elsevier Computer Networks,
Vol. 52,
No. 14,
oct.
2008,
Journal
@article{DR:COMNET-08,
author = {Mellia, M. and Meo, M. and Muscariello, L. and Rossi, D.},
title = {Passive analysis of {TCP} anomalies},
journal = {Elsevier Computer Networks},
year = {2008},
month = oct,
volume = {52},
number = {14},
note = {keyword=tcp,measurement category=article state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08comnet.pdf}
}
[PER-08]
Rossi, D. and Valenti, S. and Veglia, P. and Bonfiglio, D. and Mellia, M. and Meo, M.,
"Pictures from the Skype"
In ACM Performance Evaluation Review (PER),
Vol. 36,
No. 2,
pp.83–86,
sep.
2008,
Journal
@article{DR:PER-08,
author = {Rossi, D. and Valenti, S. and Veglia, P. and Bonfiglio, D. and Mellia, M. and Meo, M.},
title = {Pictures from the {Skype}},
journal = {ACM Performance Evaluation Review (PER)},
year = {2008},
month = sep,
volume = {36},
number = {2},
pages = {83--86},
note = {keyword=skype,voip,p2p,demo,classification,measurement category=article state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08per.pdf}
}
[SIGMETRIC-08]
Rossi, D. and Valenti, S. and Veglia, P. and Bonfiglio, D. and Mellia, M. and Meo, M.,
"Pictures from the Skype"
ACM SIGMETRICS, Demo Competition
jun.
2008,
Conference
@inproceedings{DR:SIGMETRIC-08,
author = {Rossi, D. and Valenti, S. and Veglia, P. and Bonfiglio, D. and Mellia, M. and Meo, M.},
title = {Pictures from the {Skype}},
booktitle = {ACM SIGMETRICS, Demo Competition},
address = {Annapolis, MD, USA},
year = {2008},
month = jun,
note = {keyword=skype,voip,p2p,demo,classification,measurement category=inproceedings state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08sigmetrics.pdf}
}
[ICC-08]
Rossi, D. and Fracchia, R. and Meo, M.,
"VANETs: Why Beaconing at All"
IEEE International Conference on Communications (ICC’08)
may.
2008,
Conference
@inproceedings{DR:ICC-08,
title = {{VANET}s: Why Beaconing at All},
author = {Rossi, D. and Fracchia, R. and Meo, M.},
booktitle = {IEEE International Conference on Communications (ICC'08)},
address = {Beijing, China},
note = {keyword=vanet,wireless,mac category=inproceedings state=published},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08icc.pdf},
month = may,
year = {2008}
}
[INFOCOM-08]
Bonfiglio, D. and Mellia, M. and Meo, M. and Ritacca, N. and Rossi, D.,
"Tracking Down Skype Traffic"
IEEE INFOCOM
apr.
2008,
Conference
@inproceedings{DR:INFOCOM-08,
author = {Bonfiglio, D. and Mellia, M. and Meo, M. and Ritacca, N. and Rossi, D.},
title = {Tracking Down {Skype} Traffic},
booktitle = {IEEE INFOCOM},
address = {Phoenix, AZ, USA},
year = {2008},
month = apr,
note = {keyword=skype,voip,p2p,measurement category=inproceedings state=published},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08infocom.pdf}
}
@article{DR:COMMAG-08,
author = {Leonardi, E. and Mellia, M. and Horvath, A. and Muscariello, L. and Niccolini, S. and Rossi, D.},
title = {Building a cooperative {P2P-TV} application over a Wise Network: the approach of the {European} {FP-7} {STREP} {NAPA-WINE}},
journal = {IEEE Communications Magazine},
year = {2008},
month = apr,
volume = {46},
number = {4},
note = {keyword=p2p,measurement category=article state=published project=ngi,napawine},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08commag.pdf}
}
@inproceedings{DR:IPTPS-08,
author = {Rossi, D. and Mellia, M. and Meo, M.},
title = {A Detailed Measurement of {Skype} Network Traffic},
booktitle = {7th International Workshop on P2P Systems (IPTPS)},
address = {Tampa Bay, FL, USA},
year = {2008},
month = feb,
note = {keyword=skype,voip,p2p,measurement category=inproceedings state=published project=ngi},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi08iptps.pdf}
}
[MEDIA2000-08]
Rossi, D. and Mellia, M. and Meo, M.,
"Quando il PC diventa un telefono"
In Il Sole 24 ore / Media 2000,
.
2008,
Journal
@article{DR:MEDIA2000-08,
author = {Rossi, D. and Mellia, M. and Meo, M.},
title = {Quando il {PC} diventa un telefono},
journal = {Il Sole 24 ore / Media 2000},
year = {2008}
}
[PATENT-US8339979B2]
Perino, M. and Mellia, M. and Rossi, D. and Meo, M.,
" Method and apparatus for detecting a single data flow in
an aggregate packet data flow and for identifying the application
generating said single data flow" , Patent WO2008149203, US8339979B2 patent, keyword=classification
2008,
Patent
@misc{DR:PATENT-US8339979B2,
author = {Perino, M. and Mellia, M. and Rossi, D. and Meo, M.},
title = {Method and apparatus for detecting a single data flow in an aggregate packet data flow and for identifying the application generating said single data flow},
howpublished = {Patent WO2008149203, US8339979B2},
note = { patent, keyword=classification },
year = {2008},
patent = {True}
}
[PATENT-US20100214933A1]
Perino, M. and Mellia, M. and Rossi, D. and Meo, M.,
" Method and apparatus for detecting vocal data flow in a packet data flow" , Patent WO2008075167, US20100214933A1 patent, keyword=classification
2008,
Patent
@misc{DR:PATENT-US20100214933A1,
author = {Perino, M. and Mellia, M. and Rossi, D. and Meo, M.},
title = {Method and apparatus for detecting vocal data flow in a packet data flow},
howpublished = {Patent WO2008075167, US20100214933A1},
note = { patent, keyword=classification },
year = {2008},
patent = {True}
}
@inproceedings{DR:INFOCOM-07,
author = {Birke, R. and Mellia, M. and Petracca, M. and Rossi, D.},
title = {Understanding {VoIP} from Backbone Measurements},
booktitle = {IEEE INFOCOM},
address = {Anchorage, Alaska, USA},
year = {2007},
month = may,
note = {keyword=voip,measurement,characterization category=inproceedings state=published project=ngi group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi07infocom.pdf}
}
[SPRINGER-07]
Rossi, D. and Casetti, C. and Chiasserini, C.F.,
"Some Study on Communication Performance"
2007,
Bookch
@inbook{DR:SPRINGER-07,
author = {Rossi, D. and Casetti, C. and Chiasserini, C. F.},
booktitle = {Sensor Networks and Configuration},
title = {Some Study on Communication Performance},
editor = {Mahalik, N. P.},
publisher = {Springer-Verlag},
address = {Berlin, Germany},
year = {2007},
note = {keyword=vanet,sensor category=inbook state=published project=ngi group=rhd}
}
@article{DR:SIGCOMM-07,
author = {Bonfiglio, D. and Mellia, M. and Meo, M. and Rossi, D. and Tofanelli, P.},
title = {Revealing {Skype} Traffic: When Randomness Plays with You},
journal = {ACM SIGCOMM Computer Communication Review},
volume = {37},
number = {4},
pages = {37--48},
year = {2007},
ar = {10%},
note = {keyword=skype,classification },
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi07sigcomm.pdf}
}
[PATENT-MI2007A001141]
Perino, M. and Mellia, M. and Rossi, D. and Meo, M.,
"Metodo per rilevare un singolo flusso dati all’interno di un flusso aggregato di dati a pacchetti e per identificare l’applicazione generatrice del singolo flusso dati" , Patent MI2007A001141 patent, keyword=classification
2007,
Patent
@misc{DR:PATENT-MI2007A001141,
title = {Metodo per rilevare un singolo flusso dati all'interno di un flusso aggregato di dati a pacchetti e per identificare l'applicazione generatrice del singolo flusso dati},
author = {Perino, M. and Mellia, M. and Rossi, D. and Meo, M.},
patent = {True},
note = { patent, keyword=classification },
howpublished = {Patent MI2007A001141},
year = {2007}
}
@inproceedings{DR:AUTONET-06,
author = {Fracchia, R. and Meo, M. and Rossi, D.},
title = {{VANET}s: To Beacon or Not To Beacon},
booktitle = {Autonet'06 Workshop at IEEE Globecom'06},
address = {San Francisco, CA, USA},
year = {2006},
month = nov,
note = {keyword=vanet category=inproceedings state=published group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi06autonet.pdf}
}
[ICC-06a]
Muscariello, L. and Meo, M. and Mellia, M. and Rossi, D.,
"Passive Measurement of TCP Anomalies"
IEEE International Conference on Communications (ICC’06)
jun.
2006,
Conference
@inproceedings{DR:ICC-06a,
author = {Muscariello, L. and Meo, M. and Mellia, M. and Rossi, D.},
title = {Passive Measurement of {TCP} Anomalies},
booktitle = {IEEE International Conference on Communications (ICC'06)},
address = {Istanbul, Turkey},
year = {2006},
month = jun,
note = {keyword=measurement category=inproceedings state=published group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi06icc-a.pdf}
}
@inproceedings{DR:ICC-06b,
author = {Rossi, D. and Mellia, M.},
title = {Real-Time {TCP/IP} Analysis with Common Hardware},
booktitle = {IEEE International Conference on Communications (ICC'06)},
address = {Istanbul, Turkey},
year = {2006},
month = jun,
note = {keyword=measurement category=inproceedings state=published group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi06icc-b.pdf}
}
@inproceedings{DR:ASWN-06,
title = {Avoiding broadcast storms in inter-vehicular warning delivery services},
author = {Fracchia, R. and Meo, M. and Rossi, D.},
booktitle = {6th International Workshop on Applications and Services in Wireless Networks (ASWN'06)},
address = {Berlin, Germany},
note = {keyword=vanet category=inproceedings state=published group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi06aswn.pdf},
month = apr,
year = {2006}
}
@inproceedings{DR:MP2P-06,
author = {Fracchia, R. and Meo, M. and Rossi, D.},
title = {Knowing Vehicle Location {HELPs} Avoiding Broadcast Packets Storm},
booktitle = {3rd IEEE International Workshop on Mobile Peer-to-Peer Computing (MP2P'06) at IEEE PerCom'06},
address = {Pisa, Italy},
year = {2006},
month = mar,
note = {keyword=vanet category=inproceedings state=published group=rhd},
howpublished = {https://perso.telecom-paristech.fr/drossi/paper/rossi06mp2p.pdf}
}
[PATENT-MI2006A002417]
Perino, M. and Mellia, M. and Rossi, D. and Meo, M.,
"Metodo ed apparato per rilevare flusso dati vocale in un flusso dati a pacchetti" , Patent MI2006A002417 patent, keyword=classification
2006,
Patent
@misc{DR:PATENT-MI2006A002417,
title = {Metodo ed apparato per rilevare flusso dati vocale in un flusso dati a pacchetti},
author = {Perino, M. and Mellia, M. and Rossi, D. and Meo, M.},
year = {2006},
patent = {True},
note = { patent, keyword=classification },
howpublished = {Patent MI2006A002417 }
}
Up to 2005 # 8
[NGI-05]
Rossi, D. and Mellia, M.,
"Persistent Gbps Link Monitoring with Tstat"
2nd EuroNGI Workshop on New Trends in Modelling, Quantitative Methods and Measurements (WP IA.8.1)
nov.
2005,
Conference
@inproceedings{DR:NGI-05,
author = {Rossi, D. and Mellia, M.},
title = {Persistent {Gbps} Link Monitoring with {Tstat}},
booktitle = {2nd EuroNGI Workshop on New Trends in Modelling, Quantitative Methods and Measurements (WP IA.8.1)},
address = {Aveiro, Portugal},
year = {2005},
month = nov,
howpublished = {https://nonsns.github.io/paper/rossi05eurongi.pdf}
}
@inproceedings{DR:ICC-04,
author = {Rossi, D. and Muscariello, L. and Mellia, M.},
title = {On the properties of {TCP} Flow Arrival Process},
booktitle = {IEEE International Conference on Communications (ICC'04)},
address = {Paris, France},
year = {2004},
month = jun,
howpublished = {https://nonsns.github.io/paper/rossi04icc.pdf}
}
[HPSR-04]
Rossi, D. and Giaccone, P. and Muscariello, L. and Mellia, M.,
"The performance of Switch under Real Traffic"
IEEE High Performance Switching and Routing (HPSR’04)
apr.
2004,
Conference
@inproceedings{DR:HPSR-04,
title = {The performance of Switch under Real Traffic},
author = {Rossi, D. and Giaccone, P. and Muscariello, L. and Mellia, M.},
booktitle = {IEEE High Performance Switching and Routing (HPSR'04)},
address = {Phoenix, AZ, USA},
howpublished = {https://nonsns.github.io/paper/rossi04hpsr.pdf},
month = apr,
year = {2004}
}
@inproceedings{DR:GLOBECOM-03,
title = {User Patience and the Web: a Hands-on Investigation},
author = {Rossi, D. and Casetti, C. and Mellia, M.},
booktitle = {IEEE Globecom'03},
address = {San Francisco, CA, USA},
howpublished = {https://nonsns.github.io/paper/rossi03globecom.pdf},
month = dec,
year = {2003}
}
@inproceedings{DR:GLOBECOM-02,
author = {Rossi, D. and Casetti, C. and Mellia, M.},
title = {A Simulation Study of {Web} Traffic over {DiffServ} Networks},
booktitle = {IEEE Globecom'02},
address = {Taipei, Taiwan},
year = {2002},
month = nov,
howpublished = {https://nonsns.github.io/paper/rossi02globecom.pdf}
}
@misc{DR:MSC-01,
author = {Rossi, D.},
title = {A Simulation Study of {Web} Traffic over {DiffServ} Networks},
school = {Politecnico di Torino},
booktitle = {M.Sc. Thesis published in the International Archives},
year = {2001},
month = nov,
thesis = {True}
}