% PP14a -- conference paper presenting DEDIS (distributed exact deduplication
% for primary storage). The `attachments` field is non-standard: it holds the
% link to the PDF and is ignored by standard bibliography styles.
@inproceedings{PP14a,
  author      = {Paulo, Jo{\~a}o and Pereira, Jos{\'e} Orlando},
  title       = {Distributed Exact Deduplication for Primary Storage Infrastructures},
  booktitle   = {Proceedings of Distributed Applications and Interoperable Systems - {IFIP}},
  year        = {2014},
  month       = jun,
  address     = {Berlin, Germany},
  abstract    = {Deduplication of primary storage volumes in a cloud computing environment is increasingly desirable, as the resulting space savings contribute to the cost effectiveness of a large scale multi-tenant infrastructure. However, traditional archival and backup deduplication systems impose prohibitive overhead for latency-sensitive applications deployed at these infrastructures while, current primary deduplication systems rely on special cluster filesystems, centralized components, or restrictive workload assumptions.

We present DEDIS, a fully-distributed and dependable system that performs exact and cluster-wide background deduplication of primary storage. DEDIS does not depend on data locality and works on top of any unsophisticated storage backend, centralized or distributed, that exports a basic shared block device interface. The evaluation of an open-source prototype shows that DEDIS scales out and adds negligible overhead even when deduplication and intensive storage I/O run simultaneously.},
  attachments = {https://haslab.uminho.pt/sites/default/files/jtpaulo/files/jtpaulo_dediscr.pdf},
}