% Workshop paper evaluating Cassandra as a replacement for file systems in
% content management systems. The citation key is kept unchanged so that
% existing citations keep resolving.
% "attachments" is a non-standard field (link to the PDF); unknown fields are
% ignored by bibliography styles.
% NOTE(review): "address" appears to hold the workshop location rather than
% the publisher's city, and "organization" duplicates "publisher"; both are
% left exactly as exported -- confirm the intended rendering.
@inproceedings{bgm+13,
  author       = {Beernaert, Leander and Gomes, Pedro and Matos, Miguel and Vila{\c{c}}a, Ricardo and Oliveira, Rui},
  title        = {Evaluating {Cassandra} as a manager of large file sets},
  booktitle    = {Proceedings of the 3rd International Workshop on Cloud Data and Platforms (with EuroSys 2013)},
  year         = {2013},
  month        = apr,
  publisher    = {ACM},
  organization = {ACM},
  address      = {Prague, Czech Republic},
  abstract     = {All companies developing their business on the Web, not only giants like Google or Facebook but also small companies focused on niche markets, face scalability issues in data management. The case study of this paper is the content management systems for classified or commercial advertisements on the Web. The data involved has a very significant growth rate and a read-intensive access pattern with a reduced update rate. Typically, data is stored in traditional file systems hosted on dedicated servers or Storage Area Network devices due to the generalization and ease of use of file systems. However, this ease in implementation and usage has a disadvantage: the centralized nature of these systems leads to availability, elasticity and scalability problems. The scenario under study, undemanding in terms of the system's consistency and with a simple interaction model, is suitable to a distributed database, such as Cassandra, conceived precisely to dynamically handle large volumes of data. In this paper, we analyze the suitability of Cassandra as a substitute for file systems in content management systems. The evaluation, conducted using real data from a production system, shows that using Cassandra, one can easily get horizontal scalability of storage, redundancy across multiple independent nodes, and load distribution imposed by the periodic activities of safeguarding data, while ensuring a comparable performance to that of a file system.},
  attachments  = {https://haslab.uminho.pt/sites/default/files/rco/files/clouddp-cassfs.pdf},
}