Saturday, March 27, 2010

Fetching all keys for a column using Cassandra's Thrift API

I figured out how to get all of the Cassandra key/value pairs for a column using the Thrift API. The documentation, on top of the horrible naming scheme, definitely doesn't make it easy to figure things out quickly. Below is an example in Python:

#!/usr/bin/env python
# Demo script for the Cassandra Thrift API: writes one column (named after a
# user's UUID) into two rows of the "Users" column family, then reads that
# column back from every row with get_range_slices.
#
# The print/except syntax below is valid on Python 2.6+ and Python 3.

from thrift import Thrift
from thrift.transport import TTransport
from thrift.transport import TSocket
from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated
from cassandra import Cassandra
from cassandra.ttypes import *
import time

socket = TSocket.TSocket("localhost", 9160)
transport = TTransport.TBufferedTransport(socket)
# The class is imported by name above; the TBinaryProtocol module itself is
# not in scope, so it must not be referenced as TBinaryProtocol.<class>.
protocol = TBinaryProtocolAccelerated(transport)
client = Cassandra.Client(protocol)

keyspace = "MyUberSite"
user_uuid = "0ad503dd-2642-4a1e-9113-a75bfd183c34"

try:
    transport.open()

    # Same output as the Python 2 statement: print "UUID: ", user_uuid, "\n"
    print("UUID:  %s \n" % user_uuid)

    # Both writes target the column whose name is the user's UUID.
    # Two rows are written: row key "email" and row key "username".
    column_path = ColumnPath(column_family="Users", column=user_uuid)

    # Timestamps are sent as integers (the Thrift field is an i64, so a raw
    # time.time() float is not valid); microseconds since the epoch is the
    # usual Cassandra convention.
    #
    # Writes use ConsistencyLevel.ONE rather than ZERO: ZERO returns before
    # the write is applied, so the read below could miss the rows just
    # written.
    client.insert(
        keyspace,
        "email",
        column_path,
        "email@example.com",
        int(time.time() * 1000000),
        ConsistencyLevel.ONE)

    client.insert(
        keyspace,
        "username",
        column_path,
        "user_account_name",
        int(time.time() * 1000000),
        ConsistencyLevel.ONE)

    # Which column family are we interested in querying.
    column_parent = ColumnParent(column_family="Users")

    # A slice dictates the start and stop for the column names we are
    # interested in.  We only want the one column (the user's UUID), so
    # start and finish are set to the same value.
    slice_range = SliceRange(
        start=user_uuid,
        finish=user_uuid)

    # Create our predicate using the range instantiated above.
    predicate = SlicePredicate(slice_range=slice_range)

    # We want that column from every row key, so we specify an empty key
    # range.  If we wanted a subset of the keys we could specify that subset
    # here.  The range is from start to stop using the sort we specified
    # when we declared our column family in storage-conf.xml.
    key_range = KeyRange("", "")

    # Perform the query and print the results, one repr() per returned row.
    rows = client.get_range_slices(
        keyspace,
        column_parent,
        predicate,
        key_range,
        ConsistencyLevel.ONE)
    print("\n\n".join(map(repr, rows)))

except Thrift.TException as tx:
    print('Thrift: %s' % tx.message)

finally:
    transport.close()

Print this post

No comments: