NoSQL com Python
Gustavo Pinto@gustavopinto
gustavopinto
2004 - belém, grad, php
2006 - amazontic, java, rails
2008 - curitiba, msc, python
2009 - aprioriti, xp, scrum
2011 - recife, phd
novo século
novos problemas
O que você usaria?
O que você usaria?
NoSQL
Hash table+
Distributed
Vamos por partes..
Estrutura de dados
Desnormalização
JOIN
eventualmente consistente
Replication Factor = 3
tolerante a
falhas
BASE ACID
● Basically Available
● Soft State
● Eventually Consistent
● Atomic
● Consistent
● Isolated
● Durable
ferramentas
Banco de dados distribuído, tolerante a falhas, escalável,
orientado a colunas
x
... em 50 GB de dados
Instalação
● download cassandra-xxx.tar.gz
● cd cassandra/bin/
● ./cassandra
Instalação
● download cassandra-xxx.tar.gz
● cd cassandra/bin/
● ./cassandra
Instalação
● download cassandra-xxx.tar.gz
● cd cassandra/bin/
● ./cassandra
● ./cassandra-cli -h localhost -p 9160
show keyspaces;
create keyspace pugpe;use pugpe;
create column family encontroxvi with comparator = UTF8Type;
set encontroxvi['08:30~09:00']['Titulo'] = 'Apresentacao';set encontroxvi['08:30~09:00']['Palestrante'] = 'Marcel';
set encontroxvi['08:30~09:00']['Titulo'] = 'Apresentacao';set encontroxvi['08:30~09:00']['Palestrante'] = 'Marcel';
set encontroxvi['08:30~09:00']['Titulo'] = 'Apresentacao';set encontroxvi['08:30~09:00']['Palestrante'] = 'Marcel';
set encontroxvi['08:30~09:00']['Titulo'] = 'Apresentacao';set encontroxvi['08:30~09:00']['Palestrante'] = 'Marcel';
set encontroxvi['08:30~09:00']['Titulo'] = 'Apresentacao';set encontroxvi['08:30~09:00']['Palestrante'] = 'Marcel';
set encontroxvi['09:00~09:40']['Titulo'] = 'noSQL';set encontroxvi['09:00~09:40']['Palestrante'] = 'Gustavo';set encontroxvi['09:00~09:40']['Slide'] = 'bit.ly/jhae1';
get encontroxvi['08:30~09:00'];
get encontroxvi['08:30~09:00'];
Thrift
Idiomatic low level API
Instalação
1. Download thrift-0.2.0-incubating.tar.gz
2. Instale as dependências (apt-get install ..)
3. ./configure (se tudo der certo)
4. make (se tudo der certo..)
5. make install
Instalação
1. Download thrift-0.2.0-incubating.tar.gz
2. Instale as dependências (apt-get install ..)
3. ./configure (se tudo der certo)
4. make (se tudo der certo..)
5. make install
from thrift import Thrift
from thrift.transport import TTransport
from thrift.transport import TSocket
from thrift.transport import THttpClient
from thrift.protocol import TBinaryProtocol
from cassandra import Cassandra
from cassandra.ttypes import *
import time
socket = TSocket.TSocket("localhost", 9160)
transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Cassandra.Client(protocol)
keyspace = "pugpe"
column_path = ColumnPath(column_family="encontroxvi", column="palestrante")
key = "08:30~09:00"
value = "Gustavo Pinto"
timestamp = time.time()
try:
    transport.open()
    # ...
    client.insert(keyspace,key, column_path, value, timestamp,ConsistencyLevel.ZERO)
# ....
column_parent = ColumnParent(column_family="encontroxvi")
slice_range = SliceRange(start="", finish="")
predicate = SlicePredicate(slice_range=slice_range)
result = client.get_slice(keyspace, key, column_parent, predicate, ConsistencyLevel.ONE)
# ...
pycassa
High level API
Instalação
1. easy_install pycassa
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
import pycassa
pool = pycassa.ConnectionPool("pugpe")
cf = pycassa.ColumnFamily(pool, "encontroxvi")
cf.insert('08:30~09:00', {'palestrante' : 'marcel', 'palestra' : 'abertura'})
cf.insert('09:00~09:40', {'palestrante' : 'gustavopinto', 'palestra' : 'nosql', 'slide' : 'bit.ly/...'})
cf.get('08:30~09:00')
cf.multiget(['08:30~09:00', '09:00~09:40'])
cf.get_count('09:00~09:40')
cf.remove('09:00~09:40')
Para saber mais
Top Related