Today we're going to learn Cassandra (it's been 5 years since I last used ScyllaDB, the C++ reimplementation of Cassandra). To install Cassandra, you can use this docker-compose file:
Since I failed to run the latest ScyllaDB, these notes cover only Cassandra. To install cqlsh locally, you can use this command:
# install the cqlsh client from PyPI
pip3 install cqlsh
As we already know, Cassandra is a wide-column database: every table needs a partition key (which determines where the rows are located) and, optionally, a clustering key (which determines the ordering of the data inside the partition). The SSTable part works similarly to ClickHouse merges.
# docker-compose.yml: a single Cassandra node for local experiments
version: '3.3'
services:
  testcassandra:
    image: cassandra:3.11 # or latest
    environment:
      - HEAP_NEWSIZE=256M
      - MAX_HEAP_SIZE=1G
      - "JVM_OPTS=-XX:+PrintGCDateStamps"
      # key without a value: Compose passes the variable through from the host environment
      - CASSANDRA_BROADCAST_ADDRESS
    ports:
      - "9042:9042" # CQL native transport port, used by cqlsh
Since I failed to run the latest ScyllaDB, these notes cover only Cassandra. To install cqlsh locally, you can use this command:
# install the cqlsh client from PyPI
pip3 install cqlsh
# connect to port 9042 on localhost (the port published by the docker-compose service)
cqlsh 127.0.0.1 9042
As we already know, Cassandra is a wide-column database: every table needs a partition key (which determines where the rows are located) and, optionally, a clustering key (which determines the ordering of the data inside the partition). The SSTable part works similarly to ClickHouse merges.
To create a keyspace (much like a "database", i.e. a collection of tables, but with a replication strategy that can be configured per datacenter), use this command:
-- Keyspace with a single replica per row, which is enough for a one-node setup.
CREATE KEYSPACE my_keyspace
  WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
-- alternative replication map for multiple datacenters:
-- {'class' : 'NetworkTopologyStrategy', 'DC1' : '3', 'DC2' : '3'};
USE my_keyspace;
CONSISTENCY; -- cqlsh command: without an argument it shows the current level
-- available levels:
-- ANY
-- ONE, TWO, THREE
-- LOCAL_ONE
-- QUORUM = replication_factor / 2 + 1
-- LOCAL_QUORUM
-- EACH_QUORUM
-- ALL
CONSISTENCY new_level; -- placeholder: replace new_level with one of the levels above
To create a table with a partition key and a clustering (ordering) key:
-- Compact key form: the first column (lname) is the partition key,
-- the remaining column (fname) is the clustering key.
-- The table name may be qualified as my_keyspace.users; CREATE TYPE defines a custom type.
CREATE TABLE users (
  fname text,
  lname text,
  title text,
  PRIMARY KEY (lname, fname)
);
DESCRIBE TABLE users; -- only for 4.0+
-- Explicit key form: the inner parentheses enclose the partition key.
CREATE TABLE foo (
  pkey text,
  okey text,
  PRIMARY KEY ((pkey), okey) -- partition key pkey, clustering key okey
  -- append WITH CLUSTERING ORDER BY (okey DESC) after the closing parenthesis for descending order
);
To upsert, use the INSERT or UPDATE command (last write wins):
-- Both INSERTs target the same primary key (lname = 'B', fname = 'A'),
-- so the second one overwrites title instead of adding a second row.
INSERT INTO users (fname, lname, title)
VALUES ('A', 'B', 'C');
INSERT INTO users (fname, lname, title)
VALUES ('A', 'B', 'D'); -- add IF NOT EXISTS to prevent replace
SELECT * FROM users;
-- No row with lname = 'C' has been written above, so this UPDATE creates one (upsert).
UPDATE users SET title = 'E' WHERE fname = 'A' AND lname = 'C';
SELECT * FROM users;
To change the schema, use the usual ALTER TABLE command:
ALTER TABLE users ADD mname text;
-- available column types:
-- tinyint, smallint, int, bigint (= long)
-- varint (= the real bigint)
-- float, double
-- decimal
-- text/varchar, ascii
-- timestamp
-- date, time
-- uuid
-- timeuuid (with mac address, conflict free, set now())
-- boolean
-- inet
-- counter
-- set<type> (set {val,val}, +{val}, -{val})
-- list<type> (set [idx]=, [val,val], +[], []+, -[], DELETE [idx])
-- map<type,type> (set {key: val}, [key]=, DELETE [key] FROM)
-- tuple<type,...> (set (val,...))
SELECT * FROM users;
-- UPDATE is an upsert, so this also works when no row with lname = 'D' exists yet.
UPDATE users SET mname = 'F' WHERE fname = 'A' AND lname = 'D';
-- add IF col=val to apply the update only when the condition holds (aka lightweight transaction)
SELECT * FROM users;
To create an index (since Cassandra only allows retrieval by partition and clustering key, or a full scan):
-- The index is named explicitly so that the DROP INDEX below does not depend on
-- the auto-generated name.
CREATE INDEX users_title_idx ON users (title);
SELECT * FROM users WHERE title = 'E';
DROP INDEX users_title_idx;
-- Once the index is gone, filtering on title requires ALLOW FILTERING.
SELECT * FROM users WHERE title = 'E' ALLOW FILTERING; -- full scan
To create a materialized view:
-- View over users that is partitioned by title, so rows can be looked up by title.
-- Every primary-key column of the view is restricted with IS NOT NULL.
CREATE MATERIALIZED VIEW users_by_title AS
  SELECT * -- columns added to users later via ALTER TABLE are added to this view too
  FROM users
  WHERE title IS NOT NULL
    AND fname IS NOT NULL
    AND lname IS NOT NULL
  PRIMARY KEY ((title), lname, fname);
SELECT * FROM users_by_title;
-- Writes go to the base table; the view is then queried by its own partition key.
INSERT INTO users (lname, fname, title) VALUES ('A', 'A', 'A');
SELECT * FROM users_by_title WHERE title = 'A';
DROP MATERIALIZED VIEW users_by_title;
To create a "transaction", use the BATCH statement:
-- Template: the statements between BEGIN BATCH and APPLY BATCH form one batch.
-- Replace each "..." with a real statement; no semicolon follows BEGIN BATCH.
BEGIN BATCH
  INSERT INTO ...;
  UPDATE ...;
  DELETE ...;
APPLY BATCH;
To import from file, use COPY command:
-- COPY is a cqlsh command; HEADER=true tells it the first line of users.csv is a header row
COPY users FROM 'users.csv' WITH HEADER=true;