tjake · September 8, 2024 04:11 · ghost · Aug 11, 2014 · raju-nuovo · Aug 12, 2014
diff --git a/gistfile1.yml b/gistfile1.yml
 ### DML ###

 # Keyspace name; the same name is used by the CREATE KEYSPACE statement below.
 keyspace: stresscql

 # The CQL for creating the keyspace (optional if it already exists).
 # As written it uses SimpleStrategy with a replication factor of 1.
 keyspace_definition: |
  CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

 # Table name; must match the table created by table_definition below.
 table: blogposts

 # The CQL for creating a table you wish to stress (optional if it already exists).
 # Every PRIMARY KEY component must be a declared column: domain is the
 # partition key and published_at the clustering column. The author column is
 # declared because the column distribution settings in this profile refer to it.
 table_definition: |
  CREATE TABLE blogposts (
        domain text,
        published_at timeuuid,
        url text,
        author text,
        title text,
        body text,
        PRIMARY KEY(domain, published_at)
  ) WITH compaction = { 'class':'LeveledCompactionStrategy' }
    AND comment='A table to hold blog posts'


 ### Column Distribution Specifications ###
 #
 # Per-column data generation settings. As the inline notes indicate, `size`
 # bounds the length of generated values, `population` bounds how many distinct
 # values can be picked, and `cluster` bounds the number of rows per partition.

 columnspec:
  - name: domain
    size: gaussian(5..100)       # domain names are relatively short
    population: uniform(1..10M)  # 10M possible domains to pick from
  - name: published_at
    cluster: gaussian(1..1000)   # under each domain we should have an average of a few hundred posts
  - name: url
    size: uniform(30..300)       # urls shouldn't go beyond 300 chars
  - name: title
    size: gaussian(10..200)      # titles shouldn't go beyond 200 chars
  - name: author
    size: uniform(5..20)         # author names should be short
  - name: body
    size: gaussian(100..5000)    # the body of the blog post can be long
   
 # How a single insert operation is spread over partitions, visits and batches.
 insert:
  partitions: uniform(1..50)      # number of unique partitions to update in a single operation;
                                  # if perbatch < 1, multiple batches will be used but all partitions will
                                  # occur in all batches (unless already finished); only the row counts will vary
  pervisit: uniform(1..10)/10     # ratio of rows each partition should update in a single visit to the partition,
                                  # as a proportion of the total possible for the partition
  perbatch: ~exp(1..3)/4          # number of rows each partition should update in a single batch statement,
                                  # as a proportion of the proportion we are inserting this visit
                                  # (i.e. compounds with (and capped by) pervisit)
  batchtype: UNLOGGED             # type of batch to use

 #
 # A list of queries you wish to run against the schema.
 # Each entry maps a query name to a CQL statement; every `?` is a bind marker.
 # Both statements read the blogposts table and filter only on its key columns
 # (domain, published_at):
 #   simple1 - exact match on both key columns
 #   range1  - rows for one domain from a given published_at onwards
 #
 queries:
    simple1: select * from blogposts where domain = ? and published_at = ? LIMIT 100
    range1: select * from blogposts where domain = ? and published_at >= ? LIMIT 100

 #
 # In order to generate data consistently we need something to generate a unique key for this schema profile.
 # Per the value's own text: the string's hashcode is used as the random seed,
 # so editing the string changes the generated data.
 #
 seed: changing this string changes the generated data. its hashcode is used as the random seed.
 ### DDL: keyspace and table schema ###
 #
 # NOTE(review): this section repeats the keys of the stress profile that
 # appears earlier in this file. Duplicate top-level keys are invalid within a
 # single YAML document, so keep only one copy when using this as a profile.
 # Indentation uses spaces only; tabs are not allowed for YAML indentation.

 # Keyspace name; the same name is used by the CREATE KEYSPACE statement below.
 keyspace: stresscql

 # The CQL for creating a keyspace (optional if it already exists)
 keyspace_definition: |
   CREATE KEYSPACE stresscql WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};

 # Table name
 table: blogposts

 # The CQL for creating a table you wish to stress (optional if it already exists).
 # Every PRIMARY KEY component must be a declared column: domain is the
 # partition key and published_at the clustering column. The author column is
 # declared because columnspec below refers to it.
 table_definition: |
   CREATE TABLE blogposts (
     domain text,
     published_at timeuuid,
     url text,
     author text,
     title text,
     body text,
     PRIMARY KEY(domain, published_at)
   ) WITH compaction = { 'class':'LeveledCompactionStrategy' }
     AND comment='A table to hold blog posts'


 ### Column Distribution Specifications ###

 columnspec:
   - name: domain
     size: gaussian(5..100)       # domain names are relatively short
     population: uniform(1..10M)  # 10M possible domains to pick from
   - name: published_at
     cluster: gaussian(1..1000)   # under each domain we should have an average of a few hundred posts
   - name: url
     size: uniform(30..300)       # urls shouldn't go beyond 300 chars
   - name: title
     size: gaussian(10..200)      # titles shouldn't go beyond 200 chars
   - name: author
     size: uniform(5..20)         # author names should be short
   - name: body
     size: gaussian(100..5000)    # the body of the blog post can be long

 insert:
   partitions: uniform(1..50)     # number of unique partitions to update in a single operation;
                                  # if perbatch < 1, multiple batches will be used but all partitions will
                                  # occur in all batches (unless already finished); only the row counts will vary
   pervisit: uniform(1..10)/10    # ratio of rows each partition should update in a single visit to the partition,
                                  # as a proportion of the total possible for the partition
   perbatch: ~exp(1..3)/4         # number of rows each partition should update in a single batch statement,
                                  # as a proportion of the proportion we are inserting this visit
                                  # (i.e. compounds with (and capped by) pervisit)
   batchtype: UNLOGGED            # type of batch to use

 #
 # A list of queries you wish to run against the schema.
 # Each entry maps a query name to a CQL statement; every `?` is a bind marker.
 # Both statements read the blogposts table and filter only on its key columns.
 #
 queries:
   simple1: select * from blogposts where domain = ? and published_at = ? LIMIT 100
   range1: select * from blogposts where domain = ? and published_at >= ? LIMIT 100

 #
 # In order to generate data consistently we need something to generate a unique key for this schema profile.
 #
 seed: changing this string changes the generated data. its hashcode is used as the random seed.
No results found