To use custom S3 endpoints with the latest Spark distribution, you need to add an external package (hadoop-aws). Custom endpoints can then be configured as described in the docs.
bin/spark-shell --packages org.apache.hadoop:hadoop-aws:2.7.2
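For example, assuming a custom S3-compatible endpoint at http://localhost:9000 and placeholder credentials (endpoint and keys are hypothetical), the S3A settings can be passed as Hadoop configuration when starting the shell:

bin/spark-shell --packages org.apache.hadoop:hadoop-aws:2.7.2 \
  --conf spark.hadoop.fs.s3a.endpoint=http://localhost:9000 \
  --conf spark.hadoop.fs.s3a.access.key=ACCESS_KEY \
  --conf spark.hadoop.fs.s3a.secret.key=SECRET_KEY

Paths such as s3a://&lt;bucket&gt;/&lt;key&gt; then resolve against the custom endpoint.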
import { Table, Vector, Field, Utf8, Type, Schema } from 'apache-arrow';

/**
 * Cast all columns with complex data types in an Apache Arrow Table to strings
 * @param {Table} table - The Apache Arrow Table
 * @returns {Table} - A new Table with all complex data type columns cast to strings
 */
function castComplexColumnsToString(table: Table): Table {
  const schemaFields = table.schema.fields;
#!/bin/bash
# List the CloudWatch log groups and streams a Lambda@Edge function has written
# in every region (replica log groups are named /aws/lambda/us-east-1.<function-name>).
FUNCTION_NAME=$1
for region in $(aws --output text ec2 describe-regions | cut -f 4)
do
  echo "Checking $region"
  for loggroup in $(aws --output text logs describe-log-groups --log-group-name-prefix "/aws/lambda/us-east-1.$FUNCTION_NAME" --region $region --query 'logGroups[].logGroupName')
  do
    echo "Found '$loggroup' in region $region"
    for logstream in $(aws --output text logs describe-log-streams --log-group-name $loggroup --region $region --query 'logStreams[].logStreamName')
    do
      echo "  $logstream"
    done
  done
done
{
  "Type": "AWS::IAM::Role",
  "Properties": {
    "AssumeRolePolicyDocument": {
      "Version": "2012-10-17",
      "Statement": [{
        "Effect": "Allow",
        "Principal": {
          "Service": [
            "lambda.amazonaws.com",
{
  "us-east-1": {
    "city": "Ashburn",
    "state": "Virginia",
    "country": "United States",
    "countryCode": "US",
    "latitude": 38.9445,
    "longitude": -77.4558029,
    "region": "North America",
    "iataCode": "IAD"
#!/bin/bash
# Print the repositories of the Docker Hub user/org given as $1 as CSV (user, name, description, stars, pulls)
curl -s https://hub.docker.com/v2/repositories/$1/\?page_size\=1000 | jq -r '["user", "name", "description", "star_count", "pull_count"] as $fields | $fields, (.results[] | [.[$fields[]]]) | @csv'
var phantom = require('phantom');
var async = require('async');

// [url, port] pairs to process
var pagesToCall = [
  ['http://www.google.com', 8000],
  ['http://www.allthingsd.com', 8001],
  ['http://www.wired.com', 8002],
  ['http://www.mashable.com', 8003],
  ['http://www.stackoverflow.com', 8004]
];