Spark Streaming
Prerequisites
# Refresh the package index and install base tooling.
# curl is needed further down to fetch the Cassandra signing keys.
sudo apt update
sudo apt install apt-transport-https curl -y
sudo apt install default-jre -y
sudo apt install default-jdk -y
# NOTE(review): Spark 2.2.1 predates Java 11 support (added in Spark 3.0) and
# Cassandra 3.11 targets Java 8 -- confirm default-jdk resolves to Java 8 on
# this release, otherwise install openjdk-8-jdk instead.

# sbt: Bintray has been shut down; the Debian repository is now served from
# repo.scala-sbt.org. Plain `tee` (no -a) overwrites the list file so that
# re-running these steps does not accumulate duplicate source entries.
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
sudo apt-get update
sudo apt-get install sbt -y

# Cassandra 3.11.x: use the HTTPS package repository.
# curl -f makes a failed download exit non-zero instead of piping an HTTP
# error page into apt-key; -L follows redirects.
echo "deb https://debian.cassandra.apache.org 311x main" | sudo tee /etc/apt/sources.list.d/cassandra.sources.list
curl -fsSL https://downloads.apache.org/cassandra/KEYS | sudo apt-key add -
sudo apt-get update
sudo apt-get install cassandra -y
Create the keyspace and table in cqlsh
# Open the interactive CQL shell; the statements below are typed at its prompt.
cqlsh
-- IF NOT EXISTS keeps both statements safe to re-run on a node that already has the schema.
CREATE KEYSPACE IF NOT EXISTS streaming_test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
CREATE TABLE IF NOT EXISTS streaming_test.words(key text PRIMARY KEY, value int);
exit
Download and install Spark
# Fetch Spark 2.2.1 (pre-built for Hadoop 2.7) into /home/hadoop/Spark,
# unpack it, and put its bin/ directory on PATH for future shells.
mkdir -p /home/hadoop/Spark
# Chain with && so a failed cd or download does not extract into the wrong place.
# wget -c resumes a partial download and avoids creating a ".tgz.1" copy on re-run.
cd /home/hadoop/Spark &&
  wget -c https://archive.apache.org/dist/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz &&
  tar -xvzf spark-2.2.1-bin-hadoop2.7.tgz
# Append the PATH entry only once so repeated runs do not keep growing ~/.bashrc.
# grep's stderr is silenced on purpose: a missing ~/.bashrc just means "not present yet".
grep -qF '/home/hadoop/Spark/spark-2.2.1-bin-hadoop2.7/bin' ~/.bashrc 2>/dev/null ||
  echo 'export PATH="/home/hadoop/Spark/spark-2.2.1-bin-hadoop2.7/bin:$PATH"' >> ~/.bashrc
source ~/.bashrc