Spark Streaming

Prerequisites

# Refresh the package index and make sure apt can talk to HTTPS repositories.
# curl is installed here because the Cassandra step below uses it to fetch
# the repository signing keys.
sudo apt update
sudo apt install apt-transport-https curl -y

# java
# NOTE(review): Spark 2.2.x and Cassandra 3.11 are normally run on Java 8 —
# confirm that default-jre/default-jdk resolve to a compatible version on
# your Ubuntu/Debian release.
sudo apt install default-jre -y
sudo apt install default-jdk -y

# sbt
# The Bintray repository that used to host the sbt .deb packages has been
# shut down; the packages are now served from repo.scala-sbt.org.
# tee (without -a) overwrites the list file, so re-running this step does
# not pile up duplicate source entries.
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
sudo apt-get update
sudo apt-get install sbt -y

# cassandra
# Uses the current Apache hosts for the 3.11 package series and the KEYS
# file. curl -f fails on HTTP errors and -L follows redirects, so an error
# page is never piped into apt-key.
echo "deb https://debian.cassandra.apache.org 311x main" | sudo tee /etc/apt/sources.list.d/cassandra.sources.list
curl -fsSL https://downloads.apache.org/cassandra/KEYS | sudo apt-key add -
sudo apt-get update
sudo apt-get install cassandra -y

Create the keyspace and table in cqlsh

# Feed the schema statements to cqlsh on stdin so the whole step can be
# pasted or scripted in one go; cqlsh exits by itself at end of input.
# IF NOT EXISTS makes both statements safe to run again on a node where
# the keyspace/table were already created.
cqlsh <<'CQL'
CREATE KEYSPACE IF NOT EXISTS streaming_test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 };
CREATE TABLE IF NOT EXISTS streaming_test.words(key text PRIMARY KEY, value int);
CQL

Download Spark

# Directory that will hold the unpacked Spark distribution.
mkdir -p /home/hadoop/Spark
cd /home/hadoop/Spark

# Fetch and unpack the Spark 2.2.1 build for Hadoop 2.7.
# -c resumes/reuses an existing download instead of saving a second
# "<name>.tgz.1" copy when this step is repeated.
wget -c https://archive.apache.org/dist/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz
tar -xvzf spark-2.2.1-bin-hadoop2.7.tgz

# Put Spark's bin directory on PATH for future shells. The grep guard
# appends the export line only when it is not already present, so repeated
# runs do not keep growing ~/.bashrc (and PATH) with duplicates.
spark_path_line='export PATH="/home/hadoop/Spark/spark-2.2.1-bin-hadoop2.7/bin:$PATH"'
grep -qxF -- "$spark_path_line" ~/.bashrc 2>/dev/null || printf '%s\n' "$spark_path_line" >> ~/.bashrc
source ~/.bashrc

results matching ""

    No results matching ""