Hadoop 集群搭建 (十一):Flume

Flume:xx 托我给您带个话…

clark-young-160446-unsplash

Flume 安装配置

安装 Flume

  • cd /mnt/hgfs/Hadoop
  • cp apache-flume-1.9.0-bin.tar.gz /usr/local/src/
  • cd /usr/local/src/
  • tar zxvf apache-flume-1.9.0-bin.tar.gz
  • rm -rf apache-flume-1.9.0-bin.tar.gz

Flume 环境变量:

  • vim ~/.bashrc
# 添加如下信息
# SET FLUME PATH
export FLUME_HOME=/usr/local/src/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin
  • source ~/.bashrc

修改 Flume 配置

  • cd apache-flume-1.9.0-bin/conf
  • cp flume-env.sh.template flume-env.sh
  • vim flume-env.sh
# 添加如下信息
export JAVA_HOME=/usr/local/src/jdk1.8.0_212

新增配置文件

NetCat:

  • vim flume-netcat.conf
# 添加如下信息
# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1

# Describe/configure the source
agent.sources.r1.type = netcat
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444

# Describe the sink
agent.sinks.k1.type = logger

# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1

Exec:

  • vim flume-exec.conf
# 添加如下信息
# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1

# Describe/configure the source
agent.sources.r1.type = exec
agent.sources.r1.command = tail -F /data/hadoop/flume/test.txt

# Describe the sink
agent.sinks.k1.type = logger

# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1

Avro:

  • vim flume-avro.conf
# 添加如下信息
# Define a memory channel called c1 on agent
agent.channels.c1.type = memory

# Define an avro source called r1 on agent, tell it to bind to 127.0.0.1:44444 and connect it to channel c1
agent.sources.r1.channels = c1
agent.sources.r1.type = avro
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444

# Describe the sink
agent.sinks.k1.type = hdfs
agent.sinks.k1.channel = c1
agent.sinks.k1.hdfs.path = hdfs://master:9000/flume_data_pool
agent.sinks.k1.hdfs.filePrefix = events-
agent.sinks.k1.hdfs.fileType = DataStream
agent.sinks.k1.hdfs.writeFormat = Text
agent.sinks.k1.hdfs.rollSize = 0
agent.sinks.k1.hdfs.rollCount= 600000
agent.sinks.k1.hdfs.rollInterval = 600

agent.channels = c1
agent.sources = r1
agent.sinks = k1

验证

NetCat:

# 服务端

  • flume-ng agent --conf conf --conf-file conf/flume-netcat.conf --name=agent -Dflume.root.logger=INFO,console

# 客户端

  • telnet 127.0.0.1 44444

Exec:

# 服务端

  • flume-ng agent --conf conf --conf-file conf/flume-exec.conf --name=agent -Dflume.root.logger=INFO,console

# 客户端

  • echo "hello flume" >> /data/hadoop/flume/test.txt

Avro:

# 服务端

  • flume-ng agent --conf conf --conf-file conf/flume-avro.conf --name=agent -Dflume.root.logger=DEBUG,console

# 客户端

  • flume-ng avro-client --conf conf --host 127.0.0.1 --port 44444 --filename /data/hadoop/flume/test.txt