目的:
hive 可以基于hadoop mr或者Spark进行高层次的数据处理
前提:
安装之前先要保证hadoop的目录可以为root用户读写:
hadoop fs -chown root:root /
1: 构建镜像
下载依赖
# Prepare the image build context and fetch all build-time dependencies.
mkdir -p hive/image-build
cd hive/image-build
# JDK 8u181 (Oracle OTN link; in practice requires the license-accept cookie)
wget http://download.oracle.com/otn-pub/java/jdk/8u181-b13/96a7b8442fe848ef90c96a2fad6ed6d1/jdk-8u181-linux-x64.tar.gz
# Hive 3.0.0 — must match the version the Dockerfile ADDs (it references
# apache-hive-3.0.0-bin.tar.gz; the original fetched 2.3.3, which would
# make the docker build fail).
wget http://mirror.bit.edu.cn/apache/hive/hive-3.0.0/apache-hive-3.0.0-bin.tar.gz
# MySQL JDBC driver tarball; contains mysql-connector-java-5.1.47.jar,
# which the Dockerfile copies into Hive's lib directory.
wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.47.tar.gz
tar -zxvf mysql-connector-java-5.1.47.tar.gz
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.9.1/hadoop-2.9.1.tar.gz
构建hive镜像
Dockerfile
# Hive image: CentOS base with JDK, Hadoop client and Hive unpacked under /opt.
FROM centos:7.5.1804
ADD jdk-8u181-linux-x64.tar.gz /opt
ADD hadoop-2.9.1.tar.gz /opt
# NOTE(review): the Hive tarball version referenced here must match the one
# fetched in the download step — verify both say the same version.
ADD apache-hive-3.0.0-bin.tar.gz /opt
# 'which' is required by Hive's launcher scripts; rename the unpacked dir
# so the ENV paths below are stable.
RUN yum install -y which && mv /opt/apache-hive-3.0.0-bin /opt/apache-hive-3.0.0
# MySQL JDBC driver so the metastore can talk to the MySQL backend.
ADD mysql-connector-java-5.1.47.jar /opt/apache-hive-3.0.0/lib
ENV JAVA_HOME /opt/jdk1.8.0_181
ENV HADOOP_HOME /opt/hadoop-2.9.1
ENV HADOOP_CONF_DIR /opt/hadoop-2.9.1/etc/hadoop
ENV HIVE_HOME /opt/apache-hive-3.0.0
ENV PATH $JAVA_HOME/bin:$PATH
构建
# Build the Hive image and push it to the private registry
# (replace xxx.xxx.xxx.xxx:5000 with your registry address).
docker build -t hive .
docker tag hive xxx.xxx.xxx.xxx:5000/hive
docker push xxx.xxx.xxx.xxx:5000/hive
2: 构建DBtool镜像
安装gradle 并编译DB tool
git clone https://github.com/chenlein/database-tools.git
cd database-tools/
# Download the Gradle distribution first — the original unzipped a file
# that was never fetched.
wget https://services.gradle.org/distributions/gradle-4.10.2-bin.zip
unzip gradle-4.10.2-bin.zip
# -p: don't fail if /opt/gradle already exists
mkdir -p /opt/gradle
mv gradle-4.10.2 /opt/gradle/
edit build.gradle and remove the Dameng (dm) JDBC driver dependency line shown below:
"compile group: 'dm', name: 'Dm7JdbcDriver', version: '7.1', classifier: 'jdk17-20170808'"
add in /etc/profile
# Put the Gradle binary on PATH (persist by adding this line to /etc/profile).
export PATH=.:/opt/gradle/gradle-4.10.2/bin:$PATH
# Sanity-check the installation, then build the distribution tarball.
gradle --version
gradle build
# Verify the artifact exists and copy it next to the Dockerfile for ADD.
ls build/distributions/database-tools-1.0-SNAPSHOT.tar
cp build/distributions/database-tools-1.0-SNAPSHOT.tar ./
Dockerfile
FROM java:8
# Create the working directory at build time. The original used
# CMD ["mkdir", "-p", "/root/db_tools"], which never runs during the build
# and is silently overridden by the later CMD — only one CMD takes effect.
RUN mkdir -p /root/db_tools
WORKDIR /root/db_tools
# ADD auto-extracts the tarball into the working directory.
ADD database-tools-1.0-SNAPSHOT.tar .
RUN ["chmod", "+x", "./database-tools-1.0-SNAPSHOT/bin/database-tools"]
# Container entry point: run the packaged launcher script.
CMD ["./database-tools-1.0-SNAPSHOT/bin/database-tools"]
编译镜像
# Build the DB tool image and push it to the private registry.
docker build -t database-tools:1.0-SNAPSHOT .
docker tag database-tools:1.0-SNAPSHOT 172.2.2.11:5000/database-tools:1.0-SNAPSHOT
docker push 172.2.2.11:5000/database-tools:1.0-SNAPSHOT
3: 部署hive
1)部署local volume 用于hive部署的PVC
local-volumes.yaml
# Local hostPath PersistentVolume backing Hive's PVC.
# (Indentation restored — the manifest as pasted was flattened and invalid.)
apiVersion: v1
kind: PersistentVolume
metadata:
  name: hive-data-1
  labels:
    type: local
    app: hive
spec:
  capacity:
    storage: 100Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    # This directory must exist on the node that hosts the volume.
    path: /home/hive/data1
  # NOTE(review): 'Recycle' is deprecated in current Kubernetes releases;
  # consider 'Retain' or 'Delete' with a dynamic provisioner.
  persistentVolumeReclaimPolicy: Recycle
2)部署mysql服务
mysql.yaml
# Secret holding the MySQL root password for the Hive metastore DB.
# (Indentation restored — the manifest as pasted was flattened and invalid.)
apiVersion: v1
kind: Secret
metadata:
  name: hive-metadata-mysql-secret
  labels:
    app: hive-metadata-mysql
type: Opaque
data:
  # Value is base64-encoded, not encrypted.
  mysql-root-password: RGFtZW5nQDc3Nw==
---
# MySQL Deployment serving as the Hive metastore backend.
# (Indentation restored — the manifest as pasted was flattened and invalid.)
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: hive-metadata-mysql
  name: hive-metadata-mysql
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: hive-metadata-mysql
  template:
    metadata:
      labels:
        app: hive-metadata-mysql
    spec:
      initContainers:
        # mysqld refuses to initialize a datadir containing lost+found,
        # which appears when the volume is a freshly formatted filesystem.
        - name: remove-lost-found
          image: busybox:1.29.2
          imagePullPolicy: IfNotPresent
          command: ["rm", "-rf", "/var/lib/mysql/lost+found"]
          volumeMounts:
            - name: data
              mountPath: /var/lib/mysql
      containers:
        - name: mysql
          image: mysql:5.7
          volumeMounts:
            - name: data
              mountPath: /var/lib/mysql
          ports:
            - containerPort: 3306
              protocol: TCP
          env:
            # Root password comes from the Secret defined above.
            - name: MYSQL_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: hive-metadata-mysql-secret
                  key: mysql-root-password
      volumes:
        # NOTE(review): emptyDir means metastore data is lost when the pod
        # is rescheduled — consider binding a PersistentVolumeClaim instead.
        - name: data
          emptyDir: {}
---
# Service exposing the metastore MySQL to Hive (NodePort for external access).
# (Indentation restored — the manifest as pasted was flattened and invalid.)
kind: Service
apiVersion: v1
metadata:
  labels:
    app: hive-metadata-mysql
  name: hive-metadata-mysql-service
spec:
  ports:
    - name: tcp
      port: 3306
      targetPort: 3306
  selector:
    app: hive-metadata-mysql
  type: NodePort
3)部署hive配置
hive-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: hive-custom-config-cm
labels:
app: hive
data:
bootstrap.sh: |-
#!/bin/bash
set -x
cd /root/bootstrap
# Apply custom config file context
for cfg in ./*; do
if [[ ! "$cfg" =~ bootstrap.sh ]]; then
echo $cfg
cat $cfg
cat $cfg > $HIVE_HOME/conf/${cfg##*/}
fi
done
# Replace hive metadata password
sed -i 's/${HIVE_META@(==A}
=9}%H(耽ь%Y}5QQ}AMM]=I(聡хх(5聡(耽ь聡(耽нA(Mэ(((聡(
9聡ф(聡(5(聡(聥е(5(聡хх()
)Yхф聡ф)5(I]=(((())Yхф(聡(聡)((х((х((х聵х((聡(A~r/Ozp((Ё((Ё((C0(Ё(((((((((
|