Scroll indicator done
728x90

# HDFS 아키텍처

블록 구조 파일 시스템

  • 블록 사이즈는 기본 64MB로 설정돼 있음 (참고: Hadoop 2.x의 실제 기본값은 128MB — 환경 설정에 따라 다를 수 있음)
  • 블록을 저장할 때 3개씩 블록의 복제본을 저장 (수정 가능)

 

# HDFS CLI

  • cat : hdfs dfs -cat URI [URI ...] 파일 내용 나타냄(압축 파일을 읽기 위해서는 cat 대신 text 사용)
  • chgrp : hdfs dfs -chgrp [-R] GROUP URI [URI ...] 파일과 디렉터리의 그룹을 변경
  • cp : hdfs dfs -cp [-f] [-p | -p[topax]] URI [URI ...] <dest> 소스에 있는 파일들을 목적지로 복사

 

 

 


[실습]

docker rm master / slave1 / slave2 하고 다시 실행

C:\Users\jsl11>docker run -it -h master --name master -p 22:50070 skunivcoe/master:master
root@master:/#
C:\Users\jsl11>docker run -it -h slave1 --name slave1 --link master:master skunivcoe/slave1:slave1
root@slave1:/#
C:\Users\jsl11>docker run -it -h slave2 --name slave2 --link master:master skunivcoe/slave2:slave2
root@slave2:/#

docker 로 시작하는 명령 (예: docker cp) -> 호스트(Windows)에서 실행, 호스트와 컨테이너 사이의 파일 복사

hdfs 로 시작하는 명령 (예: hdfs dfs -put) -> 컨테이너 안에서 실행, 컨테이너 로컬 파일 시스템과 HDFS 사이의 파일 이동

C:\Users\jsl11>docker attach master
root@master:/#
root@master:/# ls

bin  boot  dev  etc  home  lib  lib64  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var

root@master:/# cd
root@master:~# cd coe/hadoop/hadoop-2.8.3
root@master:~/coe/hadoop/hadoop-2.8.3# ls

LICENSE.txt  README.txt  etc          include  libexec  pid   share
NOTICE.txt   bin         hadoop_data  lib      logs     sbin  word_test.txt

hia.txt가 없음

C:\Users\jsl11>docker cp hia.txt master:root/coe/hadoop/hadoop-2.8.3

root@master:~/coe/hadoop/hadoop-2.8.3# ls

LICENSE.txt  README.txt  etc          hia.txt  lib      logs  sbin   word_test.txt
NOTICE.txt   bin         hadoop_data  include  libexec  pid   share

C:\Users\jsl11>dir w*

 C 드라이브의 볼륨에는 이름이 없습니다.
 볼륨 일련 번호: 7CE4-7BD1

 C:\Users\jsl11 디렉터리

파일을 찾을 수 없습니다.

C:\Windows\system32>docker cp master:root/coe/hadoop/hadoop-2.8.3/word_test.txt . (**관리자 권한으로 cmd, 소스 경로에 파일명까지 지정해야 함)

root@master:~/coe/hadoop/hadoop-2.8.3# sbin/start-all.sh

This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [master]
master: starting namenode, logging to /root/coe/hadoop/hadoop-2.8.3/logs/hadoop-root-namenode-master.out
slave2: starting datanode, logging to /root/coe/hadoop/hadoop-2.8.3/logs/hadoop-root-datanode-slave2.out
slave1: starting datanode, logging to /root/coe/hadoop/hadoop-2.8.3/logs/hadoop-root-datanode-slave1.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /root/coe/hadoop/hadoop-2.8.3/logs/hadoop-root-secondarynamenode-master.out

starting yarn daemons
starting resourcemanager, logging to /root/coe/hadoop/hadoop-2.8.3/logs/yarn--resourcemanager-master.out
slave1: starting nodemanager, logging to /root/coe/hadoop/hadoop-2.8.3/logs/yarn-root-nodemanager-slave1.out
slave2: starting nodemanager, logging to /root/coe/hadoop/hadoop-2.8.3/logs/yarn-root-nodemanager-slave2.out

root@master:~/coe/hadoop/hadoop-2.8.3# jps

225 NameNode
981 Jps
472 SecondaryNameNode
697 ResourceManager

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -ls /

Found 1 items
drwxr-xr-x   - root supergroup          0 2018-02-26 10:22 /user

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -ls /user

Found 2 items
drwxr-xr-x   - root supergroup          0 2018-02-26 10:16 /user/input
drwxr-xr-x   - root supergroup          0 2018-02-26 10:22 /user/output2

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -ls /user/input

Found 1 items
-rw-r--r--   2 root supergroup         71 2018-02-26 10:16 /user/input/word_test.txt

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -ls /user/output2

Found 2 items
-rw-r--r--   2 root supergroup          0 2018-02-26 10:22 /user/output2/_SUCCESS
-rw-r--r--   2 root supergroup         54 2018-02-26 10:22 /user/output2/part-r-00000

 

output2 : 이미 돌려진 결과

output2 폴더 없애기 (출력 폴더가 이미 존재하면 잡이 덮어쓰지 못하고 실패함)

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -rm -r /user/output2

Deleted /user/output2

-r : 해당 폴더 안의 하위 폴더까지 삭제

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /user/input/word_test.txt /user/output2

...
...
22/09/27 07:29:54 INFO mapred.LocalJobRunner: Finishing task: attempt_local578100116_0001_r_000000_0
22/09/27 07:29:54 INFO mapred.LocalJobRunner: reduce task executor complete.
22/09/27 07:29:55 INFO mapreduce.Job: Job job_local578100116_0001 running in uber mode : false
22/09/27 07:29:55 INFO mapreduce.Job:  map 100% reduce 100%
22/09/27 07:29:55 INFO mapreduce.Job: Job job_local578100116_0001 completed successfully
22/09/27 07:29:55 INFO mapreduce.Job: Counters: 35
        File System Counters
                FILE: Number of bytes read=604398
                FILE: Number of bytes written=1340946
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=142
                HDFS: Number of bytes written=54
                HDFS: Number of read operations=13
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=4
        Map-Reduce Framework
                Map input records=6
                Map output records=10
                Map output bytes=110
                Map output materialized bytes=84
                Input split bytes=108
                Combine input records=10
                Combine output records=6
                Reduce input groups=6
                Reduce shuffle bytes=84
                Reduce input records=6
                Reduce output records=6
                Spilled Records=12
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=6
                Total committed heap usage (bytes)=667418624
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=71
        File Output Format Counters
                Bytes Written=54

결과를 /user/output2 (hdfs 에 위치) 에 넣은 것

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -cat /user/output2/*

computer        2
fighting        1
hadoop  2
hard    1
hello   2
study   2

hia.txt 도 wordcount 해보기 (output2 삭제하고)

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -rm -r /user/output2

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -put hia.txt /user/input

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -ls /user/input
Found 2 items
-rw-r--r--   2 root supergroup     820015 2022-09-27 08:00 /user/input/hia.txt
-rw-r--r--   2 root supergroup         71 2018-02-26 10:16 /user/input/word_test.txt

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount /user/input/hia.txt /user/output2

...
...
22/09/27 08:03:28 INFO mapred.LocalJobRunner: Finishing task: attempt_local1559642853_0001_r_000000_0
22/09/27 08:03:28 INFO mapred.LocalJobRunner: reduce task executor complete.
22/09/27 08:03:29 INFO mapreduce.Job: Job job_local1559642853_0001 running in uber mode : false
22/09/27 08:03:29 INFO mapreduce.Job:  map 100% reduce 100%
22/09/27 08:03:29 INFO mapreduce.Job: Job job_local1559642853_0001 completed successfully
22/09/27 08:03:29 INFO mapreduce.Job: Counters: 35
        File System Counters
                FILE: Number of bytes read=1082028
                FILE: Number of bytes written=2060950
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=1640030
                HDFS: Number of bytes written=180854
                HDFS: Number of read operations=13
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=4
        Map-Reduce Framework
                Map input records=11479
                Map output records=103933
                Map output bytes=1080997
                Map output materialized bytes=238902
                Input split bytes=102
                Combine input records=103933
                Combine output records=14884
                Reduce input groups=14884
                Reduce shuffle bytes=238902
                Reduce input records=14884
                Reduce output records=14884
                Spilled Records=29768
                Shuffled Maps =1
                Failed Shuffles=0
                Merged Map outputs=1
                GC time elapsed (ms)=9
                Total committed heap usage (bytes)=729808896
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters
                Bytes Read=820015
        File Output Format Counters
                Bytes Written=180854

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -cat /user/output2/*

...
...
...
“regular       1
“remotely”    1
“running       1
“shards.”     1
“sho,”        1
“shortForm     1
“shuffle”     2
“similar       1
“split”       1
“started       1
“statistical” 1
“stumble,”    1
“sure”        1
“the” 2
“think”       1
“time”);      1
“username”,   1
“verbose”     1
“was   1
“web   1
“which”       1
“”;   3
”ASSCODE”,”CLAIMS”,”NCLASS”,”CAT”,”SUBCAT”,”CMADE”,”CRECEIVE”,    1
”RATIOCIT”,”GENERAL”,”ORIGINAL”,”FWDAPLAG”,”BCKGTLAG”,”SELFCTUB”,   1
”SELFCTLB”,”SECDUPBD”,”SECDLWBD”  1
”reporter:counter:SkippingTaskCounters,MapProcessedRecords,1\n”)      1
”reporter:counter:SkippingTaskCounters,ReduceProcessedGroups,1\n”)    1
…      5
…]     4
≈       1
■      157
➞       2
➥       94
➥/user/chuck/example.txt        2

하둡 작업을 시작하기 전에 반드시 sbin/start-all.sh 를 실행할 것

하둡 정상적 종료 반드시 sbin/stop-all.sh

C:\Users\jsl11>docker attach master
root@master:~/coe/hadoop/hadoop-2.8.3#
root@master:~/coe/hadoop/hadoop-2.8.3# sbin/stop-all.sh
This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh
Stopping namenodes on [master]
master: stopping namenode
slave2: stopping datanode
slave1: stopping datanode
Stopping secondary namenodes [0.0.0.0]
0.0.0.0: stopping secondarynamenode
stopping yarn daemons
stopping resourcemanager
slave1: stopping nodemanager
slave2: stopping nodemanager
slave1: nodemanager did not stop gracefully after 5 seconds: killing with kill -9
slave2: nodemanager did not stop gracefully after 5 seconds: killing with kill -9
no proxyserver to stop

파일 넣기

root@master:~/coe/hadoop/hadoop-2.8.3# bin/hdfs dfs -put reLaw.txt /user/input


**과제

hia.txt wordcount 시, 많은 양인데 빈도의 정렬이 아니라 알파벳 순으로 되어있음

i) 최빈 단어 10개 뽑기

ii) 대한민국 헌법 전문 텍스트 파일도 똑같이 최빈 단어 순으로 상위 10개 -> 한글 처리를 보려는 것

 

** 문제가 생겼을 때 jps 로 확인: master 는 4개(NameNode, SecondaryNameNode, ResourceManager, Jps), slave1 / slave2 는 각 3개(DataNode, NodeManager, Jps) **

 

 

 

728x90