[HAWQ] External table

Posted May 26, 20204 min read

references

http://www.oushu.io/docs/ch/g...
http://www.oushu.io/docs/ch/h...

Tested software version

PostgreSQL 8.2.15

(OushuDB 3.4.0.0)
(Apache HAWQ 2.4.0.0)
(Greenplum Database 4.2.0 build 1)
on x86_64-unknown-linux-gnu,
compiled by GCC clang version 8.0.1(tags/RELEASE_801/final)
compiled on Jan 15 2020 05:46:01

grammar

DB = # \ h CREATE EXTERNAL TABLE

specification

Naming conventions

ext_XXXXXX
err_XXXXXX

The err table needs to be cleaned regularly

It is recommended to make the stored procedure clean up regularly

Create gpfdist external table

Start gpfdist service(file server)

nohup gpfdist -d/home/gpadmin -p 8888> gpfdist.log 2> & 1 &

Check if gpfdist service exists(file server)

-bash-4.2 $ps -ef | grep gpfdist
gpadmin 507192 493162 0 01:57 pts/0 00:00:00 gpfdist -d/home/gpadmin -p 8888

Create wgfdist external table

create writable external table ext_w_test001(
id bigint,
name varchar(128)

)
location('gpfdist://10.0.0.6:8888/gpextdata/test001.txt')
format 'csv'
encoding 'GB18030';

Writable test

insert into ext_w_test001 select generate_series(1,1000,1), 1;

# After inserting the data, log in to the server 10.0.0.6(file server) and execute the following shell commands
wc -l /home/gpadmin/gpextdata/test001.txt

Create readable gpfdist external table

create readable external table ext_r_test001(
id bigint,
name varchar(128)

)
location('gpfdist://10.0.0.6:8888/gpextdata/test001.txt')
format 'csv'
encoding 'GB18030'
log errors into err_r_test001
segment reject limit 10 rows;

Readable test

select count(*) from ext_r_test001;
select * from ext_r_test001 limit 5;

Delete gpfdist external table

Find out the corresponding external table and err table

Option One

    External table "public.ext_w_test001"
 Column | Type | Modifiers
-------- + ------------------------ + -----------
 id | bigint |
 name | character varying(128) |
Type:writable
Encoding:GB18030
Format type:csv
Format options:delimiter ',' null '' escape '"' quote '"'
External location:gpfdist://10.0.0.6:8888/gpextdata/test001.txt
Distributed randomly

dw = # \ d ext_r_test001
    External table "public.ext_r_test001"
 Column | Type | Modifiers
-------- + ------------------------ + -----------
 id | bigint |
 name | character varying(128) |
Type:readable
Encoding:GB18030
Format type:csv
Format options:delimiter ',' null '' escape '"' quote '"'
External location:gpfdist://10.0.0.6:8888/gpextdata/test001.txt
Segment reject limit:10 rows
Error table:err_r_test001

Option II

SELECT x.location,
x.fmttype,
x.fmtopts,
x.command,
x.rejectlimit,
x.rejectlimittype,
x.writable,

(SELECT relname
FROM pg_class WHERE Oid = x.fmterrtbl) AS errtblname,
pg_catalog.pg_encoding_to_char(x.encoding
),
x.fmterrtbl = x.reloid AS errortofile
FROM pg_catalog.pg_exttable x,
pg_catalog.pg_class c
WHERE x.reloid = c.oid
AND c.oid = 'public.ext_r_test001' ::regclass;

Delete the external table(check whether there is an error in the table creation syntax before deleting)

drop table err_r_test001;
drop external table public.ext_r_test001;

drop external table public.ext_w_test001;

Close gpfdist service(file server)

ps -ef | grep gpfdist | grep -v grep | awk '{print $2}' | xargs kill

ps -ef | grep gpfdist

Create a web external table

Start httpd service(file server)

service httpd start
service httpd status

Check if http service is available

echo "1,2"> /var/www/html/text.csv
wget 10.0.0.4:80/text.csv

Create readable web external table

create readable external web table ext_r_web_test(
id bigint,
name varchar(128)

)
location(' http://10.0.0.4:80/text.csv' )
format 'csv'
encoding 'GB18030'
log errors into err_w_web_test
segment reject limit 10 rows;

Readable test

select * from ext_r_web_test;

Close httpd service(file server)

service httpd stop

Create cmd external table

Writable cmd external table corresponding script preparation

echo "cat>/tmp/externala"> /home/gpadmin/unload.sh

Create a wmd external table

create writable external web table ext_w_cmd_web(
 id bigint,
 name varchar(128)
)
execute '/home/gpadmin/unload.sh' on 3
 format 'csv'
 encoding 'GB18030';

Writable test

insert into ext_w_cmd_web select generate_series(1,30,1), 1;

## Check the data file
gpssh -f all_hosts "cat/tmp/externala"

Readable cmd external table corresponding script preparation

echo "cat/tmp/externala"> /home/gpadmin/load.sh

Create readable cmd external table

create readable external web table ext_r_cmd_web(
 id bigint,
 name varchar(128)
)
 execute '/home/gpadmin/load.sh' on 3
 format 'csv'
 encoding 'GB18030'
 log errors into err_r_cmd_tab
 segment reject limit 10 rows;

Readable test

select count(*) from ext_r_cmd_web;
select * from ext_r_cmd_web;

Create hdfs external table

Create a writeable hdfs external table

CREATE writable EXTERNAL TABLE ext_w_hdfs_test

(id int, name text, sponsor text)
LOCATION('hdfs://oushu/hawq/default_filespace/test') FORMAT 'CSV'
encoding 'GB18030';

Writable test

insert into ext_w_hdfs_test select 1,2,3;

Create readable hdfs external table

CREATE readable EXTERNAL TABLE ext_r_hdfs_test

(id int, name text, sponsor text)
LOCATION('hdfs://oushu/hawq/default_filespace/test') FORMAT 'CSV'
encoding 'GB18030'
log errors into err_r_hdfs_tab
segment reject limit 10 rows;

Readable test

select count(*) from ext_r_hdfs_test;
select * from ext_r_hdfs_test;