12c新特性:外部表分区

从12.2版本开始允许对外部表进行分区化,让外部表也能享受到分区带来的便利性,比如分区裁剪和智能分区关联等。对于现在外部数据量较大的情况,对外部表的分区提供更高的查询效率和提升了数据的可维护性。

外部表分区与其他分区表类似,但是也有一些差异。外部表的文件可以存储在文件系统上,或者Hadoop分布式文件系统上。绝大部分一般分区策略都可以适用在分区外部表上,对于一个分区外部表来说,每个分区的存储都可以单独通过指定目录和文件来确定。

创建分区外部表

如果要创建一个简单的分区外部表,只需要通过CREATE TABLE语句配合ORGANIZATION短句再加上PARTITION BY,如果是复合外部分区表,则要加上SUBPARTITION BYPARTITION BYSUBPARTITION BY指定了每个分区和子分区的外部文件存放的位置。

新建两个目录,分别赋权给用户

create or replace directory dir1 as '/tmp/dir1';
create or replace directory dir2 as '/tmp/dir2';

grant read,write on directory dir1 to xb;
grant read,write on directory dir2 to xb;

构建一些数据,生成外部文件

set markup csv on quote on
set trimspool on linesize 1000 feedback off pagesize 0

set echo off
set feedback off
set pagesize 0
set termout off
set heading off
spool /tmp/p1.txt
select name, object_id, owner, object_name
  from (select 'HU' name, rownum rn, object_id, owner, object_name
          from dba_objects)
 where rn <= 500;
spool off

spool /tmp/p2.txt
select name, object_id, owner, object_name
  from (select 'BJ' name, rownum rn, object_id, owner, object_name
          from dba_objects)
 where rn <= 1000
   and rn > 500;
spool off

spool /tmp/p3.txt
select name, object_id, owner, object_name
  from (select 'SH' name, rownum rn, object_id, owner, object_name
          from dba_objects)
 where rn <= 1500
   and rn >1000;
spool off

然后根据上面生成的三个外部文件来创建外部表,每个分区可以指定单独的目录

DROP TABLE t_ext purge;

create table t_ext (
  name  varchar2(2),
  object_id     number,
  owner         varchar2(100),
  object_name   varchar2(100)
)
organization external (
  type oracle_loader
  default directory dir1
  access parameters (
    fields csv with embedded terminated by ',' optionally enclosed by '"'
    missing field values are null (
      name,
      object_id,
      owner,
      object_name 
    )
  )
)
reject limit unlimited
partition by list (name) (
  partition p1 values ('HU') location ('p1.txt'),
  partition p2 values ('BJ') location ('p2.txt'),
  partition p3 values ('SH') default directory dir2 location ('p3.txt'),
  partition pdefault values (default)
);

数据查询都正常

xb@PDB12C> select * from t_ext partition(p1) where rownum<10;

NA  OBJECT_ID OWNER                OBJECT_NAME
-- ---------- -------------------- --------------------
HU         16 SYS                  TS$
HU         20 SYS                  ICOL$
HU          8 SYS                  C_FILE#_BLOCK#
HU         37 SYS                  I_OBJ2
HU         22 SYS                  USER$
HU         33 SYS                  I_TAB1
HU         40 SYS                  I_OBJ5
HU         31 SYS                  CDEF$
HU         41 SYS                  I_IND1

9 rows selected.

xb@PDB12C> select * from t_ext partition(p2) where rownum<10;

NA  OBJECT_ID OWNER                OBJECT_NAME
-- ---------- -------------------- --------------------
BJ        503 SYS                  SVCOBJ_ACCESS$
BJ        504 SYS                  IDX1_SVCOBJ_ACCESS$
BJ        505 SYS                  SVCOBJ_ACCESS_ATTR$
BJ        506 SYS                  CODEAUTH$
BJ        507 SYS                  I_CODEAUTH1
BJ        508 SYS                  OBJECT_GRANT
BJ        509 SYS                  SYSTEM_GRANT
BJ        510 SYS                  USER_GRANT
BJ        512 SYS                  AUDSES$

9 rows selected.

xb@PDB12C> select * from t_ext partition(p3) where rownum<10;

NA  OBJECT_ID OWNER                OBJECT_NAME
-- ---------- -------------------- --------------------
SH       1007 SYS                  STREAMS$_DEST_OBJS
SH       1008 SYS                  STREAMS$_DEST_OBJS_I
SH       1020 SYS                  STREAMS$_SM_ID
SH       1021 SYS                  FGR$_FILE_GROUPS
SH       1022 SYS                  I_FGR$_FILE_GROUPS1
SH       1032 SYS                  FGR$_TABLESPACE_INFO
SH       1035 SYS                  FGR$_TABLE_INFO
SH       1036 SYS                  I_FGR$_TABLE_INFO1
SH       1037 SYS                  I_FGR$_TABLE_INFO2

9 rows selected.

这里默认的目录是DIR1,但是单独为P3分区指定了目录DIR2。除了指定default directory,也可以用LOCATION语句

partition p3 values ('SH') LOCATION ('dir2:p3.txt')

收集表的统计信息,查看数据的分布情况

exec dbms_stats.gather_table_stats(user, 't_ext');

col table_name format a20
col partition_name format a20
col high_value format a20

select table_name,
       partition_name,
       high_value,
       num_rows
from   user_tab_partitions
where  table_name = 'T_EXT'
order by 1, 2;

TABLE_NAME           PARTITION_NAME       HIGH_VALUE             NUM_ROWS
-------------------- -------------------- -------------------- ----------
T_EXT                P1                   'HU'                        500
T_EXT                P2                   'BJ'                        500
T_EXT                P3                   'SH'                        500
T_EXT                PDEFAULT             default                       0

子分区外部表

DROP TABLE t_ext purge;

create table t_ext (
  name  varchar2(2),
  object_id     number,
  owner         varchar2(100),
  object_name   varchar2(100)
)
organization external (
  type oracle_loader
  default directory dir1
  access parameters (
    fields csv with embedded terminated by ',' optionally enclosed by '"'
    missing field values are null (
      name,
      object_id,
      owner,
      object_name 
    )
  )
)
reject limit unlimited
partition by list (name)
subpartition by range (object_id) (
 partition p1 values ('HU') (
   subpartition p1_s1 values less than (200) location ('p1.txt'),
   subpartition p1_smax values less than (maxvalue) location ('p1.txt')
 ),
 partition p2 values ('BJ') (
   subpartition p2_s1 values less than (200) location ('p2.txt'),
   subpartition p2_smax values less than (maxvalue) location ('p2.txt')
 ),
 partition p3 values ('SH') default directory dir2(
   subpartition p3_s1 values less than (200) location ('p3.txt'),
   subpartition p3_smax values less than (maxvalue) location ('p3.txt')
 )
);

收集表的统计信息,查看数据的分布情况

exec dbms_stats.gather_table_stats(user, 't_ext');

col table_name format a20
col partition_name format a20
col subpartition_name for a20
col high_value format a20

select table_name,
       partition_name,
       subpartition_name,
       high_value,
       num_rows
from   user_tab_subpartitions
where  table_name = 'T_EXT'
order by 1, 2;

TABLE_NAME           PARTITION_NAME       SUBPARTITION_NAME    HIGH_VALUE             NUM_ROWS
-------------------- -------------------- -------------------- -------------------- ----------
T_EXT                P1                   P1_S1                200                         500
T_EXT                P1                   P1_SMAX              MAXVALUE                    500
T_EXT                P2                   P2_S1                200                         500
T_EXT                P2                   P2_SMAX              MAXVALUE                    500
T_EXT                P3                   P3_S1                200                         500
T_EXT                P3                   P3_SMAX              MAXVALUE                    500