Tuesday, July 10, 2012

How to tokenize text in Oracle

The following procedure splits text column query into keywords,e.g., "Hello World" into two words "Hello" and "World".


Step 1. Create index of type ctxsys.contex.
create index t_query_idx on t_query(query) indextype is ctxsys.context;


Step 2.
set serveroutput on;
declare
the_tokens ctx_doc.token_tab;
begin
for i in (select row_p_key from t_query order by row_p_key) loop
ctx_doc.tokens('t_query_idx', i.row_p_key, the_tokens);
for ii in 1..the_tokens.count loop
insert into tbl_query_token select i.row_p_key,the_tokens(ii).offset, the_tokens(ii).token from dual;
end loop;
commit;
end loop;
end;
/

No comments: