Friday, February 17, 2017

Hive UDF and UDAF Example

How to write a UDF in Hive?

    1. Create a Java class for the User Defined Function that extends org.apache.hadoop.hive.ql.exec.UDF and implements one or more evaluate() methods containing your desired logic, and you are almost there.
    2. Package your Java class into JAR file (I am using maven)
    3. In the Hive CLI, ADD your JAR and verify that it appears on the Hive CLI classpath
    4. CREATE TEMPORARY FUNCTION in hive which points to your Java class
    5. Use it in Hive SQL and have fun!


package org.hardik.letsdobigdata;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class Strip extends UDF {

private Text result = new Text();
 public Text evaluate(Text str, String stripChars) {
 if(str == null) {
 return null;
 }
 result.set(StringUtils.strip(str.toString(), stripChars));
 return result;
 }
 public Text evaluate(Text str) {
 if(str == null) {
 return null;
 }
 result.set(StringUtils.strip(str.toString()));
 return result;
 }
}


-- Add the JAR that contains the UDF class, then list the JARs to confirm it was added.
ADD JAR /root/HiveUDFs-master-0.0.1-SNAPSHOT.jar;
LIST JARS;

-- Bind the name STRIP to the Java class packaged in the JAR.
CREATE TEMPORARY FUNCTION STRIP AS 'org.hardik.letsdobigdata.Strip';

-- Two-column, comma-delimited text table used only to drive the query below.
CREATE TABLE IF NOT EXISTS dummy (
  Value1 VARCHAR(64),
  Value2 VARCHAR(64)
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  LINES TERMINATED BY '\n'
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH '/root/ajay.txt' INTO TABLE dummy;

-- Call the UDF on a literal, selecting from dummy.
SELECT strip('hadoop','ha') FROM dummy;

OK
_c0
doop
doop
Time taken: 0.148 seconds, Fetched: 2 row(s)

No comments: