Hive UDF class
package org.puneetha.hive.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;

/**
 * @author Puneetha
 */
@Description(name = "udf_concat",
        value = "_FUNC_(STRING, STRING) - RETURN_TYPE(STRING)\n"
                + "Description: Concatenate two strings, separated by spaces",
        extended = "Example:\n"
                + " > SELECT udf_concat('hello','world') FROM src;\n"
                + " hello world")
@UDFType(deterministic = true)
public final class ConcatStr extends UDF {

    private static final Logger logger = Logger.getLogger(ConcatStr.class);

    public Text evaluate(final Text param1, final Text param2) throws HiveException {
        // Hive passes NULL column values as null references; return NULL instead of failing.
        if (param1 == null || param2 == null) {
            return null;
        }
        logger.debug("param1=" + param1 + "; param2=" + param2);

        final String separator = " ";
        return new Text(param1.toString() + separator + param2.toString());
    }
}
Test case – TestNG
package org.puneetha.hive.udf;

import org.apache.hadoop.io.Text;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

/**
 * @author Puneetha
 */
public class ConcatStrTest {

    @DataProvider(name = "dataProvider")
    public static String[][] inputData() {
        // Each row: first input, second input, expected result
        return new String[][] {
                {"hello", "world", "hello world"},
                {"this is an", "example", "this is an example"}
        };
    }

    @Test(dataProvider = "dataProvider")
    public void testEvaluate(String param1, String param2, String expectedResultStr) throws Exception {
        ConcatStr concatStr = new ConcatStr();
        // TestNG's assertEquals takes (actual, expected)
        Assert.assertEquals(concatStr.evaluate(new Text(param1), new Text(param2)),
                new Text(expectedResultStr));
    }
}
log4j.properties
# Root logger option
log4j.rootLogger=DEBUG, stdout

# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.puneetha</groupId>
  <artifactId>custom</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>hive_udf</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.finalname>hive_udf_v1</project.finalname>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <cdh.version>cdh5.5.2</cdh.version>
    <hadoop.version>2.6.0-${cdh.version}</hadoop.version>
    <hive.version>1.1.0-${cdh.version}</hive.version>
    <pig.version>0.12.0-${cdh.version}</pig.version>
    <log4j.version>1.2.17</log4j.version>
    <maven_jar_plugin.version>2.5</maven_jar_plugin.version>
    <codehaus.version>1.2.1</codehaus.version>
    <testng.version>6.9.10</testng.version>
    <junit.version>4.8.1</junit.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>${log4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.testng</groupId>
      <artifactId>testng</artifactId>
      <version>${testng.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-metastore</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-service</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.pig</groupId>
      <artifactId>pig</artifactId>
      <version>${pig.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.pig</groupId>
      <artifactId>pigunit</artifactId>
      <version>${pig.version}</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>${project.finalname}</finalName>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-clean-plugin</artifactId>
        <version>${maven_jar_plugin.version}</version>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>${codehaus.version}</version>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>${maven_jar_plugin.version}</version>
      </plugin>
    </plugins>
  </build>

  <repositories>
    <repository>
      <id>cloudera-repo</id>
      <url>http://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
</project>
Deploy Hive UDF
Deploy a function:
CREATE FUNCTION udf_concat AS 'org.puneetha.hive.udf.ConcatStr' USING JAR 'hdfs:///jars/hive_udf_v1.jar';
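The jar must already exist at the HDFS path given in the USING JAR clause before this statement is run. For a quick test in the current session only, a temporary function can be registered instead. A minimal sketch, assuming your Hive version accepts an HDFS path in ADD JAR (otherwise point it at a local copy of the jar):

-- Make the jar available to the current session
ADD JAR hdfs:///jars/hive_udf_v1.jar;

-- Session-scoped registration; the function disappears when the session ends
CREATE TEMPORARY FUNCTION udf_concat AS 'org.puneetha.hive.udf.ConcatStr';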
Using the UDF:
SELECT udf_concat('hello','world');

Output:
hello world
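The same call works on table columns. A quick sketch against a hypothetical table named employees with first_name and last_name columns (not part of this example project):

-- Concatenate two columns of a hypothetical employees table
SELECT udf_concat(first_name, last_name) AS full_name
FROM employees
LIMIT 10;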
Describe function usage:
DESCRIBE FUNCTION udf_concat;

Output:
+--------------------------------------------------------------+--+
|                           tab_name                            |
+--------------------------------------------------------------+--+
| _FUNC_(STRING, STRING) - RETURN_TYPE(STRING)                   |
| Description: Concatenate two strings, separated by spaces      |
+--------------------------------------------------------------+--+

DESCRIBE FUNCTION EXTENDED udf_concat;

Output:
+--------------------------------------------------------------+--+
|                           tab_name                            |
+--------------------------------------------------------------+--+
| _FUNC_(STRING, STRING) - RETURN_TYPE(STRING)                   |
| Description: Concatenate two strings, separated by spaces      |
| Example:                                                        |
|  > SELECT udf_concat('hello','world') FROM src;                 |
|  hello world                                                    |
+--------------------------------------------------------------+--+
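If a new build of the jar is deployed later, one straightforward approach is to drop and recreate the function so Hive picks up the fresh class, roughly as sketched below (same function name and jar path as above):

DROP FUNCTION IF EXISTS udf_concat;

CREATE FUNCTION udf_concat AS 'org.puneetha.hive.udf.ConcatStr'
USING JAR 'hdfs:///jars/hive_udf_v1.jar';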