@@ -18,7 +18,7 @@ use arroyo_operator::operator::SourceOperator;
1818use arroyo_rpc:: df:: ArroyoSchema ;
1919use arroyo_rpc:: formats:: { Format , RawStringFormat } ;
2020use arroyo_rpc:: grpc:: rpc:: { CheckpointMetadata , OperatorCheckpointMetadata , OperatorMetadata } ;
21- use arroyo_rpc:: { CheckpointCompleted , ControlMessage , ControlResp } ;
21+ use arroyo_rpc:: { CheckpointCompleted , ControlMessage , ControlResp , MetadataField } ;
2222use arroyo_types:: {
2323 single_item_hash_map, to_micros, ArrowMessage , CheckpointBarrier , SignalMessage , TaskInfo ,
2424} ;
@@ -87,6 +87,7 @@ impl KafkaTopicTester {
8787 schema_resolver : None ,
8888 client_configs : HashMap :: new ( ) ,
8989 messages_per_second : NonZeroU32 :: new ( 100 ) . unwrap ( ) ,
90+ metadata_fields : vec ! [ ] ,
9091 } ) ;
9192
9293 let ( to_control_tx, control_rx) = channel ( 128 ) ;
@@ -342,3 +343,101 @@ async fn test_kafka() {
342343 )
343344 . await ;
344345}
346+
347+ #[ tokio:: test]
348+ async fn test_kafka_with_metadata_fields ( ) {
349+ let mut kafka_topic_tester = KafkaTopicTester {
350+ topic : "__arroyo-source-test_metadata" . to_string ( ) ,
351+ server : "0.0.0.0:9092" . to_string ( ) ,
352+ group_id : Some ( "test-consumer-group" . to_string ( ) ) ,
353+ } ;
354+
355+ let mut task_info = arroyo_types:: get_test_task_info ( ) ;
356+ task_info. job_id = format ! ( "kafka-job-{}" , random:: <u64 >( ) ) ;
357+
358+ kafka_topic_tester. create_topic ( ) . await ;
359+
360+ // Prepare metadata fields
361+ let metadata_fields = vec ! [ MetadataField {
362+ field_name: "offset" . to_string( ) ,
363+ key: "offset_id" . to_string( ) ,
364+ } ] ;
365+
366+ // Set metadata fields in KafkaSourceFunc
367+ let mut kafka = KafkaSourceFunc {
368+ bootstrap_servers : kafka_topic_tester. server . clone ( ) ,
369+ topic : kafka_topic_tester. topic . clone ( ) ,
370+ group_id : kafka_topic_tester. group_id . clone ( ) ,
371+ group_id_prefix : None ,
372+ offset_mode : SourceOffset :: Earliest ,
373+ format : Format :: RawString ( RawStringFormat { } ) ,
374+ framing : None ,
375+ bad_data : None ,
376+ schema_resolver : None ,
377+ client_configs : HashMap :: new ( ) ,
378+ messages_per_second : NonZeroU32 :: new ( 100 ) . unwrap ( ) ,
379+ metadata_fields,
380+ } ;
381+
382+ let ( _to_control_tx, control_rx) = channel ( 128 ) ;
383+ let ( command_tx, _from_control_rx) = channel ( 128 ) ;
384+ let ( data_tx, _recv) = batch_bounded ( 128 ) ;
385+
386+ let checkpoint_metadata = None ;
387+
388+ let mut ctx = ArrowContext :: new (
389+ task_info. clone ( ) ,
390+ checkpoint_metadata,
391+ control_rx,
392+ command_tx,
393+ 1 ,
394+ vec ! [ ] ,
395+ Some ( ArroyoSchema :: new_unkeyed (
396+ Arc :: new ( Schema :: new ( vec ! [
397+ Field :: new(
398+ "_timestamp" ,
399+ DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ,
400+ false ,
401+ ) ,
402+ Field :: new( "value" , DataType :: Utf8 , false ) ,
403+ Field :: new( "offset" , DataType :: Int64 , false ) ,
404+ ] ) ) ,
405+ 0 ,
406+ ) ) ,
407+ None ,
408+ vec ! [ vec![ data_tx] ] ,
409+ kafka. tables ( ) ,
410+ )
411+ . await ;
412+
413+ tokio:: spawn ( async move {
414+ kafka. run ( & mut ctx) . await ;
415+ } ) ;
416+
417+ let mut reader = kafka_topic_tester
418+ . get_source_with_reader ( task_info. clone ( ) , None )
419+ . await ;
420+ let mut producer = kafka_topic_tester. get_producer ( ) ;
421+
422+ // Send test data
423+ let expected_messages: Vec < _ > = ( 1u64 ..=21 )
424+ . map ( |i| {
425+ let data = TestData { i } ;
426+ producer. send_data ( data. clone ( ) ) ;
427+ serde_json:: to_string ( & data) . unwrap ( )
428+ } )
429+ . collect ( ) ;
430+
431+ // Verify received messages
432+ reader
433+ . assert_next_message_record_values ( expected_messages. into ( ) )
434+ . await ;
435+
436+ reader
437+ . to_control_tx
438+ . send ( ControlMessage :: Stop {
439+ mode : arroyo_rpc:: grpc:: rpc:: StopMode :: Graceful ,
440+ } )
441+ . await
442+ . unwrap ( ) ;
443+ }
0 commit comments