Bug 2230233

Summary: [s3select][json]: rgw crashes seen for query "select min() from s3object[*].phonenumbers;"
Product: [Red Hat Storage] Red Hat Ceph Storage Reporter: Hemanth Sai <hmaheswa>
Component: RGW Assignee: gal salomon <gsalomon>
Status: ASSIGNED --- QA Contact: Madhavi Kasturi <mkasturi>
Severity: high Docs Contact:
Priority: unspecified    
Version: 7.0 CC: ceph-eng-bugs, cephqe-warriors
Target Milestone: ---   
Target Release: 7.0   
Hardware: Unspecified   
OS: Unspecified   
Whiteboard:
Fixed In Version: Doc Type: If docs needed, set a value
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: Bug
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:

Description Hemanth Sai 2023-08-09 04:32:08 UTC
Description of problem:
The rgw daemon crashes while executing the query below:
[cephuser@ceph-hmaheswa-rhcs7-fiisfo-node6 ~]$ aws s3api select-object-content --endpoint-url http://10.0.210.237:80 --bucket bkt1 --key example1_json --expression-type 'SQL' --input-serialization '{"JSON": {"Type": "DOCUMENT"}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --expression "select min() from s3object[*].phonenumbers;" /dev/stdout

("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
[cephuser@ceph-hmaheswa-rhcs7-fiisfo-node6 ~]$


crash info:
[root@ceph-hmaheswa-rhcs7-fiisfo-node5 2023-08-09T03:53:31.840087Z_f6c6d2c6-107a-4fba-a173-87f9e88bf1ed]# cat meta 
{
    "crash_id": "2023-08-09T03:53:31.840087Z_f6c6d2c6-107a-4fba-a173-87f9e88bf1ed",
    "timestamp": "2023-08-09T03:53:31.840087Z",
    "process_name": "radosgw",
    "entity_name": "client.rgw.rgw.1.ceph-hmaheswa-rhcs7-fiisfo-node5.vnrgwe",
    "ceph_version": "18.1.2-1.el9cp",
    "utsname_hostname": "ceph-hmaheswa-rhcs7-fiisfo-node5",
    "utsname_sysname": "Linux",
    "utsname_release": "5.14.0-284.25.1.el9_2.x86_64",
    "utsname_version": "#1 SMP PREEMPT_DYNAMIC Thu Jul 20 09:11:28 EDT 2023",
    "utsname_machine": "x86_64",
    "os_name": "Red Hat Enterprise Linux",
    "os_id": "rhel",
    "os_version_id": "9.2",
    "os_version": "9.2 (Plow)",
    "backtrace": [
        "/lib64/libc.so.6(+0x54df0) [0x7ff894425df0]",
        "/usr/bin/radosgw(+0x61df04) [0x55793a39ff04]",
        "/usr/bin/radosgw(+0x6286c2) [0x55793a3aa6c2]",
        "/usr/bin/radosgw(+0xb9712c) [0x55793a91912c]",
        "/usr/bin/radosgw(+0x634c00) [0x55793a3b6c00]",
        "/usr/bin/radosgw(+0x61dc0d) [0x55793a39fc0d]",
        "/usr/bin/radosgw(+0xb144d4) [0x55793a8964d4]",
        "/usr/bin/radosgw(+0x61fe06) [0x55793a3a1e06]",
        "(RGWSelectObj_ObjStore_S3::run_s3select_on_json(char const*, char const*, unsigned long)+0x9de) [0x55793a3c62be]",
        "(RGWSelectObj_ObjStore_S3::json_processing(ceph::buffer::v15_2_0::list&, long, long)+0x633) [0x55793a3c96f3]",
        "(RGWRados::get_obj_iterate_cb(DoutPrefixProvider const*, rgw_raw_obj const&, long, long, long, bool, RGWObjState*, void*)+0x11a) [0x55793a4b7aaa]",
        "/usr/bin/radosgw(+0x730dd6) [0x55793a4b2dd6]",
        "(RGWRados::iterate_obj(DoutPrefixProvider const*, RGWObjectCtx&, RGWBucketInfo&, rgw_obj const&, long, long, unsigned long, int (*)(DoutPrefixProvider const*, rgw_raw_obj const&, long, long, long, bool, RGWObjState*, void*), void*, optional_yield)+0x428) [0x55793a4b83a8]",
        "(RGWRados::Object::Read::iterate(DoutPrefixProvider const*, long, long, RGWGetDataCB*, optional_yield)+0x134) [0x55793a4b8b64]",
        "(RGWGetObj::execute(optional_yield)+0xf59) [0x55793a2e3809]",
        "(RGWSelectObj_ObjStore_S3::execute(optional_yield)+0xc1) [0x55793a3cb5b1]",
        "(rgw_process_authenticated(RGWHandler_REST*, RGWOp*&, RGWRequest*, req_state*, optional_yield, rgw::sal::Driver*, bool)+0xa72) [0x55793a1951a2]",
        "(process_request(RGWProcessEnv const&, RGWRequest*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, RGWRestfulIO*, optional_yield, rgw::dmclock::Scheduler*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, std::chrono::duration<unsigned long, std::ratio<1l, 1000000000l> >*, int*)+0x1039) [0x55793a198719]",
        "/usr/bin/radosgw(+0xb4f176) [0x55793a8d1176]",
        "/usr/bin/radosgw(+0x371ce1) [0x55793a0f3ce1]",
        "make_fcontext()"
    ]
}
[root@ceph-hmaheswa-rhcs7-fiisfo-node5 2023-08-09T03:53:31.840087Z_f6c6d2c6-107a-4fba-a173-87f9e88bf1ed]#


json data uploaded:
{
 "firstName": "Joe",
 "lastName": "Jackson",
 "gender": "male",
 "age": "twenty",
 "address": {
 "streetAddress": "101",
 "city": "San Diego",
 "state": "CA"
 },

 "firstName": "Joe_2",
 "lastName": "Jackson_2",
 "gender": "male",
 "age": 21,
 "address": {
 "streetAddress": "101",
 "city": "San Diego",
 "state": "CA"
 },

 "phoneNumbers": [
   { "type": "home1", "number": "734928_1","addr": 11 },
   { "type": "home2", "number": "734928_2","addr": 22 },
   { "type": "home3", "number": "734928_3","addr": 33 },
   { "type": "home4", "number": "734928_4","addr": 44 },
   { "type": "home5", "number": "734928_5","addr": 55 },
   { "type": "home6", "number": "734928_6","addr": 66 },
   { "type": "home7", "number": "734928_7","addr": 77 },
   { "type": "home8", "number": "734928_8","addr": 88 },
   { "type": "home9", "number": "734928_9","addr": 99 },
   { "type": "home10", "number": "734928_10","addr": 100 }
 ],

 "key_after_array": "XXX",

 "description" : {
   "main_desc" : "value_1",
   "second_desc" : "value_2"
 }
}


Version-Release number of selected component (if applicable):
ceph version 18.1.2-1.el9cp

How reproducible:
always

Steps to Reproduce:
1. Deploy an RHCS 7.0 ceph cluster.
2. Upload a JSON object using aws-cli.
3. Execute the query "select min() from s3object[*].phonenumbers;"

Actual results:
The rgw daemon crashes.

Expected results:
It should display a proper error instead of crashing.

Additional info:
rgw logs and crash logs are present here: http://magna002.ceph.redhat.com/ceph-qe-logs/HemanthSai/s3select_json_bz_logs/ceph-client.rgw.rgw.1.ceph-hmaheswa-rhcs7-fiisfo-node5.vnrgwe.log

rgw node:10.0.210.237
creds: root/passwd ; cephuser/cephuser

Comment 1 RHEL Program Management 2023-08-09 04:32:16 UTC
Please specify the severity of this bug. Severity is defined here:
https://bugzilla.redhat.com/page.cgi?id=fields.html#bug_severity.

Comment 3 gal salomon 2023-08-16 15:44:27 UTC
https://github.com/ceph/ceph/pull/52651