question

Upvotes
Accepted
23 1 1 4

Parsing MRN TRNA single fragment data

From the MRN Elektron Data Models spec:

For a given RIC-MRN_SRC-GUID combination, when a data item requires only a single message, then TOT_SIZE will equal the number of bytes in the FRAGMENT and FRAG_NUM will be 1.

In my code:

If

ntaItem->m_expectedSize > 0 && ntaItem->m_expectedSize == ntaItem->m_fragmentBuffer.length;

then:

decompressedBuf.data = (char*)calloc(ntaItem.m_fragmentBuffer.length * 4, sizeof(char));

decompress(&ntaItem.m_fragmentBuffer, &decompressedBuf);

The decompress function is as follows:

/**
 * Inflates a gzip-wrapped buffer (windowBits = 16 + MAX_WBITS) into _decompressed.
 *
 * @param _compressed    Buffer holding the complete gzip stream; `length` is the
 *                       number of compressed bytes in `data`.
 * @param _decompressed  Destination buffer. On entry `length` is treated as the
 *                       capacity of `data`; if it is too small, `data` is grown
 *                       with realloc(), so it must be NULL or heap memory from
 *                       malloc/calloc/realloc. On success `length` is ALWAYS set
 *                       to the exact number of decompressed bytes.
 * @return RSSL_RET_SUCCESS when the whole stream was inflated (or the input was
 *         empty), RSSL_RET_FAILURE on bad arguments, allocation failure, or any
 *         zlib error (corrupt or truncated stream).
 *
 * The output is raw bytes and is NOT null terminated; callers must honour
 * `_decompressed->length` (e.g. printf("%.*s", length, data)).
 */
RsslRet NTAConsumer::decompress(const RsslBuffer* _compressed, RsslBuffer* _decompressed)
{
    if (!_compressed || !_decompressed)
    {
        return RSSL_RET_FAILURE;
    }

    // Nothing to inflate: report an empty result and stop here. The caller's
    // data pointer is deliberately left untouched so that it neither leaks nor
    // ends up aliasing the compressed buffer.
    if (_compressed->length == 0)
    {
        _decompressed->length = 0;
        return RSSL_RET_SUCCESS;
    }

    const unsigned int maxLength = ~0u;
    if (_compressed->length > maxLength / 4)
    {
        LOG(TAEL_ERROR, "Unable to decompress data: compressed buffer too large");
        return RSSL_RET_FAILURE;
    }

    // Start with a buffer that is about large enough to hold the output and
    // grow it by the same amount each time it fills up.
    unsigned int       unzipBufferLength           = _compressed->length * 4;
    const unsigned int additionalUnzipBufferLength = unzipBufferLength;
    char*              unzipBuffer                 = static_cast<char*>(malloc(unzipBufferLength));
    if (!unzipBuffer)
    {
        LOG(TAEL_ERROR, "Unable to decompress data: out of memory");
        return RSSL_RET_FAILURE;
    }

    z_stream zStrm;
    memset(&zStrm, 0, sizeof(zStrm));
    zStrm.zalloc   = Z_NULL;
    zStrm.zfree    = Z_NULL;
    zStrm.opaque   = Z_NULL;  // must be initialised before inflateInit2
    zStrm.next_in  = reinterpret_cast<Bytef*>(_compressed->data);
    zStrm.avail_in = _compressed->length;

    if (inflateInit2(&zStrm, (16 + MAX_WBITS)) != Z_OK)
    {
        free(unzipBuffer);
        LOG(TAEL_ERROR, "Unable to decompress data: inflateInit2 failed");
        return RSSL_RET_FAILURE;
    }

    bool succeeded = true;
    int  zret      = Z_OK;
    while (zret != Z_STREAM_END)
    {
        // Increase buffer size if it's full.
        if (zStrm.total_out >= unzipBufferLength)
        {
            char* grown = NULL;
            if (additionalUnzipBufferLength <= maxLength - unzipBufferLength)
            {
                grown = static_cast<char*>(
                    realloc(unzipBuffer, unzipBufferLength + additionalUnzipBufferLength));
            }
            if (!grown)
            {
                succeeded = false;
                break;
            }
            unzipBuffer        = grown;
            unzipBufferLength += additionalUnzipBufferLength;
        }

        const unsigned int produced = static_cast<unsigned int>(zStrm.total_out);
        zStrm.next_out  = reinterpret_cast<Bytef*>(unzipBuffer + produced);
        zStrm.avail_out = unzipBufferLength - produced;

        zret = inflate(&zStrm, Z_SYNC_FLUSH);
        if (zret != Z_OK && zret != Z_STREAM_END)
        {
            // Corrupt or truncated stream: do not hand back partial output.
            succeeded = false;
            break;
        }
    }

    if (inflateEnd(&zStrm) != Z_OK)
    {
        succeeded = false;
    }
    if (!succeeded)
    {
        free(unzipBuffer);
        LOG(TAEL_ERROR, "Unable to decompress data: inflate failed");
        return RSSL_RET_FAILURE;
    }

    const unsigned int outputLength = static_cast<unsigned int>(zStrm.total_out);
    if (!_decompressed->data || _decompressed->length < outputLength)
    {
        // Keep the old pointer intact if realloc fails so the caller can still free it.
        char* resized = static_cast<char*>(
            realloc(_decompressed->data, outputLength > 0 ? outputLength : 1));
        if (!resized)
        {
            free(unzipBuffer);
            LOG(TAEL_ERROR, "Unable to decompress data: out of memory");
            return RSSL_RET_FAILURE;
        }
        _decompressed->data = resized;
    }

    // memcpy, not strncpy: the payload is binary and may contain NUL bytes.
    memcpy(_decompressed->data, unzipBuffer, outputLength);
    // Always publish the real size; a stale, larger length makes callers read
    // past the decompressed payload and see trailing garbage.
    _decompressed->length = outputLength;

    free(unzipBuffer);
    return RSSL_RET_SUCCESS;
}

When I print the decompressed data I see trailing garbage characters at times. Please can you help resolve the parsing issue?

For example:

{"analytics":{"analyticsScores":[{"assetClass":"CMPNY","assetCodes":["P:4295906442","R:FITB.O","R:FITB.OQ"],"assetId":"4295906442","assetName":"Fifth Third Bancorp","brokerAction":"UNDEFINED","firstMentionSentence":1,"linkedIds":[{"idPosition":0,"linkedId":"tr:Bw53zx21a_1803192Rxc8Npn0GKDazbXeuL3x6tqy0DwdBbyXJoqBY"},{"idPosition":1,"linkedId":"tr:Bw33SsfHa_18031521zCrUtMvVPYx7mDyov4S+np/9B9vNmh89QWsW"}],"noveltyCounts":[{"itemCount":1,"window":"12H"},{"itemCount":1,"window":"24H"},{"itemCount":1,"window":"3D"},{"itemCount":2,"window":"5D"},{"itemCount":2,"window":"7D"}],"priceTargetIndicator":"UNDEFINED","relevance":1.00000,"sentimentClass":1,"sentimentNegative":0.0896546,"sentimentNeutral":0.342679,"sentimentPositive":0.567667,"sentimentWordCount":629,"volumeCounts":[{"itemCount":2,"window":"12H"},{"itemCount":2,"window":"24H"},{"itemCount":2,"window":"3D"},{"itemCount":12,"window":"5D"},{"itemCount":17,"window":"7D"}]}],"newsItem":{"bodySize":4377,"companyCount":1,"exchangeAction":"UNDEFINED","headlineTag":"","marketCommentary":false,"sentenceCount":35,"wordCount":634},"systemVersion":"TS:40060091"},"id":"tr:Bw6t7HLsa_180319229lzk5RNAfh1wEpeo0Fk66vPj8mB0uqLP75Qf","newsItem":{"dataType":"News","feedFamilyCode":"tr","headline":"Fifth Third Bank Introduces Card-free 
ATMs","language":"en","metadata":{"altId":"nBw6t7HLsa","audiences":["NP:BSW","NP:CNR"],"feedTimestamp":"2018-03-19T16:25:00.223Z","firstCreated":"2018-03-19T16:25:00.174Z","isArchive":false,"takeSequence":1},"provider":"NS:BSW","sourceId":"Bw6t7HLsa_180319229lzk5RNAfh1wEpeo0Fk66vPj8mB0uqLP75Qf","sourceTimestamp":"2018-03-19T16:25:00.174Z","subjects":["B:125","B:126","B:127","B:128","B:161","B:1616","B:162","B:168","B:169","B:172","B:174","B:195","B:234","B:239","B:242","B:261","B:262","B:278","B:279","B:282","B:34","B:43","B:49","G:4","G:6J","G:9","G:9F","M:1P2","M:1QD","M:1WN","M:3","M:32","M:E7","M:H1","M:I4","M:S","M:Z","R:FITB.O","N2:AMERS","N2:BISV","N2:BISV08","N2:BNKCOM","N2:BNKS","N2:BSUP","N2:BSVC","N2:CMPELC","N2:CMPNY","N2:CMSS","N2:CMSS08","N2:COMP08","N2:ENT","N2:FINS","N2:FINS08","N2:FLM","N2:GEN","N2:HARW","N2:INDS","N2:INDS08","N2:ISER","N2:ISER08","N2:LEN","N2:NAMER","N2:NEWR","N2:SOCI","N2:SOFW","N2:SRVCS","N2:SWIT","N2:TECH","N2:TECH08","N2:TEEQ","N2:TEEQ08","N2:TMT","N2:US","N2:USAGA","N2:WWW","P:4295906442"],"urgency":3}}q<BE>^A
elektronrefinitiv-realtimeelektron-sdkmrn
icon clock
10 |1500

Up to 2 attachments (including images) can be used with a maximum of 5.0 MiB each and 10.0 MiB total.

1 Answer

Upvotes
Accepted
38.1k 71 35 53

It may relate to how the application prints the decompressed data. The decompressed data is a buffer, not a null-terminated string. To print it, pass the buffer length explicitly, as in the following call.

printf("%.*s\n\n", decompressedBuf.length, decompressedBuf.data);	

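The asterisk (*) in "%.*s" tells printf() to take the precision (the maximum number of characters to print) from the argument list. If you use a plain "%s" format, printf() keeps reading until it finds a null byte, so it can print garbage characters past the end of the buffer.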

You can run an example in the ETA Consumer - Request and Decode Machine Readable News tutorial to verify the problem.

icon clock
10 |1500

Up to 2 attachments (including images) can be used with a maximum of 5.0 MiB each and 10.0 MiB total.

Click below to post an Idea Post Idea