I tried to use the REST API to retrieve Speedguide pages, as explained in the user guide (v3.0) of Thomson Reuters Tick History 11.1 REST API, Chapter 5. After figuring out that the error in the first reply can be ignored and that I can go ahead looking for the Extraction ID and the extracted files in it, I can successfully get the data of the page as described in the manual. However, I found that it takes a really long time to get the page. When fetching manually, I usually get the page in 10 seconds. But when fetching with the REST API, it usually takes between 30 seconds and 1 minute to get a Speedguide page — in particular, for the Extractions/ExtractRaw call to complete. Is there anything I can do to fetch the data faster? I attach my complete Python test program for reference.
import datetime import getpass import json import os import re import time import requests def make_dss_odata(od_type, **kwargs): "Make a dict object as DSS odata" kwargs.update({'@odata.type': '#ThomsonReuters.Dss.Api.' + od_type}) return kwargs class ReutersDSSCaller(object): "Make REST API call to Reuters DataScope Select" _BASEURL = 'https://hosted.datascopeapi.reuters.com/RestApi/v1' def __init__(self, user, pw): self._user = user self._pw = pw self._auth_token = None def get(self, *args, **kwargs): "Call REST get method" return self._call('get', *args, **kwargs) def put(self, *args, **kwargs): "Call REST put method" return self._call('put', *args, **kwargs) def delete(self, *args, **kwargs): "Call REST delete method" return self._call('delete', *args, **kwargs) def post(self, *args, **kwargs): "Call REST post method" return self._call('post', *args, **kwargs) def _call(self, method, path, headers=(), body=None, parse=3, stream=False): # pylint: disable=R0913 headers = dict(headers) headers.setdefault('Authorization', 'Token ' + self.get_auth_token()) return self._raw_call(method, path, headers, body, parse, stream) def get_auth_token(self): "Get DSS REST API authentication token" if not self._auth_token: ret = self._raw_call('post', 'Authentication/RequestToken', {}, { 'Credentials': { 'Password': self._pw, 'Username': self._user } }, 3, False) self._auth_token = ret return self._auth_token def _raw_call(self, method, path, extraheaders, body, parse, stream): # pylint: disable=R0913 headers = {'Content-Type': 'application/json; odata.metadata=minimal'} headers.update(extraheaders) body_str = '' if body is None else json.dumps(body) resp = getattr(requests, method)(self._BASEURL + '/' + path, data=body_str, headers=headers, stream=stream) if parse <= 0: return resp if resp.status_code >= 400: raise RuntimeError('DSS API Error %s: %s' % (resp.status_code, resp.reason)) if parse <= 1 or stream: return resp ret = resp.json() return ret if parse <= 2 else 
ret['value'] def download(self, resp, path): "Download content in REST API response" tmp_path = path + '.tmp' with open(tmp_path, 'wb') as fout: if path.endswith('.gz'): content_iter = self._iter_raw_resp(resp, 1024 * 1024) else: content_iter = resp.iter_content(chunk_size=1024 * 1024) for chunk in content_iter: fout.write(chunk) os.rename(tmp_path, path) def _iter_raw_resp(self, resp, chunk_size): while True: chunk = resp.raw.read(chunk_size) if len(chunk) == 0: break yield chunk # Create caller user_name = raw_input('DSS User name: ') passwd = getpass.getpass('DSS password: ') caller = ReutersDSSCaller(user_name, passwd) page = raw_input('Speedguide page: ') if not page: page = 'CBT/TY' print 'Using default,', page # Submit extraction request today = datetime.datetime.combine(datetime.date.today(), datetime.time()) start_date = today - datetime.timedelta(7) print 'Initial call' ret = caller.post( 'Extractions/ExtractRaw', headers={'Prefer': 'respond-async'}, body={ 'ExtractionRequest': make_dss_odata( 'Extractions.ExtractionRequests.' 
'TickHistoryRawExtractionRequest', IdentifierList=make_dss_odata( 'Extractions.ExtractionRequests.InstrumentIdentifierList', ValidationOptions={'AllowHistoricalInstruments': True}, UseUserPreferencesForValidationOptions=False, InstrumentIdentifiers=[ {'Identifier': page, 'IdentifierType': 'Ric'} ] ), Condition={ 'MessageTimeStampIn': 'GmtUtc', 'ReportDateRangeType': 'Range', 'QueryStartDate': start_date.isoformat(), 'QueryEndDate': today.isoformat(), 'ExtractBy': 'Ric', 'SortBy': 'SingleByRic', 'DomainCode': 'MarketPrice', 'DisplaySourceRIC': True }, ), }, parse=1) # Poll for completion if ret.status_code == 202: loc = ret.headers['location'].partition('/v1/')[2] while ret.status_code == 202: print 'Initial call retry' time.sleep(5) ret = caller.get(loc, headers={'Prefer': 'respond-async'}, parse=1) print 'Initial call completed' # Look for extraction ID match = re.search(r'Extraction ID: ([0-9]+)\n', ret.json()['Notes'][0]) eid = match.group(1) # List Report Files file_list = caller.get('Extractions/ReportExtractions(\'%s\')/Files' % eid) # Download the Files for f_spec in file_list: filename = f_spec['ExtractedFileName'] ret = caller.get('Extractions/ExtractedFiles(\'%s\')/$value' % f_spec['ExtractedFileId'], stream=True) print 'Downloading', filename caller.download(ret, filename)