lab_weather/geocoder/bing_batch.py

180 lines
5.4 KiB
Python

#!/usr/bin/python
# coding: utf8
from __future__ import absolute_import, print_function
from geocoder.base import OneResult, MultipleResultsQuery
from geocoder.keys import bing_key
import time
import io
import requests
import logging
import sys
# True when the running interpreter's major version is below 3.
PY2 = sys.version_info[0] < 3

# In-memory buffer class used as a print(..., file=...) target:
# io.BytesIO on Python 2, io.StringIO otherwise.
if PY2:
    csv_io = io.BytesIO
else:
    csv_io = io.StringIO

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
class BingBatchResult(OneResult):
    """One row of a Bing batch geocoding answer.

    The wrapped content is read as an indexable pair: item 0 is exposed as
    ``lat`` and item 1 as ``lng``.
    """

    def __init__(self, content):
        # Stored untouched; lat/lng index into it on demand.
        # NOTE(review): the base-class initialiser is not invoked here -
        # confirm this is intended.
        self._content = content

    @property
    def lat(self):
        """Return item 0 of the content, or None when the content is falsy."""
        return self._content[0] if self._content else None

    @property
    def lng(self):
        """Return item 1 of the content, or None when the content is falsy."""
        return self._content[1] if self._content else None

    def debug(self, verbose=True):
        """Assemble a short textual dump of this result.

        The dump is written into an in-memory buffer and echoed to standard
        output only when ``verbose`` is true.

        Returns:
            Always ``[None, None]``.
        """
        report_lines = (
            '\n',
            '{} result\n'.format(self.__class__.__name__),
            '-----------\n',
            self._content,
        )
        with csv_io() as output:
            for entry in report_lines:
                print(entry, file=output)
            if verbose:
                print(output.getvalue())
        return [None, None]
class BingBatch(MultipleResultsQuery):
    """
    Bing Maps REST Services
    =======================
    The Bing™ Maps REST Services Application Programming Interface (API)
    provides a Representational State Transfer (REST) interface to
    perform tasks such as creating a static map with pushpins, geocoding
    an address, retrieving imagery metadata, or creating a route.

    This class submits a batch to the Geocode Dataflow endpoint, polls the
    resulting job until it completes (or the polling budget runs out) and
    downloads the succeeded rows.

    ``generate_batch`` and ``_adapt_results`` are called here but are not
    defined in this class; presumably subclasses or the base class provide
    them (verify against the rest of the package).

    API Reference
    -------------
    http://msdn.microsoft.com/en-us/library/ff701714.aspx

    Dataflow Reference
    ------------------
    https://msdn.microsoft.com/en-us/library/ff701733.aspx
    """
    provider = 'bing'

    _URL = u'http://spatial.virtualearth.net/REST/v1/Dataflows/Geocode'
    # Default polling budget, in seconds, for a submitted job.
    _BATCH_TIMEOUT = 60
    # Seconds to sleep between two job status polls.
    _BATCH_WAIT = 5
    _RESULT_CLASS = BingBatchResult
    _KEY = bing_key

    def extract_resource_id(self, response):
        """Return the first resource ``id`` found in a decoded job answer.

        Args:
            response: decoded JSON of the job creation call; must contain
                ``resourceSets`` whose items contain ``resources``.

        Raises:
            LookupError: when no resource carries an ``id`` (a missing key
                raises KeyError, which is itself a LookupError).
        """
        for rs in response['resourceSets']:
            for resource in rs['resources']:
                if 'id' in resource:
                    return resource['id']
        raise LookupError('No job ID returned from Bing batch call')

    def _job_get(self, path):
        """GET ``path`` below the dataflow endpoint and return the response.

        Raises:
            requests.exceptions.HTTPError: on a 4xx/5xx answer, so that an
                error payload is never mistaken for job data.
        """
        response = self.session.get(
            u'{}/{}'.format(self._URL, path),
            params={'key': self.provider_key},
            timeout=self.timeout,
            proxies=self.proxies
        )
        response.raise_for_status()
        return response

    def is_job_done(self, job_id):
        """Tell whether the job ``job_id`` has completed.

        Returns:
            True when the job status is ``'Completed'``, False for any other
            non-aborted status.

        Raises:
            LookupError: when the job is reported as ``'Aborted'`` or is
                absent from the answer.
            requests.exceptions.RequestException: on transport or HTTP errors.
        """
        answer = self._job_get(job_id).json()
        for rs in answer['resourceSets']:
            for resource in rs['resources']:
                if resource['id'] == job_id:
                    if resource['status'] == 'Aborted':
                        raise LookupError('Bing job aborted')
                    return resource['status'] == 'Completed'
        raise LookupError('Job ID not found in Bing answer - something is wrong')

    def get_job_result(self, job_id):
        """Download the succeeded output of job ``job_id``.

        Returns:
            The raw body (``response.content``) of the output resource.

        Raises:
            requests.exceptions.RequestException: on transport or HTTP errors.
        """
        return self._job_get(u'{}/output/succeeded'.format(job_id)).content

    def _build_params(self, locations, provider_key, **kwargs):
        """Prepare the batch payload and return the query-string parameters.

        Side effects: stores the generated batch, the number of locations,
        the provider key and the polling budget on the instance.

        Args:
            locations: sized collection of inputs handed to ``generate_batch``.
            provider_key: Bing API key.
            **kwargs: ``timeout`` (optional) overrides the polling budget;
                when absent or None the class default is used.
        """
        self.batch = self.generate_batch(locations)
        self.locations_length = len(locations)
        self.provider_key = provider_key

        # A None budget would break the ``elapsed < budget`` comparison in
        # _connect, so fall back to the class-level default in that case.
        batch_timeout = kwargs.get('timeout')
        if batch_timeout is None:
            batch_timeout = self.__class__._BATCH_TIMEOUT
        self._BATCH_TIMEOUT = batch_timeout

        return {
            'input': 'csv',
            'key': provider_key
        }

    def _build_headers(self, provider_key, **kwargs):
        """Return the HTTP headers used when posting the batch."""
        return {'Content-Type': 'text/plain'}

    def _connect(self):
        """Submit the batch, wait for the job and fetch its output.

        Returns:
            The raw job output on success, or False when a request fails,
            the answer cannot be interpreted, or the job does not complete
            within the polling budget.
        """
        self.status_code = 'Unknown'

        try:
            self.response = response = self.session.post(
                self.url,
                data=self.batch,
                params=self.params,
                headers=self.headers,
                timeout=self.timeout,
                proxies=self.proxies
            )

            # check that response is ok
            self.status_code = response.status_code
            response.raise_for_status()

            # rely on json method to get non-empty well formatted JSON
            json_response = response.json()
            self.url = response.url
            LOGGER.info("Requested %s", self.url)

            # get the resource/job id
            resource_id = self.extract_resource_id(json_response)
            elapsed = 0

            # try for _BATCH_TIMEOUT seconds to retrieve the results of that job
            # (only the sleep intervals are counted, not the request time)
            while elapsed < self._BATCH_TIMEOUT:
                if self.is_job_done(resource_id):
                    return self.get_job_result(resource_id)

                elapsed += self._BATCH_WAIT
                time.sleep(self._BATCH_WAIT)

            LOGGER.error("Job was not finished in time.")

        except (requests.exceptions.RequestException, LookupError) as err:
            # store real status code and error
            # (LookupError also covers KeyError from missing JSON keys)
            self.error = u'ERROR - {}'.format(str(err))
            LOGGER.error("Status code %s from %s: %s",
                         self.status_code, self.url, self.error)

        return False

    def _parse_results(self, response):
        """Turn the raw job output into results, in submission order.

        ``_adapt_results`` is expected to return a mapping keyed by the
        stringified input index; inputs without a row yield a result built
        from None.
        """
        rows = self._adapt_results(response)

        # re looping through the results to give them back in their original order
        for idx in range(self.locations_length):
            self.add(self.one_result(rows.get(str(idx))))

        # False when nothing was added, otherwise the first result.
        self.current_result = len(self) > 0 and self[0]