~akspecs/numbeo-scraping

139069be2b5c772ae5a9d22562f70c8001485f7d — Rebecca Medrano 7 months ago bf0e188 master
spiders/wiki_data.py: adjust toDecimalCoordinate

	- remove duplicate function and adjust function to handle
          edge cases
	- json2sqlite: wrap latitude and longitude in quotes to fix
          error with null values
2 files changed, 13 insertions(+), 25 deletions(-)

M numbeo/json2sqlite.py
M numbeo/numbeo/spiders/wiki_data.py
M numbeo/json2sqlite.py => numbeo/json2sqlite.py +2 -2
@@ 90,8 90,8 @@ with open('wiki_data.json') as f:
    # Copy the scraped coordinates of every city into its `cities` row.
    for city in cities:
        # Bind the values as parameters instead of formatting them into the
        # SQL text: a missing coordinate (None) is then stored as NULL rather
        # than breaking the statement, and no value can alter the query.
        # NOTE(review): uses qmark placeholders, as the sqlite3 module
        # expects -- confirm `cur` is a sqlite3 cursor.
        cur.execute(
            '''
            UPDATE cities
            SET latitude = ?,
                longitude = ?
            WHERE city_id = ?
            ''',
            (city['latitude'], city['longitude'], city['city_id']),
        )

M numbeo/numbeo/spiders/wiki_data.py => numbeo/numbeo/spiders/wiki_data.py +11 -23
@@ 19,12 19,17 @@ url_ids = {}

# Convert coordinates in degree/minute/second form to decimal
def toDecimalCoordinate(coordinate):
    """Convert a degree/minute/second coordinate string to decimal degrees.

    Accepts strings such as ``40°26′46″N``, ``40°26′N`` or ``40°N``; the
    minute, second and hemisphere parts are all optional.

    Args:
        coordinate: The coordinate text, or ``None``/``''`` when no
            coordinate is available.

    Returns:
        The coordinate as a float, negated for the western and southern
        hemispheres, or ``''`` when ``coordinate`` is ``None`` or empty.

    Raises:
        ValueError: If the text holds no numeric part, or a part that is
            not a number.
    """
    if not coordinate:
        return ''

    # Turn every unit mark into the same separator so one split yields
    # [degrees, minutes, seconds, hemisphere], dropping empty leftovers
    # (e.g. the trailing piece of "40°").
    pieces = coordinate.replace('′', '°').replace('″', '°').split('°')
    fields = [piece.strip() for piece in pieces if piece.strip()]

    hemisphere = None
    if fields and fields[-1] in ('N', 'S', 'E', 'W'):
        hemisphere = fields.pop()
    if not fields:
        raise ValueError(f'no numeric component in coordinate: {coordinate!r}')

    # Degrees, minutes and seconds weigh 1, 1/60 and 1/3600 of a degree;
    # zip() stops at whichever components are actually present.
    decimal = sum(
        float(value) / divisor
        for value, divisor in zip(fields, (1, 60, 3600))
    )
    if hemisphere in ('W', 'S'):
        decimal = -decimal
    return decimal




@@ 45,14 50,6 @@ class WikiDataSpider(scrapy.Spider):
        url_ids[url] = city[0]
        start_urls.append(url)

    def toDecimalCoordinate(coordinate):
        """Convert a degree/minute/second coordinate string to decimal degrees.

        Accepts strings such as ``40°26′46″N``; the minute, second and
        hemisphere parts are optional.

        NOTE(review): declared inside the class without ``self`` -- confirm
        how callers are expected to reach it.

        Args:
            coordinate: The coordinate text, or ``None``/``''`` when no
                coordinate is available.

        Returns:
            The coordinate as a float, negated for the western and southern
            hemispheres, or ``''`` when ``coordinate`` is ``None`` or empty.

        Raises:
            ValueError: If the text holds no numeric part, or a part that
                is not a number.
        """
        if not coordinate:
            return ''

        # A character class splits on any single unit mark; the pieces are
        # text, so each one is converted to float before the arithmetic.
        pieces = re.split('[°′″]', coordinate)
        fields = [piece.strip() for piece in pieces if piece.strip()]

        hemisphere = None
        if fields and fields[-1] in ('N', 'S', 'E', 'W'):
            hemisphere = fields.pop()
        if not fields:
            raise ValueError(
                f'no numeric component in coordinate: {coordinate!r}'
            )

        # Degrees, minutes and seconds weigh 1, 1/60 and 1/3600 of a degree.
        decimal = sum(
            float(value) / divisor
            for value, divisor in zip(fields, (1, 60, 3600))
        )
        if hemisphere in ('W', 'S'):
            decimal = -decimal
        return decimal

    def parse(self, response):
        wiki_img = response.xpath(


@@ 73,12 70,3 @@ class WikiDataSpider(scrapy.Spider):
            'latitude': latitude,
            'longitude': longitude,
        }

    def toDecimalCoordinate(coordinate):
        """Convert a degree/minute/second coordinate string to decimal degrees.

        Accepts strings such as ``40°26′46″N``; the minute, second and
        hemisphere parts are optional.

        NOTE(review): declared inside the class without ``self`` -- confirm
        how callers are expected to reach it.

        Args:
            coordinate: The coordinate text, or ``None``/``''`` when no
                coordinate is available.

        Returns:
            The coordinate as a float, negated for the western and southern
            hemispheres, or ``''`` when ``coordinate`` is ``None`` or empty.

        Raises:
            ValueError: If the text holds no numeric part, or a part that
                is not a number.
        """
        if not coordinate:
            return ''

        # A character class splits on any single unit mark; the pieces are
        # text, so each one is converted to float before the arithmetic.
        pieces = re.split('[°′″]', coordinate)
        fields = [piece.strip() for piece in pieces if piece.strip()]

        hemisphere = None
        if fields and fields[-1] in ('N', 'S', 'E', 'W'):
            hemisphere = fields.pop()
        if not fields:
            raise ValueError(
                f'no numeric component in coordinate: {coordinate!r}'
            )

        # Degrees, minutes and seconds weigh 1, 1/60 and 1/3600 of a degree.
        decimal = sum(
            float(value) / divisor
            for value, divisor in zip(fields, (1, 60, 3600))
        )
        if hemisphere in ('W', 'S'):
            decimal = -decimal
        return decimal