#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""DEPRECATED mix-in handler for bulk loading data into an application.
Please use the new bulkloader.
"""
import Cookie
import StringIO
import csv
import httplib
import os
import traceback
import wsgiref.handlers
from google.appengine.api import datastore
from google.appengine.ext import webapp
from google.appengine.ext.bulkload import constants
[docs]def Validate(value, type):
""" Checks that value is non-empty and of the right type.
Raises ValueError if value is None or empty, TypeError if it's not the given
type.
Args:
value: any value
type: a type or tuple of types
"""
if not value:
raise ValueError('Value should not be empty; received %s.' % value)
elif not isinstance(value, type):
raise TypeError('Expected a %s, but received %s (a %s).' %
(type, value, value.__class__))
[docs]class Loader(object):
"""A base class for creating datastore entities from input data.
To add a handler for bulk loading a new entity kind into your datastore,
write a subclass of this class that calls Loader.__init__ from your
class's __init__.
If you need to run extra code to convert entities from the input
data, create new properties, or otherwise modify the entities before
they're inserted, override HandleEntity.
See the CreateEntity method for the creation of entities from the
(parsed) input data.
"""
__loaders = {}
__kind = None
__properties = None
def __init__(self, kind, properties):
""" Constructor.
Populates this Loader's kind and properties map. Also registers it with
the bulk loader, so that all you need to do is instantiate your Loader,
and the bulkload handler will automatically use it.
Args:
kind: a string containing the entity kind that this loader handles
properties: list of (name, converter) tuples.
This is used to automatically convert the CSV columns into properties.
The converter should be a function that takes one argument, a string
value from the CSV file, and returns a correctly typed property value
that should be inserted. The tuples in this list should match the
columns in your CSV file, in order.
For example:
[('name', str),
('id_number', int),
('email', datastore_types.Email),
('user', users.User),
('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))),
('description', datastore_types.Text),
]
"""
Validate(kind, basestring)
self.__kind = kind
Validate(properties, list)
for name, fn in properties:
Validate(name, basestring)
assert callable(fn), (
'Conversion function %s for property %s is not callable.' % (fn, name))
self.__properties = properties
Loader.__loaders[kind] = self
[docs] def kind(self):
""" Return the entity kind that this Loader handes.
"""
return self.__kind
[docs] def CreateEntity(self, values, key_name=None):
""" Creates an entity from a list of property values.
Args:
values: list/tuple of str
key_name: if provided, the name for the (single) resulting Entity
Returns:
list of datastore.Entity
The returned entities are populated with the property values from the
argument, converted to native types using the properties map given in
the constructor, and passed through HandleEntity. They're ready to be
inserted.
Raises:
AssertionError if the number of values doesn't match the number
of properties in the properties map.
"""
Validate(values, (list, tuple))
assert len(values) == len(self.__properties), (
'Expected %d CSV columns, found %d.' %
(len(self.__properties), len(values)))
entity = datastore.Entity(self.__kind, name=key_name)
for (name, converter), val in zip(self.__properties, values):
if converter is bool and val.lower() in ('0', 'false', 'no'):
val = False
entity[name] = converter(val)
entities = self.HandleEntity(entity)
if entities is not None:
if not isinstance(entities, (list, tuple)):
entities = [entities]
for entity in entities:
if not isinstance(entity, datastore.Entity):
raise TypeError('Expected a datastore.Entity, received %s (a %s).' %
(entity, entity.__class__))
return entities
[docs] def HandleEntity(self, entity):
""" Subclasses can override this to add custom entity conversion code.
This is called for each entity, after its properties are populated from
CSV but before it is stored. Subclasses can override this to add custom
entity handling code.
The entity to be inserted should be returned. If multiple entities should
be inserted, return a list of entities. If no entities should be inserted,
return None or [].
Args:
entity: datastore.Entity
Returns:
datastore.Entity or list of datastore.Entity
"""
return entity
[docs] @staticmethod
def RegisteredLoaders():
""" Returns a list of the Loader instances that have been created.
"""
return dict(Loader.__loaders)
[docs]class BulkLoad(webapp.RequestHandler):
"""A handler for bulk load requests.
This class contains handlers for the bulkloading process. One for
GET to provide cookie information for the upload script, and one
handler for a POST request to upload the entities.
In the POST request, the body contains the data representing the
entities' property values. The original format was a sequences of
lines of comma-separated values (and is handled by the Load
method). The current (version 1) format is a binary format described
in the Tools and Libraries section of the documentation, and is
handled by the LoadV1 method).
"""
[docs] def get(self):
""" Handle a GET. Just show an info page.
"""
page = self.InfoPage(self.request.uri)
self.response.out.write(page)
[docs] def post(self):
""" Handle a POST. Reads CSV data, converts to entities, and stores them.
"""
self.response.headers['Content-Type'] = 'text/plain'
response, output = self.Load(self.request.get(constants.KIND_PARAM),
self.request.get(constants.CSV_PARAM))
self.response.set_status(response)
self.response.out.write(output)
[docs] def InfoPage(self, uri):
""" Renders an information page with the POST endpoint and cookie flag.
Args:
uri: a string containing the request URI
Returns:
A string with the contents of the info page to be displayed
"""
page = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html><head>
<title>Bulk Loader</title>
</head><body>"""
page += ('The bulk load endpoint is: <a href="%s">%s</a><br />\n' %
(uri, uri))
cookies = os.environ.get('HTTP_COOKIE', None)
if cookies:
cookie = Cookie.BaseCookie(cookies)
for param in ['ACSID', 'dev_appserver_login']:
value = cookie.get(param)
if value:
page += ("Pass this flag to the client: --cookie='%s=%s'\n" %
(param, value.value))
break
else:
page += 'No cookie found!\n'
page += '</body></html>'
return page
[docs] def IterRows(self, reader):
""" Yields a tuple of a line number and row for each row of the CSV data.
Args:
reader: a csv reader for the input data.
"""
line_num = 1
for columns in reader:
yield (line_num, columns)
line_num += 1
[docs] def LoadEntities(self, iter, loader, key_format=None):
"""Generates entities and loads them into the datastore. Returns
a tuple of HTTP code and string reply.
Args:
iter: an iterator yielding pairs of a line number and row contents.
key_format: a format string to convert a line number into an
entity id. If None, then entity ID's are automatically generated.
"""
entities = []
output = []
for line_num, columns in iter:
key_name = None
if key_format is not None:
key_name = key_format % line_num
if columns:
try:
output.append('\nLoading from line %d...' % line_num)
new_entities = loader.CreateEntity(columns, key_name=key_name)
if new_entities:
entities.extend(new_entities)
output.append('done.')
except:
stacktrace = traceback.format_exc()
output.append('error:\n%s' % stacktrace)
return (httplib.BAD_REQUEST, ''.join(output))
datastore.Put(entities)
return (httplib.OK, ''.join(output))
[docs] def Load(self, kind, data):
"""Parses CSV data, uses a Loader to convert to entities, and stores them.
On error, fails fast. Returns a "bad request" HTTP response code and
includes the traceback in the output.
Args:
kind: a string containing the entity kind that this loader handles
data: a string containing the CSV data to load
Returns:
tuple (response code, output) where:
response code: integer HTTP response code to return
output: string containing the HTTP response body
"""
data = data.encode('utf-8')
Validate(kind, basestring)
Validate(data, basestring)
output = []
try:
loader = Loader.RegisteredLoaders()[kind]
except KeyError:
output.append('Error: no Loader defined for kind %s.' % kind)
return (httplib.BAD_REQUEST, ''.join(output))
buffer = StringIO.StringIO(data)
reader = csv.reader(buffer, skipinitialspace=True)
try:
csv.field_size_limit(800000)
except AttributeError:
pass
return self.LoadEntities(self.IterRows(reader), loader)
[docs]def main(*loaders):
"""Starts bulk upload.
Raises TypeError if not, at least one Loader instance is given.
Args:
loaders: One or more Loader instance.
"""
if not loaders:
raise TypeError('Expected at least one argument.')
for loader in loaders:
if not isinstance(loader, Loader):
raise TypeError('Expected a Loader instance; received %r' % loader)
application = webapp.WSGIApplication([('.*', BulkLoad)])
wsgiref.handlers.CGIHandler().run(application)
if __name__ == '__main__':
main()