Package intermine :: Module webservice
[hide private]
[frames | no frames]

Source Code for Module intermine.webservice

  1  from xml.dom import minidom 
  2  import urllib 
  3  from urlparse import urlparse 
  4  import base64 
  5  import UserDict 
  6   
  7  #class UJsonLibDecoder(object): # pragma: no cover 
  8  #    def __init__(self): 
  9  #        self.loads = ujson.decode 
 10  # 
 11  # Use core json for 2.6+, simplejson for <=2.5 
 12  #try: 
 13  #    import ujson 
 14  #    json = UJsonLibDecoder() 
 15  #except ImportError: # pragma: no cover 
 16  try: 
 17      import simplejson as json # Prefer this as it is faster 
 18  except ImportError: # pragma: no cover 
 19      try: 
 20          import json 
 21      except ImportError: 
 22          raise ImportError("Could not find any JSON module to import - " 
 23              + "please install simplejson or jsonlib to continue") 
 24   
 25  # Local intermine imports 
 26  from intermine.query import Query, Template 
 27  from intermine.model import Model, Attribute, Reference, Collection, Column 
 28  from intermine.lists.listmanager import ListManager 
 29  from intermine.errors import ServiceError, WebserviceError 
 30  from intermine.results import InterMineURLOpener, ResultIterator 
 31  from intermine import idresolution 
 32   
 33  """ 
 34  Webservice Interaction Routines for InterMine Webservices 
 35  ========================================================= 
 36   
 37  Classes for dealing with communication with an InterMine 
 38  RESTful webservice. 
 39   
 40  """ 
 41   
 42  __author__ = "Alex Kalderimis" 
 43  __organization__ = "InterMine" 
 44  __license__ = "LGPL" 
 45  __contact__ = "dev@intermine.org" 
class Registry(object, UserDict.DictMixin):
    """
    A Class representing an InterMine registry.
    ===========================================

    Registries are web-services that mines can automatically register
    themselves with, and thus enable service discovery by clients.

    SYNOPSIS
    --------

    example::

        from intermine.webservice import Registry

        # Connect to the default registry service
        # at www.intermine.org/registry
        registry = Registry()

        # Find all the available mines:
        for name, mine in registry.items():
            print name, mine.version

        # Dict-like interface for accessing mines.
        flymine = registry["flymine"]

        # The mine object is a Service
        for gene in flymine.select("Gene.*").results():
            process(gene)

    This class is meant to aid with interoperation between
    mines by allowing them to discover one-another, and
    allow users to always have correct connection information.
    """

    # Appended to the registry url to get the JSON listing of mines.
    MINES_PATH = "/mines.json"

    def __init__(self, registry_url="http://www.intermine.org/registry"):
        """
        Fetch and index the list of mines known to a registry
        =====================================================

        @param registry_url: the base url of the registry (the mine
            listing is read from this url + L{Registry.MINES_PATH})
        """
        self.registry_url = registry_url
        opener = InterMineURLOpener()
        response = opener.open(registry_url + Registry.MINES_PATH)
        try:
            data = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        mine_data = json.loads(data)
        # Registered name -> raw registry record.
        self.__mine_dict = dict(((mine["name"], mine) for mine in mine_data["mines"]))
        # Lower-cased name -> registered name, for case-insensitive lookup.
        self.__synonyms = dict(((name.lower(), name) for name in self.__mine_dict.keys()))
        # Lower-cased name -> Service, filled lazily by __getitem__.
        self.__mine_cache = {}

    def __contains__(self, name):
        """Return whether a mine of this name (ignoring case) is registered."""
        return name.lower() in self.__synonyms

    def __getitem__(self, name):
        """
        Return the Service for the named mine (case-insensitive).

        The Service is constructed on first access from the record's
        "webServiceRoot" value and cached for later lookups.

        @raise KeyError: if no mine of that name is registered
        """
        lc = name.lower()
        if lc in self.__synonyms:
            if lc not in self.__mine_cache:
                self.__mine_cache[lc] = Service(self.__mine_dict[self.__synonyms[lc]]["webServiceRoot"])
            return self.__mine_cache[lc]
        else:
            raise KeyError("Unknown mine: " + name)

    def __setitem__(self, name, item):
        """Registries are read-only: always raises NotImplementedError."""
        raise NotImplementedError("You cannot add items to a registry")

    def __delitem__(self, name):
        """Registries are read-only: always raises NotImplementedError."""
        raise NotImplementedError("You cannot remove items from a registry")

    def keys(self):
        """Return the names of the mines, as given by the registry."""
        return self.__mine_dict.keys()
113
class Service(object):
    """
    A class representing connections to different InterMine WebServices
    ===================================================================

    The intermine.webservice.Service class is the main interface for the user.
    It will provide access to queries and templates, as well as doing the
    background task of fetching the data model, and actually requesting
    the query results.

    SYNOPSIS
    --------

    example::

      from intermine.webservice import Service
      service = Service("http://www.flymine.org/query/service")

      template = service.get_template("Gene_Pathways")
      for row in template.results(A={"value":"zen"}):
        do_something_with(row)
        ...

      query = service.new_query()
      query.add_view("Gene.symbol", "Gene.pathway.name")
      query.add_constraint("Gene", "LOOKUP", "zen")
      for row in query.results():
        do_something_with(row)
        ...

      new_list = service.create_list("some/file/with.ids", "Gene")
      list_on_server = service.get_list("On server")
      in_both = new_list & list_on_server
      in_both.name = "Intersection of these lists"
      for row in in_both:
        do_something_with(row)
        ...

    OVERVIEW
    --------
    The three methods the user will be most concerned with are:
      - L{Service.new_query}: constructs a new query to query a service with
      - L{Service.get_template}: gets a template from the service
      - L{ListManager.create_list}: creates a new list on the service

    For list management information, see L{ListManager}.

    TERMINOLOGY
    -----------
    X{Query} is the term for an arbitrarily complex structured request for
    data from the webservice. The user is responsible for specifying the
    structure that determines what records are returned, and what information
    about each record is provided.

    X{Template} is the term for a predefined "Query", ie: one that has been
    written and saved on the webservice you will access. The definition
    of the query is already done, but the user may want to specify the
    values of the constraints that exist on the template. Templates are accessed
    by name, and while you can easily introspect templates, it is assumed
    you know what they do when you use them.

    X{List} is a saved result set containing a set of objects previously identified
    in the database. Lists can be created and managed using this client library.

    @see: L{intermine.query}
    """
    # Resource paths. The ones used in this class (VERSION_PATH, RELEASE_PATH,
    # WIDGETS_PATH, TEMPLATES_PATH, MODEL_PATH, SERVICE_RESOLUTION_PATH,
    # IDS_PATH) are appended to self.root to build the request url; the rest
    # are presumably consumed the same way by other modules - TODO confirm.
    QUERY_PATH             = '/query/results'
    LIST_ENRICHMENT_PATH   = '/list/enrichment'
    WIDGETS_PATH           = '/widgets'
    QUERY_LIST_UPLOAD_PATH = '/query/tolist'
    QUERY_LIST_APPEND_PATH = '/query/append/tolist'
    MODEL_PATH             = '/model'
    TEMPLATES_PATH         = '/templates/xml'
    TEMPLATEQUERY_PATH     = '/template/results'
    LIST_PATH              = '/lists'
    LIST_CREATION_PATH     = '/lists'
    LIST_RENAME_PATH       = '/lists/rename'
    LIST_APPENDING_PATH    = '/lists/append'
    LIST_TAG_PATH          = '/list/tags'
    SAVEDQUERY_PATH        = '/savedqueries/xml'
    VERSION_PATH           = '/version/ws'
    RELEASE_PATH           = '/version/release'
    SCHEME                 = 'http://'
    SERVICE_RESOLUTION_PATH = "/check/"
    IDS_PATH = "/ids"

200 - def __init__(self, root, 201 username=None, password=None, token=None, 202 prefetch_depth=1, prefetch_id_only=False):
203 """ 204 Constructor 205 =========== 206 207 Construct a connection to a webservice:: 208 209 url = "http://www.flymine.org/query/service" 210 211 # An unauthenticated connection - access to all public data 212 service = Service(url) 213 214 # An authenticated connection - access to private and public data 215 service = Service(url, token="ABC123456") 216 217 218 @param root: the root url of the webservice (required) 219 @param username: your login name (optional) 220 @param password: your password (required if a username is given) 221 @param token: your API access token(optional - used in preference to username and password) 222 223 @raise ServiceError: if the version cannot be fetched and parsed 224 @raise ValueError: if a username is supplied, but no password 225 226 There are two alternative authentication systems supported by InterMine 227 webservices. The first is username and password authentication, which 228 is supported by all webservices. Newer webservices (version 6+) 229 also support API access token authentication, which is the recommended 230 system to use. Token access is more secure as you will never have 231 to transmit your username or password, and the token can be easily changed 232 or disabled without changing your webapp login details. 233 234 """ 235 o = urlparse(root) 236 if not o.scheme: root = "http://" + root 237 if not root.endswith("/service"): root = root + "/service" 238 239 self.root = root 240 self.prefetch_depth = prefetch_depth 241 self.prefetch_id_only = prefetch_id_only 242 # Initialize empty cached data. 
243 self._templates = None 244 self._model = None 245 self._version = None 246 self._release = None 247 self._widgets = None 248 self._list_manager = ListManager(self) 249 self.__missing_method_name = None 250 if token: 251 self.opener = InterMineURLOpener(token=token) 252 elif username: 253 if token: 254 raise ValueError("Both username and token credentials supplied") 255 256 if not password: 257 raise ValueError("Username given, but no password supplied") 258 259 self.opener = InterMineURLOpener((username, password)) 260 else: 261 self.opener = InterMineURLOpener() 262 263 try: 264 self.version 265 except WebserviceError, e: 266 raise ServiceError("Could not validate service - is the root url (%s) correct? %s" % (root, e)) 267 268 if token and self.version < 6: 269 raise ServiceError("This service does not support API access token authentication") 270 271 # Set up sugary aliases 272 self.query = self.new_query

    # Delegated list methods

    # Attribute names that __getattr__ looks up on this service's own
    # ListManager (self._list_manager) rather than raising AttributeError.
    LIST_MANAGER_METHODS = frozenset(["get_list", "get_all_lists",
        "get_all_list_names",
        "create_list", "get_list_count", "delete_lists", "l"])

280 - def list_manager(self):
281 """ 282 Get a new ListManager to use with this service. 283 =============================================== 284 285 This method is primarily useful as a context manager 286 when creating temporary lists, since on context exit all 287 temporary lists will be cleaned up:: 288 289 with service.list_manager() as manager: 290 temp_a = manager.create_list(file_a, "Gene") 291 temp_b = manager.create_list(file_b, "Gene") 292 for gene in (temp_a & temp_b): 293 print gene.primaryIdentifier, "is in both" 294 295 @rtype: ListManager 296 """ 297 return ListManager(self)
298
299 - def __getattribute__(self, name):
300 return object.__getattribute__(self, name)
301
302 - def __getattr__(self, name):
303 if name in self.LIST_MANAGER_METHODS: 304 method = getattr(self._list_manager, name) 305 return method 306 raise AttributeError("Could not find " + name)
307
308 - def __del__(self): # On going out of scope, try and clean up.
309 try: 310 self._list_manager.delete_temporary_lists() 311 except ReferenceError: 312 pass
313 314 @property
315 - def version(self):
316 """ 317 Returns the webservice version 318 ============================== 319 320 The version specifies what capabilities a 321 specific webservice provides. The most current 322 version is 3 323 324 may raise ServiceError: if the version cannot be fetched 325 326 @rtype: int 327 """ 328 if self._version is None: 329 try: 330 url = self.root + self.VERSION_PATH 331 self._version = int(self.opener.open(url).read()) 332 except ValueError, e: 333 raise ServiceError("Could not parse a valid webservice version: " + str(e)) 334 return self._version
335
336 - def resolve_service_path(self, variant):
337 """Resolve the path to optional services""" 338 url = self.root + self.SERVICE_RESOLUTION_PATH + variant 339 return self.opener.open(url).read()
340 341 @property
342 - def release(self):
343 """ 344 Returns the datawarehouse release 345 ================================= 346 347 Service.release S{->} string 348 349 The release is an arbitrary string used to distinguish 350 releases of the datawarehouse. This usually coincides 351 with updates to the data contained within. While a string, 352 releases usually sort in ascending order of recentness 353 (eg: "release-26", "release-27", "release-28"). They can also 354 have less machine readable meanings (eg: "beta") 355 356 @rtype: string 357 """ 358 if self._release is None: 359 self._release = urllib.urlopen(self.root + self.RELEASE_PATH).read() 360 return self._release
361
362 - def load_query(self, xml, root=None):
363 """ 364 Construct a new Query object for the given webservice 365 ===================================================== 366 367 This is the standard method for instantiating new Query 368 objects. Queries require access to the data model, as well 369 as the service itself, so it is easiest to access them through 370 this factory method. 371 372 @return: L{intermine.query.Query} 373 """ 374 return Query.from_xml(xml, self.model, root=root)
375
376 - def select(self, *columns, **kwargs):
377 """ 378 Construct a new Query object with the given columns selected. 379 ============================================================= 380 381 As new_query, except that instead of a root class, a list of 382 output column expressions are passed instead. 383 """ 384 if "xml" in kwargs: 385 return self.load_query(kwargs["xml"]) 386 if len(columns) == 1: 387 view = columns[0] 388 if isinstance(view, Attribute): 389 return Query(self.model, self).select("%s.%s" % (view.declared_in.name, view)) 390 if isinstance(view, Reference): 391 return Query(self.model, self).select("%s.%s.*" % (view.declared_in.name, view)) 392 elif not isinstance(view, Column) and not str(view).endswith("*"): 393 path = self.model.make_path(view) 394 if not path.is_attribute(): 395 return Query(self.model, self).select(str(view) + ".*") 396 return Query(self.model, self).select(*columns)

    # Alias: service.new_query(...) is the same method as service.select(...).
    new_query = select

400 - def get_template(self, name):
401 """ 402 Returns a template of the given name 403 ==================================== 404 405 Tries to retrieve a template of the given name 406 from the webservice. If you are trying to fetch 407 a private template (ie. one you made yourself 408 and is not available to others) then you may need to authenticate 409 410 @see: L{intermine.webservice.Service.__init__} 411 412 @param name: the template's name 413 @type name: string 414 415 @raise ServiceError: if the template does not exist 416 @raise QueryParseError: if the template cannot be parsed 417 418 @return: L{intermine.query.Template} 419 """ 420 try: 421 t = self.templates[name] 422 except KeyError: 423 raise ServiceError("There is no template called '" 424 + name + "' at this service") 425 if not isinstance(t, Template): 426 t = Template.from_xml(t, self.model, self) 427 self.templates[name] = t 428 return t
429 430 @property
431 - def widgets(self):
432 """ 433 The dictionary of widgets from the webservice 434 ============================================== 435 436 The set of widgets available to a service does not 437 change between releases, so they are cached. 438 If you are running a long running process, you may 439 wish to periodically dump the cache by calling 440 L{Service.flush}, or simply get a new Service object. 441 442 @return dict 443 """ 444 if self._widgets is None: 445 sock = self.opener.open(self.root + self.WIDGETS_PATH) 446 text = sock.read() 447 sock.close() 448 data = json.loads(text) 449 if data['error'] is not None: 450 raise ServiceError(data['error']) 451 self._widgets = dict(([w['name'], w] for w in data['widgets'])) 452 return self._widgets
453
454 - def resolve_ids(self, data_type, identifiers, extra = '', case_sensitive = False, wildcards = False):
455 """ 456 Submit an Identifier Resolution Job 457 =================================== 458 459 Request that a set of identifiers be resolved to objects in 460 the data store. 461 462 @param data_type: The type of these identifiers (eg. 'Gene') 463 @type data_type: String 464 465 @param identifiers: The ids to resolve (eg. ['eve', 'zen', 'pparg']) 466 @type identifiers: iterable of string 467 468 @param extra: A disambiguating value (eg. "Drosophila melanogaster") 469 @type extra: String 470 471 @param case_sensitive: Whether to treat IDs case sensitively. 472 @type case_sensitive: Boolean 473 474 @param wildcards: Whether or not to interpret wildcards (eg: "eve*") 475 @type wildcards: Boolean 476 477 @return: {idresolution.Job} The job. 478 """ 479 if self.version < 10: 480 raise ServiceError("This feature requires API version 10+") 481 if not data_type: 482 raise ServiceError("No data-type supplied") 483 if not identifiers: 484 raise ServiceError("No identifiers supplied") 485 486 data = json.dumps({ 487 "type": data_type, 488 "identifiers": list(identifiers), 489 "extra": extra, 490 "caseSensitive": case_sensitive, 491 "wildCards": wildcards 492 }) 493 text = self.opener.post_content(self.root + self.IDS_PATH, data, InterMineURLOpener.JSON) 494 ret = json.loads(text) 495 if ret['error'] is not None: 496 raise ServiceError(ret['error']) 497 if ret['uid'] is None: 498 raise Exception("No uid found in " + ret) 499 500 return idresolution.Job(self, ret['uid'])
501
502 - def flush(self):
503 """ 504 Flushes any cached data. 505 """ 506 self._templates = None 507 self._model = None 508 self._version = None 509 self._release = None 510 self._widgets = None
511 512 @property
513 - def templates(self):
514 """ 515 The dictionary of templates from the webservice 516 =============================================== 517 518 Service.templates S{->} dict(intermine.query.Template|string) 519 520 For efficiency's sake, Templates are not parsed until 521 they are required, and until then they are stored as XML 522 strings. It is recommended that in most cases you would want 523 to use L{Service.get_template}. 524 525 You can use this property however to test for template existence though:: 526 527 if name in service.templates: 528 template = service.get_template(name) 529 530 @rtype: dict 531 532 """ 533 if self._templates is None: 534 sock = self.opener.open(self.root + self.TEMPLATES_PATH) 535 dom = minidom.parse(sock) 536 sock.close() 537 templates = {} 538 for e in dom.getElementsByTagName('template'): 539 name = e.getAttribute('name') 540 if name in templates: 541 raise ServiceError("Two templates with same name: " + name) 542 else: 543 templates[name] = e.toxml() 544 self._templates = templates 545 return self._templates
546 547 @property
548 - def model(self):
549 """ 550 The data model for the webservice you are querying 551 ================================================== 552 553 Service.model S{->} L{intermine.model.Model} 554 555 This is used when constructing queries to provide them 556 with information on the structure of the data model 557 they are accessing. You are very unlikely to want to 558 access this object directly. 559 560 raises ModelParseError: if the model cannot be read 561 562 @rtype: L{intermine.model.Model} 563 564 """ 565 if self._model is None: 566 model_url = self.root + self.MODEL_PATH 567 self._model = Model(model_url, self) 568 return self._model
569
570 - def get_results(self, path, params, rowformat, view, cld=None):
571 """ 572 Return an Iterator over the rows of the results 573 =============================================== 574 575 This method is called internally by the query objects 576 when they are called to get results. You will not 577 normally need to call it directly 578 579 @param path: The resource path (eg: "/query/results") 580 @type path: string 581 @param params: The query parameters for this request as a dictionary 582 @type params: dict 583 @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects" 584 @type rowformat: string 585 @param view: The output columns 586 @type view: list 587 588 @raise WebserviceError: for failed requests 589 590 @return: L{intermine.webservice.ResultIterator} 591 """ 592 return ResultIterator(self, path, params, rowformat, view, cld)
593