Source code for rse.main.parsers.github

"""

Copyright (C) 2020 Vanessa Sochat.

This Source Code Form is subject to the terms of the
Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed
with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""

import logging
import requests
from rse.utils.urls import check_response

from .base import ParserBase

# Module-level logger for this parser module. It is not referenced anywhere in
# the visible portion of this file.
bot = logging.getLogger("rse.main.parsers.github")


class GitHubParser(ParserBase):
    """Parser for GitHub repositories.

    Normalizes GitHub repository references to an ``owner/repo`` identifier
    and retrieves a minimal set of repository metadata from the GitHub REST
    API.
    """

    name = "github"
    matchstring = "github"

    # Seconds to wait for each GitHub API call. Without a timeout, requests
    # waits forever on a stalled connection.
    _request_timeout = 30

    # Top-level repository fields copied from the API response when present.
    _repo_fields = (
        "name",
        "url",
        "full_name",
        "html_url",
        "private",
        "description",
        "created_at",
        "updated_at",
        "clone_url",
        "homepage",
        "size",
        "stargazers_count",
        "watchers_count",
        "language",
        "open_issues_count",
        "license",
        "subscribers_count",
    )

    # Fields copied from the nested "owner" object when present.
    _owner_fields = ("html_url", "avatar_url", "login", "type")

    def __init__(self, uid=None, **kwargs):
        """Create the parser.

        Arguments:
         uid (str) : optional repository identifier, handed to ParserBase.
         kwargs    : accepted for interface compatibility; not forwarded.
        """
        super().__init__(uid)

    def _set_uid(self, uid):
        """Given some kind of GitHub url, parse the uid.

        Both https style (https://github.com/owner/repo) and ssh style
        (git@github.com:owner/repo.git) references are accepted, as is a
        plain "owner/repo" string.

        Arguments:
         uid (str) : a GitHub url or identifier.

        Returns: the identifier in the form "owner/repo".
        Raises: ValueError if an owner and repository cannot both be found.
        """
        # ssh style remotes separate the host and the path with a colon.
        # A trailing slash would otherwise produce an empty repository name.
        uid = uid.replace(":", "/").rstrip("/")

        # Remove only a trailing ".git". Removing every occurrence corrupts
        # names that contain it, such as "owner/owner.github.io".
        if uid.endswith(".git"):
            uid = uid[: -len(".git")]

        # Drop the empty components left by "//" in a url scheme.
        parts = [part for part in uid.split("/") if part]
        if len(parts) < 2:
            raise ValueError(
                "Cannot parse an owner and repository from '%s'" % uid
            )
        owner, repo = parts[-2:]
        return "{}/{}".format(owner, repo)

    def load_secrets(self):
        """load secrets, namely the GitHub token

        The token is read from the "TOKEN" setting via get_setting and
        stored on self.token.
        """
        self.token = self.get_setting("TOKEN")

    def get_url(self, data=None):
        """a common function for a parser to return the html url for the
        upper level of metadata

        Arguments:
         data (dict) : metadata to read; self.data is used when this is
                       empty or not provided.
        """
        data = data or self.data
        return data.get("html_url")

    def get_avatar(self, data=None):
        """a common function for a parser to return an image.

        Arguments:
         data (dict) : metadata to read; self.data is used when this is
                       empty or not provided.

        Returns: the owner avatar url, or an empty string if there is none.
        """
        data = data or self.data
        # "owner" may be missing or explicitly None; treat both as empty.
        owner = data.get("owner") or {}
        return owner.get("avatar_url", "")

    def get_description(self, data=None):
        """a common function for a parser to return a description.

        Arguments:
         data (dict) : metadata to read; self.data is used when this is
                       empty or not provided.
        """
        data = data or self.data
        return data.get("description")

    def get_metadata(self, uri=None):
        """Retrieve repository metadata. The common metadata (timestamp) is
        added by the software repository parser, and here we need to
        ensure that the url field is populated with a correct url.

        Two requests are made: one for the repository itself and one for
        its topics. The topics are only added if that request succeeds.

        Arguments:
         uri (str) : a repository uri string to override one currently set

        Returns: the minimal metadata dict (also stored on self.data), or
                 None if the repository request did not succeed.
        """
        if uri:
            self.set_uri(uri)
        self.load_secrets()

        repo = "/".join(self.uid.split("/")[-2:])
        url = "https://api.github.com/repos/%s" % (repo)
        headers = {
            "Accept": "application/vnd.github.symmetra-preview+json",
        }
        if self.token:
            headers["Authorization"] = "token %s" % self.token

        response = requests.get(
            url, headers=headers, timeout=self._request_timeout
        )

        # NOTE(review): check_response is expected to return the decoded
        # response body, or None on failure - confirm in rse.utils.urls.
        data = check_response(response)
        if data is None:
            return None

        # Only save minimal set
        self.data = {
            key: data[key] for key in self._repo_fields if key in data
        }

        # Guard the owner fields the same way as the top level fields, so
        # a partial response does not raise a KeyError.
        owner = data.get("owner") or {}
        self.data["owner"] = {
            key: owner[key] for key in self._owner_fields if key in owner
        }

        # Also try to get topics
        headers.update({"Accept": "application/vnd.github.mercy-preview+json"})
        url = "%s/topics" % url
        response = requests.get(
            url, headers=headers, timeout=self._request_timeout
        )

        topics = check_response(response)
        if topics is not None:
            self.data["topics"] = topics.get("names", [])
        return self.data