gharchive package

Python SDK to access GitHub Archive

Submodules

gharchive.exc module

exception gharchive.exc.NoArchiveForDateException[source]

Bases: Exception

exception gharchive.exc.NoArchiveMatchingCriteraException[source]

Bases: Exception

gharchive.logger module

gharchive.main module

class gharchive.main.GHArchive[source]

Bases: object

Main class for getting GitHub Archive data.

Examples
>>> from gharchive import GHArchive
>>> gh = GHArchive()
...
>>> data = gh.get('6/8/2020', '6/10/2020', filters=[
...     ('repo.name', 'bitcoin/bitcoin'),
...     ('type', 'WatchEvent')
... ])
get(start_date, end_date=None, filters=None)[source]

Get data from the GitHub Archive

Parameters
  • start_date (str) – string of date or datetime to start collection

  • end_date (Optional[str]) – string of date or datetime to end collection, defaults to the same as start date

  • filters (Optional[Sequence[Tuple[str, Union[int, float, str]]]]) – filters in the format of two-element tuples for which the first element is the "."-separated path to look up a value from the Archive object and the second is a value which it should be equal to

Returns

Archive object containing GitHub Archive data

Return type

Archive

Examples
>>> from gharchive import GHArchive
>>> gh = GHArchive()
...
>>> data = gh.get('6/8/2020', '6/10/2020', filters=[
...     ('repo.name', 'bitcoin/bitcoin'),
...     ('type', 'WatchEvent')
... ])

gharchive.models module

class gharchive.models.Actor(id, login, display_login, gravatar_id, url, avatar_url)[source]

Bases: gharchive.models.SeriesSerializable

__init__(id, login, display_login, gravatar_id, url, avatar_url)[source]

Initialize self. See help(type(self)) for accurate signature.

avatar_url: Optional[str]
display_login: Optional[str]
static from_dict(obj)[source]
Return type

Actor

gravatar_id: Optional[str]
id: Optional[int]
login: Optional[str]
to_dict()[source]
Return type

dict

url: Optional[str]
class gharchive.models.Archive(data)[source]

Bases: object

__init__(data)[source]

Initialize self. See help(type(self)) for accurate signature.

data: List[ArchiveElement]
filter(filters)[source]
Return type

Archive

classmethod from_dict_list(data)[source]
classmethod from_gzip_bytes(b)[source]
classmethod from_response(resp)[source]
to_df()[source]
Return type

DataFrame

to_dict_list()[source]
Return type

List[dict]

class gharchive.models.ArchiveElement(id, type, actor, repo, payload, public, created_at)[source]

Bases: gharchive.models.SeriesSerializable

__init__(id, type, actor, repo, payload, public, created_at)[source]

Initialize self. See help(type(self)) for accurate signature.

actor: Optional[Actor]
created_at: Optional[datetime]
static from_dict(obj)[source]
Return type

ArchiveElement

id: Optional[str]
payload: Optional[Payload]
public: Optional[bool]
repo: Optional[Repo]
to_dict()[source]
Return type

dict

type: Optional[str]
class gharchive.models.Author(name, email)[source]

Bases: gharchive.models.SeriesSerializable

__init__(name, email)[source]

Initialize self. See help(type(self)) for accurate signature.

email: Optional[str]
static from_dict(obj)[source]
Return type

Author

name: Optional[str]
to_dict()[source]
Return type

dict

class gharchive.models.Commit(sha, author, message, distinct, url)[source]

Bases: gharchive.models.SeriesSerializable

__init__(sha, author, message, distinct, url)[source]

Initialize self. See help(type(self)) for accurate signature.

author: Optional[Author]
distinct: Optional[bool]
static from_dict(obj)[source]
Return type

Commit

message: Optional[str]
sha: Optional[str]
to_dict()[source]
Return type

dict

url: Optional[str]
class gharchive.models.Payload(ref, ref_type, pusher_type, push_id, size, distinct_size, head, before, commits)[source]

Bases: gharchive.models.SeriesSerializable

__init__(ref, ref_type, pusher_type, push_id, size, distinct_size, head, before, commits)[source]

Initialize self. See help(type(self)) for accurate signature.

before: Optional[str]
commits: Optional[List[Commit]]
distinct_size: Optional[int]
static from_dict(obj)[source]
Return type

Payload

head: Optional[str]
push_id: Optional[int]
pusher_type: Optional[str]
ref: Optional[str]
ref_type: Optional[str]
size: Optional[int]
to_dict()[source]
Return type

dict

class gharchive.models.Repo(id, name, url)[source]

Bases: gharchive.models.SeriesSerializable

__init__(id, name, url)[source]

Initialize self. See help(type(self)) for accurate signature.

static from_dict(obj)[source]
Return type

Repo

id: Optional[int]
name: Optional[str]
to_dict()[source]
Return type

dict

url: Optional[str]
class gharchive.models.SeriesSerializable[source]

Bases: object

to_dict()[source]
Return type

dict

to_series()[source]
Return type

Series

gharchive.models.from_bool(x)[source]
Return type

bool

gharchive.models.from_datetime(x)[source]
Return type

datetime

gharchive.models.from_int(x)[source]
Return type

int

gharchive.models.from_list(f, x)[source]
Return type

List[~T]

gharchive.models.from_none(x)[source]
Return type

Any

gharchive.models.from_str(x)[source]
Return type

str

gharchive.models.from_union(fs, x)[source]
gharchive.models.to_class(c, x)[source]
Return type

dict

gharchive.search module

class gharchive.search.SearchDates(start_date, end_date=None)[source]

Bases: object

__init__(start_date, end_date=None)[source]

Initialize self. See help(type(self)) for accurate signature.

make_strs()[source]
Return type

List[str]

strings: List[str]

gharchive.unzip module

gharchive.unzip.decompress(b)[source]
Return type

bytes