Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ jobs:
tests:
runs-on: ubuntu-24.04
env:
# This is a PAT for @choldgraf that only has read-access to this repo.
# We use it to avoid query limits.
GITHUB_ACCESS_TOKEN: "${{ secrets.TOKEN_READONLY }}"
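# Use TOKEN_READONLY when that secret is available (pushes to branches in this repo); otherwise fall back to GITHUB_TOKEN (e.g. PRs from forks, where repository secrets are not exposed).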
# TOKEN_READONLY is a PAT for @choldgraf that only has read-access to this repo.
Comment thread
choldgraf marked this conversation as resolved.
Outdated
GITHUB_ACCESS_TOKEN: "${{ secrets.TOKEN_READONLY || secrets.GITHUB_TOKEN }}"
strategy:
matrix:
include:
Expand Down
10 changes: 5 additions & 5 deletions docs/use.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,17 @@ To include Issues and Pull Requests that were _opened_ in a time period, use the

## Remove bots from the changelog

`github-activity` ships with a known list of bot usernames, but your project may use ones not on our list.
To ignore additional usernames from the changelog, use the `--ignore-contributor` flag:
`github-activity` automatically detects and excludes bot accounts using GitHub's API.
Bot accounts (such as `dependabot` and `github-actions`) are identified by the account type (`Bot`) that GitHub's API reports for each author, reviewer, and commenter.

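To exclude additional contributors from the changelog (human accounts, or automation that runs under a regular user account and is therefore not reported as a bot), use the `--ignore-contributor` flag: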

```
github-activity ... --ignore-contributor robot-one --ignore-contributor 'robot-two*'
github-activity ... --ignore-contributor user-one --ignore-contributor 'test-user-*'
```

Wildcards are matched as per [filename matching semantics](https://docs.python.org/3/library/fnmatch.html#fnmatch.fnmatch).

If this is a generic bot username, consider contributing it back to [our list](https://github.com/executablebooks/github-activity/blob/main/github_activity/github_activity.py#L73).

## Use a GitHub API token

`github-activity` uses the GitHub API to pull information about a repository's activity.
Expand Down
36 changes: 9 additions & 27 deletions github_activity/github_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,29 +96,6 @@
]
)

# exclude known bots from contributor lists
# Also see 'ignore-contributor' flag/configuration option.
BOT_USERS = {
"changeset-bot*",
"codecov*",
"codecov-io*",
"dependabot*",
"github-actions*",
"henchbot*",
"jupyterlab-dev-mode*",
"lgtm-com*",
"meeseeksmachine*",
"names*",
"now*",
"pre-commit-ci*",
"renovate*",
"review-notebook-app*",
"support*",
"stale*",
"todo*",
"welcome*",
}


def get_activity(
target, since, until=None, repo=None, kind=None, auth=None, cache=None
Expand Down Expand Up @@ -484,10 +461,15 @@ def generate_activity_md(
# add column for participants in each issue (not just original author)
data["contributors"] = [[]] * len(data)

# Get bot users from GraphQL data (stored in DataFrame attrs)
bot_users = data.attrs.get("bot_users", set())

def ignored_user(username):
return any(fnmatch.fnmatch(username, bot) for bot in BOT_USERS) or any(
fnmatch.fnmatch(username, user) for user in ignored_contributors
)
if username in bot_users:
return True
if any(fnmatch.fnmatch(username, user) for user in ignored_contributors):
return True
return False

def filter_ignored(userlist):
return {user for user in userlist if not ignored_user(user)}
Expand Down Expand Up @@ -525,7 +507,7 @@ def filter_ignored(userlist):

comment_author = comment_author["login"]
if ignored_user(comment_author):
# ignore bots
# ignore bots and user-specified contributors
continue

# Add to list of commenters on items they didn't author
Expand Down
41 changes: 41 additions & 0 deletions github_activity/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
url
author {
login
__typename
}
}
}
Expand All @@ -31,6 +32,7 @@
authorAssociation
author {
login
__typename
}
}
}
Expand Down Expand Up @@ -81,6 +83,7 @@
authorAssociation
author {
login
__typename
}
reactions(content: THUMBS_UP) {
totalCount
Expand All @@ -100,6 +103,7 @@
{base_elements}
mergedBy {{
login
__typename
}}
mergeCommit {{
oid
Expand Down Expand Up @@ -233,8 +237,45 @@ def request(self, n_pages=100, n_per_page=50):
if not pageInfo["hasNextPage"]:
break

# Extract bot users from raw data before DataFrame conversion
def is_bot(user_dict):
"""Check if a GraphQL user object represents a bot account."""
if not user_dict:
return False
return user_dict.get("__typename") == "Bot"

bot_users = set()
for item in self.issues_and_or_prs:
# Check author
author = item.get("author")
if is_bot(author):
bot_users.add(author["login"])

# Check mergedBy
merged_by = item.get("mergedBy")
if is_bot(merged_by):
bot_users.add(merged_by["login"])

# Check reviewers
reviews = item.get("reviews")
if reviews:
for review in reviews.get("edges", []):
review_author = review["node"].get("author")
if is_bot(review_author):
bot_users.add(review_author["login"])

# Check commenters
comments = item.get("comments")
if comments:
for comment in comments.get("edges", []):
comment_author = comment["node"].get("author")
if is_bot(comment_author):
bot_users.add(comment_author["login"])

# Create a dataframe of the issues and/or PRs
self.data = pd.DataFrame(self.issues_and_or_prs)
# Store bot users in DataFrame metadata (attrs dict)
self.data.attrs["bot_users"] = bot_users

# Add some extra fields
def get_login(user):
Expand Down