-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.yaml.sample
More file actions
253 lines (213 loc) · 8.83 KB
/
config.yaml.sample
File metadata and controls
253 lines (213 loc) · 8.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Git Stats Configuration Template
# Configuration for analyzing git repositories and uploading to Elasticsearch
#
# IMPORTANT: This configuration supports idempotent updates!
# You can safely run the analysis and upload scripts multiple times without creating duplicates.
# Each commit uses a unique document ID in Elasticsearch:
# - Commits: {repository}_{commit_hash}
#
# Quick update command: ./run.sh
#
# SETUP INSTRUCTIONS:
# 1. Copy this file to config.yaml: cp config.yaml.sample config.yaml
# 2. Update the email_mapping section with your team members
# 3. Update repository_urls with your git repositories
# 4. Adjust other settings as needed
# ============================================================================
# Email Mapping
# ============================================================================
# Map multiple email addresses to a single person name
# This ensures contributors who changed email addresses are counted as one person
email_mapping:
  # Each key is the canonical person name; the list holds every email
  # address that should be attributed to that name.
  "Example Developer":
    - "developer@example.com"
    - "dev@company.com"
  # Automation/bot identities can be grouped under one name the same way.
  "Jenkins":
    - "jenkins@ci.example.com"
    - "github-actions[bot]@users.noreply.github.com"
# ============================================================================
# Parallelization Configuration
# ============================================================================
parallelization:
  # Maximum number of parallel workers for repository analysis.
  # Default: number of CPU cores (or 4 if the CPU count cannot be determined).
  # Set to 1 to disable parallelization.
  max_workers: null  # null = auto-detect CPU count
# ============================================================================
# Repository Configuration
# ============================================================================
repositories:
  # Base directory containing all git repositories to analyze
  base_directory: "./repositories"

  # List of specific repositories to analyze (leave empty to analyze all).
  # If empty, all subdirectories in base_directory will be analyzed.
  repositories_to_analyze: []

  # Git repository URLs to clone/analyze
  repository_urls:
    - "git@github.com:kibotu/git-panorama.git"
    # Add more repositories here

  # Repositories where ALL files should be included (no exclusions).
  # These are typically build tool repositories where "generated" files
  # are the product.
  include_all_files:
    - "example-build-repo"
# ============================================================================
# Analysis Configuration
# ============================================================================
analysis:
  # Date range for analysis (YYYY-MM-DD format).
  # Leave empty for all time. Keep dates quoted so they stay strings
  # instead of being parsed as YAML timestamps.
  start_date: ""
  end_date: ""  # Empty means until now

  # Include commits from all branches (not just main/master)
  all_branches: true

  # Exclude merge commits from analysis
  exclude_merge_commits: true

  # Output directory for generated reports and temporary files
  output_directory: "./git-stats"
# ============================================================================
# Elasticsearch Configuration
# ============================================================================
elasticsearch:
  # Connection target for the Elasticsearch instance
  host: "localhost"
  port: 9200

  # Index name for commit data
  commit_index: "git-commits"

  # Batch size for bulk uploads (number of documents per batch).
  # Higher values = faster uploads but more memory usage.
  # Recommended: 1000-5000 depending on available memory.
  bulk_batch_size: 3000
# ============================================================================
# File Exclusion Rules
# ============================================================================
# Define which files should be excluded from line change analysis
# This focuses metrics on actual developer-written code
exclusions:
  # Files to ALWAYS exclude (regex patterns).
  # Each entry is a mapping with the regex (`pattern`) and a human-readable
  # `description` of what it matches.
  patterns:
    # Gradle wrapper files
    - pattern: ".*gradlew$"
      description: "Gradle wrapper script"
    - pattern: ".*gradlew\\.bat$"
      description: "Gradle wrapper batch file"

    # Lock files
    - pattern: ".*/package-lock\\.json$"
      description: "npm lock file"
    - pattern: ".*/yarn\\.lock$"
      description: "Yarn lock file"
    - pattern: ".*/composer\\.lock$"
      description: "PHP Composer lock file"
    - pattern: ".*/Gemfile\\.lock$"
      description: "Ruby Bundler lock file"
    - pattern: ".*/Podfile\\.lock$"
      description: "CocoaPods lock file"
    - pattern: ".*/pnpm-lock\\.yaml$"
      description: "pnpm lock file"

    # JSON configuration files (with exceptions)
    - pattern: ".*\\.json$"
      description: "JSON configuration files"

    # Data export files
    - pattern: ".*\\.csv$"
      description: "CSV data files"

    # Schema definition files
    - pattern: ".*\\.xsd$"
      description: "XML Schema Definition files"

    # Graphics and image files
    - pattern: ".*\\.svg$"
      description: "SVG graphics files"
    - pattern: ".*\\.png$"
      description: "PNG image files"
    - pattern: ".*\\.webp$"
      description: "WebP image files"
    - pattern: ".*\\.(jpeg|jpg|gif|ico|tiff)$"
      description: "Image files"
    - pattern: ".*\\.lottie$"
      description: "Lottie animation files"

    # Font files
    - pattern: ".*\\.ttf$"
      description: "TrueType font files"

    # Binary and archive files
    - pattern: ".*\\.(jar|zip|tar|enc)$"
      description: "Binary and archive files"

    # Media files
    - pattern: ".*\\.(pdf|mp3|wave|wav)$"
      description: "Media files"

    # Certificate and security files
    - pattern: ".*\\.(crt|pub|csr)$"
      description: "Certificate files"

    # Lock files (general)
    - pattern: ".*\\.(lock|resolved|sum)$"
      description: "Lock and checksum files"

    # Xcode project files
    - pattern: ".*\\.(pbxproj|xcworkspacedata|xcscheme)$"
      description: "Xcode project files"

    # Git configuration
    - pattern: ".*\\.(gitignore|gitkeep|eslintignore)$"
      description: "Git and linter ignore files"

    # Test snapshots
    - pattern: ".*\\.snap$"
      description: "Jest snapshot files"

    # XML configuration files
    - pattern: ".*\\.xml$"
      description: "XML configuration files"

    # Build/distribution directories
    - pattern: ".*/dist/.*"
      description: "Distribution/build output directories"
    - pattern: ".*/build/.*"
      description: "Build output directories"
    - pattern: ".*/node_modules/.*"
      description: "Node modules"
    - pattern: ".*/vendor/.*"
      description: "Vendor dependencies"

    # Source map files
    - pattern: ".*\\.map$"
      description: "Source map files"

    # OpenAPI generator files
    - pattern: ".*/\\.openapi-generator/.*"
      description: "OpenAPI generator metadata"
    - pattern: ".*\\.openapi-generator-ignore$"
      description: "OpenAPI generator ignore file"

  # Files to ALWAYS include (exceptions to exclusion patterns).
  # NOTE(review): patterns that start with ".*/" require a "/" before the
  # file or directory name. If paths are matched relative to the repository
  # root, a root-level file such as "package.json" would not match
  # ".*/package\\.json$" - confirm against the matching code.
  always_include:
    # Project configuration files
    - pattern: ".*/package\\.json$"
      description: "npm package configuration"

    # Documentation files
    - pattern: ".*/README\\.md$"
      description: "README documentation"
    - pattern: ".*/readme\\.md$"
      description: "README documentation (lowercase)"

    # Android project files
    - pattern: ".*/AndroidManifest\\.xml$"
      description: "Android manifest files"
    - pattern: ".*/src/.*/res/.*"
      description: "Android resource files"

  # Repository-specific exclusion rules
  repository_specific: {}
# ============================================================================
# Notes
# ============================================================================
# - All regex patterns use Python regex syntax
# - Patterns are matched against full file paths
# - always_include patterns are exceptions to the exclusion patterns (a file matching both is included)
# - Repository-specific rules override global rules
# - Email addresses in mapping are case-insensitive
#
# Configuration Change Impact:
# - Settings that require Docker restart:
# * elasticsearch.host, elasticsearch.port (requires: docker compose restart)
# * Changes to docker-compose.yml or config/grafana/* (requires: docker compose restart)
#
# - Settings that take effect on next ./run.sh:
# * email_mapping - affects author name mapping
# * repositories.* - affects which repos are cloned/analyzed
# * analysis.* - affects date range, branch filtering, merge commits
# * exclusions.* - affects which files are counted in metrics
# * parallelization.max_workers - affects analysis speed
# * elasticsearch.bulk_batch_size - affects upload batch size
# * elasticsearch.commit_index - affects which index is used
#
# - Environment variable overrides (highest priority):
# * ES_HOST, ES_PORT - override elasticsearch.host/port
# * BATCH_SIZE - overrides elasticsearch.bulk_batch_size
# * DATA_DIR - overrides analysis.output_directory
# * CONFIG_FILE - use different config file