-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsolution.py
More file actions
115 lines (91 loc) · 3.89 KB
/
solution.py
File metadata and controls
115 lines (91 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Solution for https://coderun.yandex.ru/problem/user-logs-sessions-events
# Other solutions: https://github.com/Melodiz/CodeRun
from datetime import datetime, timedelta
def count_sessions(logs, target_date="2020-04-19", gap_minutes=30):
    """Count user sessions that started on ``target_date``.

    Each record in ``logs`` is a comma-separated string of exactly four
    fields: ``timestamp,user_id,event_type,parameter``, where the timestamp
    is formatted as ``%Y-%m-%d_%H:%M:%S``.

    A user's first event opens a session; every later event that comes
    ``gap_minutes`` or more after that user's previous event opens a new one.

    Args:
        logs: Iterable of log record strings. Blank records are skipped.
        target_date: Day to count session starts for, as ``YYYY-MM-DD``.
        gap_minutes: Minimum pause (in minutes) between two consecutive
            events of one user that splits them into separate sessions.

    Returns:
        Number of sessions, over all users, whose first event falls on
        ``target_date``.

    Raises:
        ValueError: If a non-blank record does not have four fields or a
            timestamp / ``target_date`` does not match the expected format.
    """
    target_day = datetime.strptime(target_date, "%Y-%m-%d").date()
    session_gap = timedelta(minutes=gap_minutes)

    # Group event timestamps by user.
    activity_by_user = {}
    for log in logs:
        if not log.strip():
            # A blank line in the input would otherwise break the unpacking.
            continue
        date, user_id, _event_type, _parameter = log.split(',')
        moment = datetime.strptime(date, "%Y-%m-%d_%H:%M:%S")
        activity_by_user.setdefault(user_id, []).append(moment)

    sessions = 0
    for activities in activity_by_user.values():
        activities.sort()
        previous = None
        for current in activities:
            # The first event, or one after a long enough pause, starts a session.
            starts_session = previous is None or current - previous >= session_gap
            if starts_session and current.date() == target_day:
                sessions += 1
            previous = current
    return sessions
def find_most_viewed_day(logs):
    """Return the largest number of distinct video viewers seen on one day.

    Each record in ``logs`` is a comma-separated string of exactly four
    fields: ``timestamp,user_id,event_type,parameter``. A record counts as
    a video view when ``event_type`` is ``"2"`` and ``parameter`` is
    ``"video"``.

    Despite the name, the function returns the viewer count of the best
    day, not the day itself.

    Args:
        logs: Iterable of log record strings. Blank records are skipped.

    Returns:
        The maximum, over all calendar days, of the number of unique
        ``user_id`` values with a video view on that day; ``0`` when there
        are no video views at all.

    Raises:
        ValueError: If a non-blank record does not have four fields or the
            timestamp of a video view is not ``%Y-%m-%d_%H:%M:%S``.
    """
    viewers_by_day = {}  # calendar date -> set of user_ids
    for log in logs:
        if not log.strip():
            # A blank line in the input would otherwise break the unpacking.
            continue
        date, user_id, event_type, parameter = log.split(',')
        if event_type == "2" and parameter == "video":
            day = datetime.strptime(date, "%Y-%m-%d_%H:%M:%S").date()
            viewers_by_day.setdefault(day, set()).add(user_id)

    return max((len(viewers) for viewers in viewers_by_day.values()), default=0)
def find_most_active_interval(logs, window_minutes=5):
    """Find the start of the busiest fixed-length time window.

    Each record in ``logs`` is a comma-separated string of exactly four
    fields: ``timestamp,user_id,event_type,parameter``, where the timestamp
    is formatted as ``%Y-%m-%d_%H:%M:%S``. Candidate windows start at an
    event timestamp and cover ``[start, start + window_minutes)``.

    Args:
        logs: Iterable of log record strings. Blank records are skipped.
        window_minutes: Length of the window in minutes; must be positive.

    Returns:
        The start of the window containing the most events, formatted as
        ``%Y-%m-%d_%H:%M:%S``. On a tie the later start wins. Returns
        ``None`` when there are no events.

    Raises:
        ValueError: If ``window_minutes`` is not positive, a non-blank
            record does not have four fields, or a timestamp is malformed.
    """
    if window_minutes <= 0:
        raise ValueError("window_minutes must be positive")
    window = timedelta(minutes=window_minutes)

    timestamps = []
    for log in logs:
        if not log.strip():
            # A blank line in the input would otherwise break the unpacking.
            continue
        date, _user_id, _event_type, _parameter = log.split(',')
        timestamps.append(datetime.strptime(date, "%Y-%m-%d_%H:%M:%S"))
    timestamps.sort()

    best_count = 0
    best_start = None
    total = len(timestamps)
    right = 0  # first index whose timestamp is outside the current window
    for left, start in enumerate(timestamps):
        end = start + window
        # Timestamps are sorted, so the right edge only ever moves forward;
        # this keeps the whole scan linear instead of rescanning per start.
        while right < total and timestamps[right] < end:
            right += 1
        count = right - left
        # Prefer the later start time when two windows hold equally many events.
        if count > best_count or (count == best_count and start > best_start):
            best_count = count
            best_start = start

    if best_start is None:
        return None
    return best_start.strftime("%Y-%m-%d_%H:%M:%S")
def main():
    """Read ``log.csv`` and print the three answers on one line.

    The file is read from the current working directory. Blank lines are
    dropped, and a first line starting with ``date`` or ``time`` is treated
    as a header and skipped. The output is the number of sessions, the
    maximum daily count of unique video viewers, and the start of the most
    active interval, separated by single spaces.
    """
    with open("log.csv", "r") as file:
        # Drop blank lines up front: the parsers expect four fields per record.
        data = [row for row in (line.strip() for line in file) if row]

    # Skip header if present
    if data and data[0].startswith(('date', 'time')):
        data = data[1:]

    sessions = count_sessions(data)
    most_viewed_day_count = find_most_viewed_day(data)
    most_active_interval = find_most_active_interval(data)

    # Output in required format: sessions, max_users, interval_start
    print(f"{sessions} {most_viewed_day_count} {most_active_interval}")
# Run the solution only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()