Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 47 additions & 39 deletions montage/labs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,25 @@
DB_CONFIG = os.path.expanduser('~/replica.my.cnf')


IMAGE_COLS = ['img_width',
'img_height',
'img_name',
'img_major_mime',
'img_minor_mime',
'IFNULL(oi.actor_user, ci.actor_user) AS img_user',
'IFNULL(oi.actor_name, ci.actor_name) AS img_user_text',
'IFNULL(oi_timestamp, img_timestamp) AS img_timestamp',
'img_timestamp AS rec_img_timestamp',
'ci.actor_user AS rec_img_user',
'ci.actor_name AS rec_img_text',
'oi.oi_archive_name AS oi_archive_name']
# Column mapping for the new file/filerevision schema on wikireplica.
# The old image/oldimage tables are being removed (deadline: 28 May 2026,
# see https://phabricator.wikimedia.org/T123582 and hatnote/montage#504).
#
# Mapping summary:
# image -> file (img_* cols -> file_* cols, img_actor -> file_actor)
# oldimage -> filerevision (oi_* cols -> fr_* cols, oi_actor -> fr_actor)
FILE_COLS = ['fi.file_width AS img_width',
'fi.file_height AS img_height',
'fi.file_name AS img_name',
'fi.file_media_type AS img_major_mime',
'fi.file_minor_mime AS img_minor_mime',
'IFNULL(fr.actor_user, ci.actor_user) AS img_user',
'IFNULL(fr.actor_name, ci.actor_name) AS img_user_text',
'IFNULL(fr.fr_timestamp, fi.file_timestamp) AS img_timestamp',
'fi.file_timestamp AS rec_img_timestamp',
'ci.actor_user AS rec_img_user',
'ci.actor_name AS rec_img_text',
'fr.fr_archive_name AS oi_archive_name']


class MissingMySQLClient(RuntimeError):
Expand All @@ -42,8 +49,7 @@ def fetchall_from_commonswiki(query, params):
cursor.execute(query, params)
res = cursor.fetchall()

# looking at the schema on labs, it's all varbinary, not varchar,
# so this block converts values
# schema on labs is varbinary, not varchar, so decode each value
ret = []
for rec in res:
new_rec = {}
Expand All @@ -56,28 +62,29 @@ def fetchall_from_commonswiki(query, params):


def get_files(category_name):
# Migrated from image/oldimage to file/filerevision (see issue #504)
query = '''
SELECT {cols}
FROM commonswiki_p.image AS i
LEFT JOIN actor AS ci ON img_actor=ci.actor_id
LEFT JOIN (SELECT oi_name,
oi_actor,
FROM commonswiki_p.`file` AS fi
LEFT JOIN actor AS ci ON fi.file_actor=ci.actor_id
LEFT JOIN (SELECT fr_name,
fr_actor,
actor_user,
actor_name,
oi_timestamp,
oi_archive_name
FROM oldimage
LEFT JOIN actor ON oi_actor=actor.actor_id) AS oi ON img_name=oi.oi_name
fr_timestamp,
fr_archive_name
FROM filerevision
LEFT JOIN actor ON fr_actor=actor.actor_id) AS fr ON fi.file_name=fr.fr_name
JOIN page ON page_namespace = 6
AND page_title = img_name
AND page_title = fi.file_name
JOIN categorylinks ON cl_from = page_id
AND cl_type = 'file'
JOIN linktarget ON cl_target_id = lt_id
AND lt_namespace = 14
AND lt_title = %s
GROUP BY img_name
ORDER BY oi_timestamp ASC;
'''.format(cols=', '.join(IMAGE_COLS))
GROUP BY fi.file_name
ORDER BY fr.fr_timestamp ASC;
'''.format(cols=', '.join(FILE_COLS))
params = (category_name.replace(' ', '_'),)

results = fetchall_from_commonswiki(query, params)
Expand All @@ -86,22 +93,23 @@ def get_files(category_name):


def get_file_info(filename):
# Migrated from image/oldimage to file/filerevision (see issue #504)
query = '''
SELECT {cols}
FROM commonswiki_p.image AS i
LEFT JOIN actor AS ci ON img_actor=ci.actor_id
LEFT JOIN (SELECT oi_name,
oi_actor,
FROM commonswiki_p.`file` AS fi
LEFT JOIN actor AS ci ON fi.file_actor=ci.actor_id
LEFT JOIN (SELECT fr_name,
fr_actor,
actor_user,
actor_name,
oi_timestamp,
oi_archive_name
FROM oldimage
LEFT JOIN actor ON oi_actor=actor.actor_id) AS oi ON img_name=oi.oi_name
WHERE img_name = %s
GROUP BY img_name
ORDER BY oi_timestamp ASC;
'''.format(cols=', '.join(IMAGE_COLS))
fr_timestamp,
fr_archive_name
FROM filerevision
LEFT JOIN actor ON fr_actor=actor.actor_id) AS fr ON fi.file_name=fr.fr_name
WHERE fi.file_name = %s
GROUP BY fi.file_name
ORDER BY fr.fr_timestamp ASC;
'''.format(cols=', '.join(FILE_COLS))
params = (filename.replace(' ', '_'),)
results = fetchall_from_commonswiki(query, params)
if results:
Expand All @@ -112,4 +120,4 @@ def get_file_info(filename):

if __name__ == '__main__':
imgs = get_files('Images_from_Wiki_Loves_Monuments_2015_in_France')
import pdb; pdb.set_trace()
print('got %d images' % len(imgs))