import json
from PIL import Image
import os
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import scipy
import scipy.ndimage
import math
import cv2
from spotlight import spotlight
sz = 15  # figure side length; used as figsize=(sz, sz) by the plotting code below
# NOTE(review): BS is passed as the full (BS, BS) kernel size to
# cv2.getStructuringElement below, so it acts as the ellipse's width/height
# there rather than a radius - confirm which one is intended.
BS = 200 # bubble size (radius in pixels) used for the zoomlens task
def image_histogram_equalization(image, number_bins=256):
    """Histogram-equalize ``image`` and return the result in the same shape.

    Adapted from
    http://www.janeriksolem.net/2009/06/histogram-equalization-with-python-and.html

    Args:
        image: NumPy array of intensities (any shape).
        number_bins: Number of histogram bins used to estimate the CDF.

    Returns:
        Float array shaped like ``image``; every value is mapped through the
        cumulative histogram, which is rescaled so that it ends at 255.
    """
    flat = image.flatten()
    # `density=True` replaces the old `normed=True` keyword, which was removed
    # from np.histogram in NumPy 1.24. The CDF is renormalized right below, so
    # the result is the same.
    image_histogram, bins = np.histogram(flat, number_bins, density=True)
    cdf = image_histogram.cumsum()  # cumulative distribution function
    cdf = 255 * cdf / cdf[-1]  # normalize so the last entry is 255
    # Linear interpolation of the CDF gives the new pixel values.
    image_equalized = np.interp(flat, bins[:-1], cdf)
    return image_equalized.reshape(image.shape)
def transparent_cmap(cmap, N=255):
    """Return a copy of ``cmap`` whose alpha ramps linearly from 0 to 0.8.

    Args:
        cmap: Colormap object exposing the private ``_init()`` method and
            ``_lut`` lookup table (the caller in this file passes a
            matplotlib colormap).
        N: The alpha ramp is written with ``N + 4`` entries, which must match
            the number of rows of ``_lut``.

    Returns:
        The modified copy. ``cmap`` itself is left untouched, so the alpha
        ramp does not leak into other users of the same colormap object.
    """
    import copy  # local import: only this helper needs it

    mycmap = copy.copy(cmap)
    mycmap._init()  # (re)build the lookup table on the copy
    mycmap._lut[:, -1] = np.linspace(0, 0.8, N + 4)
    return mycmap
def process_timestamp(curtime):
    """Convert an ISO 8601 timestamp into seconds elapsed since midnight.

    Example input: ``'2017-03-13T15:23:44.175Z'``.

    The hour field is included so that differences between two timestamps
    stay correct when they straddle an hour boundary (with minutes and
    seconds only, such a difference would come out negative).

    Args:
        curtime: Timestamp string of the form ``YYYY-MM-DDTHH:MM:SS[.fff]Z``.

    Returns:
        float: ``hours * 3600 + minutes * 60 + seconds``. The date part is
        ignored, so differences are only meaningful within a single day.
    """
    _date, timestamp = curtime.split('T')[:2]
    hours, minutes, seconds = timestamp.replace('Z', '').split(':')[:3]
    return float(hours) * 3600 + float(minutes) * 60 + float(seconds)
# Load the collected results. JSON text is UTF-8, so the encoding is stated
# explicitly instead of relying on the platform default.
filename = 'out_sept26.json'
with open(filename, encoding='utf-8') as data_file:
    data = json.load(data_file)
# Alternative loader for a file holding one JSON document per line:
#data = []
#with open(filename) as f:
#    for line in f:
#        data.append(json.loads(line))
# Peek at one record: the entry at index 3 for one HIT.
data['A116DNB5LSACL9_3H781YYV6TRNQRMDDHXM3LXO0HUTE9'][3]
data is structured as a dictionary keyed by HIT; each HIT holds a list of images, each of which contains the filename, tags, and mouse movements (a list of events) for the image (as well as whether it was a validation image in terms of tags). E.g., to get all the mouse-movement events of user A11... on image 3: data['A116DNB5LSACL9_3H781YYV6TRNQRMDDHXM3LXO0HUTE9'][3]['events']
# Re-index the raw records by image. All three dicts map
# filename -> hit -> value:
#   locs_by_img    : list of (x, y) cursor locations
#   times_for_locs : time spent at each of those locations
#   tags_by_img    : the tags entered for the image
locs_by_img = dict()
tags_by_img = dict()
times_for_locs = dict()
for hit in data:
    for im in data[hit]:
        coords = []
        times = []
        # None (rather than 0) marks "no previous sample yet": a timestamp
        # that legitimately parses to 0 must still produce a time delta,
        # otherwise the length check at the end of the loop body fails.
        prevtime = None
        for ev in im['events']:
            if ev['x'] is not None and ev['y'] is not None:
                coords.append((ev['x'], ev['y']))
                curtime = process_timestamp(ev['time'])
                if prevtime is not None:  # only start counting from the second entry
                    times.append(curtime - prevtime)
                prevtime = curtime
        if isinstance(im['filename'], list):
            assert(len(im['filename'])==1) # list of one element
            filename = im['filename'][0]
        else:
            filename = im['filename']
        if filename not in locs_by_img:
            locs_by_img[filename] = dict()
            tags_by_img[filename] = dict()
            times_for_locs[filename] = dict()
        # times[k] is the gap between sample k and sample k+1, i.e. the time
        # spent at coords[k]; the last location has no such gap and is dropped.
        locs_by_img[filename][hit] = coords[:-1]
        tags_by_img[filename][hit] = im['tags']
        times_for_locs[filename][hit] = times
        assert(len(locs_by_img[filename][hit])==len(times_for_locs[filename][hit]))
# Pick one image to inspect and load its pixels.
filename = 'why-protect-antarcticas-ocean-2_503b538a2f76d.jpg'
curim = locs_by_img[filename]
print('Seen by',len(curim),'users.')
imdir = "/data/graphics/SpandanGraphsProject/fullsize-60k/"
im = Image.open(os.path.join(imdir,filename))
im_arr = np.array(im, dtype=np.uint8)
# A single-channel image decodes to a 2-D array, so only the first two axes
# are unpacked unconditionally (the all-images loop further down in this file
# makes the same distinction).
ydim, xdim = im_arr.shape[:2]
dim3 = im_arr.shape[2] if im_arr.ndim == 3 else 1
# Run this cell to draw one binary viewing path per viewer: every visited
# pixel is switched on and then dilated with an elliptical kernel, and the
# results are laid out on a 3-column grid.
fig = plt.figure(figsize=(sz, sz))
dim2 = math.ceil(len(curim) / 3.0)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (BS, BS))
ii = 1
for hit, coords in curim.items():
    # coordinates are fractions of the image size; clamp to the last pixel
    xs = [min(math.floor(pt[0] * xdim), xdim - 1) for pt in coords]
    ys = [min(math.floor(pt[1] * ydim), ydim - 1) for pt in coords]
    if len(ys) == 0 or len(xs) == 0:
        continue
    res = np.zeros((ydim, xdim), dtype=np.uint8)
    indexes = (np.array(ys), np.array(xs))
    res[indexes] = 255.0
    dilation = cv2.dilate(res, kernel, iterations=1)
    plt.subplot(dim2, 3, ii)
    ii += 1
    plt.imshow(dilation)
    plt.axis('off')
# Filter out outliers (using the inter-quartile range) for one viewer.
import scipy.stats

# Take the times of the last viewer of the current image explicitly, so this
# cell no longer depends on a `curtimes` left behind by a different cell.
curtimes = times_for_locs[filename][list(curim)[-1]]
plt.hist(curtimes); plt.show()
IQR = scipy.stats.iqr(curtimes)
Q1 = np.percentile(curtimes,25)
Q3 = np.percentile(curtimes,75)
low, high = Q1-3*IQR, Q3+3*IQR
print('Low bound: %2.2f, Upper bound: %2.2f'%(low,high))
# Inclusive fences: a value sitting exactly on a bound is not an outlier
# (this also keeps the data when the IQR is zero).
curtimes2 = [c for c in curtimes if low <= c <= high]
plt.hist(curtimes2)
# Run this cell to draw one map per viewer in which every cursor location is
# weighted by the time spent there (normalized to the viewer's longest kept
# time), laid out on a 3-column grid.
filteroutliers = True # filter out outliers (points with super high viewing times)
curim = locs_by_img[filename]
hits = list(curim)
dim2 = math.ceil(len(curim) / 3.0)
fig = plt.figure(figsize=(sz, sz))
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (BS, BS))  # same for every viewer
ii = 1
for hit_id in hits:
    coords = curim[hit_id]
    curtimes = times_for_locs[filename][hit_id]
    if not coords:
        continue
    # coordinates are fractions of the image size; clamp to the last pixel
    xs = [min(math.floor(elem[0] * xdim), xdim - 1) for elem in coords]
    ys = [min(math.floor(elem[1] * ydim), ydim - 1) for elem in coords]
    if filteroutliers:
        IQR = scipy.stats.iqr(curtimes)
        Q1 = np.percentile(curtimes, 25)
        Q3 = np.percentile(curtimes, 75)
        low, high = Q1 - 3 * IQR, Q3 + 3 * IQR
    else:
        low, high = -math.inf, math.inf
    # A single inclusive test decides both which samples are drawn and which
    # ones define the maximum. Testing the two separately lets a value lying
    # exactly on a fence be drawn but not counted in the maximum (scaling
    # past 255), and leaves nothing to take the maximum of when the IQR is 0.
    kept = [(x, y, c) for x, y, c in zip(xs, ys, curtimes) if low <= c <= high]
    maxval = max((c for _, _, c in kept), default=0.0)
    res = np.zeros((ydim, xdim), dtype=np.uint8)
    if maxval > 0:  # nothing to scale by otherwise; leave the map blank
        for x, y, c in kept:
            res[y, x] = max(0.0, 255.0 * c / float(maxval))
    dilation = cv2.dilate(res, kernel, iterations=1)
    plt.subplot(dim2, 3, ii)
    plt.imshow(dilation); plt.axis('off'); plt.title(ii)
    print('%d)'%(ii),end='')
    print(tags_by_img[filename][hit_id])
    ii += 1
# Run this cell to draw, for every viewer, the time-weighted cursor map on top
# of the image itself (one figure per viewer). Every location is weighted by
# the time spent there, normalized to the viewer's longest kept time.
mycmap = transparent_cmap(cmap=plt.get_cmap('viridis'))
filteroutliers = True # filter out outliers (points with super high viewing times)
curim = locs_by_img[filename]
hits = list(curim)
dim2 = math.ceil(len(curim) / 3.0)  # 3-column grid height; not used by the per-viewer figures below
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (BS, BS))  # same for every viewer
ii = 1
for hit_id in hits:
    coords = curim[hit_id]
    curtimes = times_for_locs[filename][hit_id]
    if not coords:
        continue
    # coordinates are fractions of the image size; clamp to the last pixel
    xs = [min(math.floor(elem[0] * xdim), xdim - 1) for elem in coords]
    ys = [min(math.floor(elem[1] * ydim), ydim - 1) for elem in coords]
    if filteroutliers:
        IQR = scipy.stats.iqr(curtimes)
        Q1 = np.percentile(curtimes, 25)
        Q3 = np.percentile(curtimes, 75)
        low, high = Q1 - 3 * IQR, Q3 + 3 * IQR
    else:
        low, high = -math.inf, math.inf
    # A single inclusive test decides both which samples are drawn and which
    # ones define the maximum. Testing the two separately lets a value lying
    # exactly on a fence be drawn but not counted in the maximum (scaling
    # past 255), and leaves nothing to take the maximum of when the IQR is 0.
    kept = [(x, y, c) for x, y, c in zip(xs, ys, curtimes) if low <= c <= high]
    maxval = max((c for _, _, c in kept), default=0.0)
    res = np.zeros((ydim, xdim), dtype=np.uint8)
    if maxval > 0:  # nothing to scale by otherwise; leave the map blank
        for x, y, c in kept:
            res[y, x] = max(0.0, 255.0 * c / float(maxval))
    dilation = cv2.dilate(res, kernel, iterations=1)
    # One figure per viewer; no figure is created ahead of the loop, so no
    # empty figure is left behind.
    fig, ax = plt.subplots(figsize=(sz, sz))
    ax.imshow(im)
    ax.imshow(dilation, cmap=mycmap)
    plt.axis('off'); plt.title(ii)
    print('%d)'%(ii),end='')
    print(tags_by_img[filename][hit_id])
    plt.show()
    ii += 1
# Run this cell to aggregate the cursor locations of all viewers of the
# current image into one binary map, dilate it, and blur it into a heatmap.
allcoords = []
for hit in curim:
    coords = curim[hit]
    allcoords.extend(coords)
xs = [min(math.floor(elem[0]*xdim),xdim-1) for elem in allcoords]
ys = [min(math.floor(elem[1]*ydim),ydim-1) for elem in allcoords]
res = np.zeros((ydim,xdim),dtype=np.uint8)
if allcoords:
    # Guarded because empty lists turn into float arrays, which are not
    # valid indices; with no locations the map simply stays blank.
    indexes = (np.array(ys),np.array(xs))
    res[indexes] = 255.0
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(50,50))
dilation = cv2.dilate(res,kernel,iterations = 1)
plt.imshow(dilation)
# gaussian_filter is called from scipy.ndimage directly; the
# scipy.ndimage.filters namespace is deprecated.
heatmap = scipy.ndimage.gaussian_filter(dilation,sigma=50)
#heatmap = scipy.ndimage.gaussian_filter(res,sigma=50)
# Build the thumbnail: keep the `targetdim` rows and the `targetdim` columns
# with the largest heatmap sums (in their original order), where `targetdim`
# is 2/3 of the shorter image side.
targetdim = round(min(xdim, ydim) * 2 / 3.0)
rowsums = heatmap.sum(axis=1)
colsums = heatmap.sum(axis=0)
# argsort is ascending; reversing puts the heaviest rows/columns first
inds = np.argsort(rowsums)[::-1]
colinds = np.argsort(colsums)[::-1]
# cut rows
keepthese = inds[:targetdim]
keepthese.sort()
thumb = im_arr[keepthese, :]
# cut columns
keepthese = colinds[:targetdim]
keepthese.sort()
thumb = thumb[:, keepthese]
def _show_panel(position, title, image):
    """Draw `image` with a title and no axes in slot `position` of a 1x4 grid."""
    plt.subplot(1, 4, position)
    plt.axis('off')
    plt.title(title)
    return plt.imshow(image)


# Side-by-side summary: input image, heatmap, spotlight rendering, thumbnail.
fig = plt.figure(figsize=(sz, sz))
_show_panel(1, 'Input', im)
_show_panel(2, 'Mouse heatmap', heatmap)
spot = spotlight(im, heatmap, toplot=False)
_show_panel(3, 'Spotlight', spot)
_show_panel(4, 'Thumbnail', thumb)
TODO:
# Run for all images at once (the code from the cells above, gathered in one
# place; the cells above can be used for testing individual parts).
def _fixation_heatmap(viewer_coords, ydim, xdim):
    """Turn the cursor locations of all viewers into an equalized heatmap.

    Args:
        viewer_coords: Mapping of viewer -> list of (x, y) locations given as
            fractions of the image width/height.
        ydim: Image height in pixels.
        xdim: Image width in pixels.

    Returns:
        ``(heatmap, numus)`` where ``numus`` is the number of viewers that
        contributed at least one location. ``heatmap`` is ``None`` when no
        viewer did, because there is then nothing to normalize.
    """
    # Float counter: a uint8 counter silently wraps around once a pixel has
    # collected 256 hits.
    counts = np.zeros((ydim, xdim), dtype=np.float64)
    numus = 0
    for allcoords in viewer_coords.values():
        if not allcoords:
            continue
        numus += 1
        xs = [min(math.floor(elem[0] * xdim), xdim - 1) for elem in allcoords]
        ys = [min(math.floor(elem[1] * ydim), ydim - 1) for elem in allcoords]
        # aggregate "fixations" per pixel; add.at accumulates repeated indices
        np.add.at(counts, (ys, xs), 1)
    if numus == 0:
        return None, 0
    res = counts / (counts.max() / 255.0)  # scale so the busiest pixel is 255
    sig = int(0.03 * min(ydim, xdim))  # blur width: 3% of the shorter side
    heatmap = scipy.ndimage.gaussian_filter(res, sigma=sig)
    return image_histogram_equalization(heatmap, number_bins=256), numus


def _crop_thumbnail(im_arr, heatmap):
    """Keep the rows and columns of `im_arr` with the largest heatmap sums.

    The number of rows and of columns kept is 2/3 of the shorter image side;
    the kept rows/columns stay in their original order.
    """
    ydim, xdim = im_arr.shape[:2]
    targetdim = round(min(xdim, ydim) * 2 / 3.0)
    # argsort is ascending, so the reversed order starts with the largest sums
    top_rows = np.sort(np.argsort(heatmap.sum(axis=1))[::-1][:targetdim])
    top_cols = np.sort(np.argsort(heatmap.sum(axis=0))[::-1][:targetdim])
    return im_arr[top_rows, :][:, top_cols]


imdir = "/data/graphics/SpandanGraphsProject/fullsize-60k/"
imdir2 = "/data/graphics/graphsProject/graphScraping/Img_folder"
mycmap = transparent_cmap(cmap=plt.get_cmap('viridis'))
withthumbnail = False
pn = 4 if withthumbnail else 3  # number of panels per figure
plt.rcParams['image.cmap'] = 'gist_heat'  # same for every image, so set once
for filename in locs_by_img:
    curim = locs_by_img[filename]
    # the image lives in one of two folders
    impath = os.path.join(imdir, filename)
    if not os.path.isfile(impath):
        impath = os.path.join(imdir2, filename)
    im = Image.open(impath)
    im_arr = np.array(im, dtype=np.uint8)
    ydim, xdim = im_arr.shape[:2]  # works for both 2-D and 3-D arrays
    nusers = len(curim)
    heatmap, numus = _fixation_heatmap(curim, ydim, xdim)
    if heatmap is None:
        # No cursor data at all: normalizing would divide by zero.
        print('Skipping %s: no cursor locations recorded' % filename)
        continue
    # --------- GENERATE PLOTS ---------
    fig = plt.figure(figsize=(sz, sz))
    plt.subplot(1, pn, 1); plt.axis('off')
    plt.imshow(im)
    # Plot the image and overlay the heatmap as filled contours.
    w, h = im.size
    y, x = np.mgrid[0:h, 0:w]
    ax = plt.subplot(1, pn, 2)
    ax.imshow(im)
    cb = ax.contourf(x, y, heatmap.reshape(x.shape[0], y.shape[1]), 15, cmap=mycmap)
    plt.axis('off')
    plt.title('%d viewers'%(numus))
    # ---------------
    spot = spotlight(im, heatmap, toplot=False)
    plt.subplot(1, pn, 3); plt.axis('off'); plt.title('Spotlight')
    plt.imshow(spot)
    if withthumbnail:
        # --------- MAKE THUMBNAIL ---------
        thumb = _crop_thumbnail(im_arr, heatmap)
        plt.subplot(1, pn, 4); plt.axis('off'); plt.title('Thumbnail')
        plt.imshow(thumb)
    alltags = []
    for tags in tags_by_img[filename].values():
        alltags.extend(tags)
    print(alltags)
    plt.show()