Commit cc528bb0 authored by Vladyslav Yazykov

Trying Bow_600k

parent b08b6966
@@ -6,46 +6,55 @@ import time
import os.path
import json
import traceback
from dataclasses import dataclass
import h5py
import numpy as np
from flask import Flask, request, jsonify
from werkzeug.exceptions import HTTPException
from utilities.visualize_local_features import visualize_local_features
from utilities.path2id import path2id
from utilities import read_yaml, dotdict
from collections import defaultdict
## TODO: uncomment for BoW
# from data_loading.read_invfile import *
from data_loading.read_data_mpv import *
from loading import load_data
from engine import Engine
from image_search import search_methods
# Config
DATASETS = {
# Dataset
"bow_600k": {
# Engine
# - when in the singularity container, the path 'horn:/local/vrg3_demo/vrg3_demo/data' is mapped to '/vrg3/data'
"zoom_in": '/local/vrg3_demo/vrg3_demo/data/bow_600k/mpv_files/mpvdb50k_haff2.mat', ## TODO: uncomment for BoW "/local/vrg3_demo/vrg3_demo/data/bow_600k/files/invfile.dat"
},
}
# Input data loading: inverted file
ENGINE = load_data(DATASETS)
engines = defaultdict(dict)
datasets = read_yaml("/local/vrg3_demo/vrg3_demo/app/engine_vdvm22/config.yml").datasets
for dataset, dataset_config in datasets.items():
for engine, engine_config in dataset_config.items():
data = load_data(
invfile_path=engine_config["inverted_file"],
cached_lengths_path=engine_config["cached_lengths"],
geometries_path=engine_config["geometries"],
cid2id_path=engine_config["cid2id"],
id2cid_path=engine_config["id2cid"],
image_sizes_path=engine_config["image_sizes"],
)
engines[dataset][engine] = Engine(data), engine_config
print(f"Loaded engine '{engine}' engine for dataset '{dataset}'")
engines = dotdict(engines)
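# Resulting structure (a sketch, based on config.yml below): engines is a nested dotdict keyed by
# dataset and engine name, each entry holding an (Engine, engine_config) pair,
# e.g. engines.bow_600k.zoom_in -> (Engine(...), {"inverted_file": ..., "images_base_path": "images/", ...}).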
# API endpoint
app = Flask(__name__)
def _parse_request(request):
# For testing:
# return {"dataset_name": "bow_600k", "engine_name": "zoom_in", "offset": 0, "limit": 10}
if request.content_type == "application/json":
data = request.get_json()
else:
data = {'data': json.loads(request.form['data'])} if 'data' in request.form else {}
data = data['data']
assert data['dataset_name'] in ENGINE, "Unsupported dataset"
assert data['engine_name'] in ENGINE[data['dataset_name']], "Unsupported network"
assert data['dataset_name'] in engines, "Unsupported dataset"
assert data['engine_name'] in engines[data['dataset_name']], "Unsupported network"
return data
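# A minimal client sketch (assumed shape; the parser above unwraps a top-level 'data' key for both
# JSON and form submissions, and the handlers below read dataset_name, engine_name, offset, limit
# and an optional query):
#
#   import requests  # hypothetical client, not part of this app
#   payload = {"data": {"dataset_name": "bow_600k", "engine_name": "zoom_in", "offset": 0, "limit": 10}}
#   resp = requests.post("http://localhost:5000/images", json=payload)
#   print(resp.json()["results"])  # total number of result images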
@@ -77,51 +86,45 @@ def capabilities():
@app.route("/images", methods=['POST'])
def images():
time0 = time.time()
data = _parse_request(request)
dataset = ENGINE[data["dataset_name"]]
req = _parse_request(request)
mode = None
dataset = engines[req["dataset_name"]]
engine, config = dataset[req["engine_name"]]
if not data.get("query", None):
mode = None
if not req.get("query", None):
# Browsing functionality
# image paths from the chosen dataset
images = next(iter(dataset.values())).paths
np.random.seed(seed=0)
images = engine.image_paths(size=100, random=True)
images = [os.path.join(config["images_base_path"], image) for image in images]
# Show images in a random order
ranks = np.random.permutation(np.arange(len(images)))
ranks = np.arange(len(images))
mode = "browsing"
bbxs = [[] for i in range(len(ranks))]
elif data["query"]["type"] == "image":
bbxs = [[] for _ in range(len(ranks))]
elif req["query"]["type"] == "image":
# This will be either [] or, if the search used a bounding box, in the following format:
# [{'x1': 0.2608, 'x2': 0.6889, 'y1': 0.3993, 'y2': 0.6515}]
rectangles_over = []
if data["query"]["search_mode"].get("tool") == "rectangle":
rectangles_over.append(data["query"]["search_mode"]["tool_data"])
if req["query"]["search_mode"].get("tool") == "rectangle":
rectangles_over.append(req["query"]["search_mode"]["tool_data"])
# Search-by-image functionality
assert data["query"]["value"]["prefix"] == data["dataset_name"]
engine = dataset[data["engine_name"]]
images = engine.paths
## TODO: uncomment for BoW
#query_id = path2id(data["query"]["value"]["path"], engine.cid2id)
query_id = images.index(data["query"]["value"]["path"])
cid = req["query"]["value"]["path"].split("/")[-1]
# Transform user bbox to required format
if rectangles_over==[]:
# Set the bounding box to the full size of the image
user_bbox = convert_user_bbox({'x1': .0, 'x2': 1., 'y1': .0, 'y2': 1.}, data["query"]["value"]["path"], engine)
user_bbox = convert_user_bbox({'x1': .0, 'x2': 1., 'y1': .0, 'y2': 1.}, req["query"]["value"]["path"], engine)
else:
user_bbox = convert_user_bbox(rectangles_over[0], data["query"]["value"]["path"], engine)
search_mode = data["engine_options"]["mode"]
user_bbox = convert_user_bbox(rectangles_over[0], req["query"]["value"]["path"], engine)
search_mode = req["engine_options"]["mode"]
ranks, bbxs = search_methods(query_id, engine, bbox=user_bbox, mode=search_mode)
ranks, bbxs = search_methods(cid, engine, bbox=user_bbox, mode=search_mode)
# Remove query from results
if search_mode == "Unconstrained":
@@ -133,32 +136,32 @@ def images():
raise ValueError("Unknown query type")
# Output building
slice = [images[i] for i in ranks[data['offset']:data['offset'] + data['limit']]] # Paging
slice = [images[i] for i in ranks[req['offset']:req['offset'] + req['limit']]] # Paging
parse_name = lambda x: os.path.splitext(os.path.basename(x))[0]
outdata = {
"results": len(images),
"images": [{"prefix": data['dataset_name'],
"images": [{"prefix": req['dataset_name'],
"path": x,
"overlays": {
"rank": data['offset'] + i + 1,
"rank": req['offset'] + i + 1,
"name": parse_name(x),
"loc_features": visualize_local_features(parse_name(x), dataset[data["engine_name"]], mode='point', n=100), #mode = 'full_geom'; 'point'
"loc_features": visualize_local_features(parse_name(x), engine, mode='point', n=100), #mode = 'full_geom'; 'point'
"shear_bbxs": [bbxs[i]],
}} for i, x in enumerate(slice)],
}
if mode == "image":
rectangles_over = []
if data["query"]["search_mode"].get("tool") == "rectangle":
rectangles_over.append(data["query"]["search_mode"]["tool_data"])
if req["query"]["search_mode"].get("tool") == "rectangle":
rectangles_over.append(req["query"]["search_mode"]["tool_data"])
outdata["query_text"] = f"{len(ranks)} results in {time.time() - time0:.3f}s for bbox: {str(rectangles_over)}"
outdata["query_text"] = "%s results in %.3fs for bbox: %s" % (
len(ranks), time.time() - time0, str(rectangles_over))
outdata["query_image"] = {
"overlays": {
"rectangles_over": rectangles_over,
"text_over": "search_mode=" + data['query']['search_mode']['id'],
"name": parse_name(data["query"]["value"]["path"]),
"text_over": "search_mode=" + req['query']['search_mode']['id'],
"name": parse_name(req["query"]["value"]["path"]),
}
}
......
file_paths:
inverted_file: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/invfile.dat
cached_lengths: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/cached_lengths.dat
geometries: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/geometries.h5
cid2id: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/cid2id.pkl
id2cid: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/id2cid.pkl
datasets:
bow_600k:
zoom_in:
inverted_file: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/invfile.dat
cached_lengths: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/cached_lengths.dat
geometries: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/geometries.h5
cid2id: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/cid2id.pkl
id2cid: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/id2cid.pkl
image_sizes: /local/vrg3_demo/vrg3_demo/data/bow_600k/files/image_sizes.npy
images_base_path: images/
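# Note: app.py joins images_base_path onto every image path returned by the engine,
# i.e. os.path.join(config["images_base_path"], image).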
import struct
import numpy as np
from utilities import sizeof_uint64_t
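# masks[n] has the n lowest bits set: masks[0] == 0, masks[1] == 1, ..., masks[64] == 2**64 - 1.
# Used by _read_bits below to keep only the low n_bits of a shifted uint64.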
masks = np.array([0] + [
(1 << (i + 1)) - 1 for i in range(64)
], dtype=np.uint64)
class InvertedChunk:
def __init__(self,
data: bytes,
offset: int,
n_words: int,
code_bits: tuple[int, int, int],
max_bit: int,
weight: float):
self.data = data
self.offset = int(offset)
self.n_words = n_words
self.weight = weight
self.bits = [
*code_bits[:3],
max_bit
]
self.shifts = np.zeros(4, dtype=np.uint64)
for i in range(1, 4):
self.shifts[i] = (1 << self.bits[i - 1]) + self.shifts[i - 1]
self.current = 0
self.result = 0
self.remaining = 64
def decode(self):
# Unpack a single uint64_t
self.current = np.uint64(struct.unpack("<Q", self.data[self.offset:self.offset + sizeof_uint64_t])[0])
self.offset += sizeof_uint64_t
result = np.zeros(self.n_words, dtype=int)
current = np.uint64(0)
for i in range(self.n_words):
decoded = self._decode_next()
current = decoded + current
result[i] = current
return result
def _decode_next(self):
box = self._read_bits(2)
result = self._read_bits(self.bits[box]) + self.shifts[box]
return result
def _read_bits(self, n_bits):
if self.remaining >= n_bits:
self.remaining -= n_bits
return (self.current >> np.uint64(self.remaining)) & masks[n_bits]
result = (self.current << np.uint64(n_bits - self.remaining)) & masks[n_bits]
# Unpack a single uint64_t
self.current = np.uint64(struct.unpack("<Q", self.data[self.offset:self.offset + sizeof_uint64_t])[0])
self.offset += sizeof_uint64_t
self.remaining += 64 - n_bits
result += self.current >> np.uint64(self.remaining)
return result
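# A minimal decoding sketch (assumed wiring, inferred from the fields load_data reads out of
# invfile.dat; the offset units and the meaning of weight are assumptions, not the verified format):
#
#   chunk = InvertedChunk(
#       data=raw_invfile_bytes,                  # the inverted-file blob
#       offset=ifile_head["offset"][vw],         # start of this visual word's posting list
#       n_words=ifile_head["wf"][vw],            # number of postings for visual word vw
#       code_bits=tuple(code_bits[:3]),          # the three small delta bit widths
#       max_bit=ifile_head["maxbit"][vw],        # escape width for this word's largest delta
#       weight=idf[vw],
#   )
#   doc_ids = chunk.decode()                     # deltas are cumulatively summed into document ids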
@@ -57,10 +57,3 @@ class Engine:
def load_data(data):
loaded = {}
for dataset, engines in data.items():
loaded[dataset] = {}
for engine, data_path in engines.items():
loaded[dataset][engine] = Engine(data_path)
return loaded
\ No newline at end of file
from typing import Optional
import os
import numpy as np
import pickle
from utilities import dotdict, BinaryFile, sizeof_uint64_t, sizeof_unsigned_char, sizeof_unsigned_int, cid2filename
class Engine:
"""Corresponds to the engine3 in cpp files (/home.dokt/jenicto2/cmpg2_demo/wbs/mexsrc/engine/en3_loadup.mex.cpp)
-> (/home.dokt/jenicto2/cmpg2_demo/wbs/mexsrc/engine/en3_get.mex.cpp )"""
def __init__(self, invfile_path: str, external_idf: Optional[np.ndarray] = None):
"""
Initializes the engine.
1. Reads the invfile from the path provided
2. If external idf is not provided, calculates the idf from the database
:param invfile_path: Path to the invfile.dat
:param external_idf: Shape: [num_labels, 1] - if provided, will use this idf instead of calculating it from the database
"""
# Loading cid->id and id->cid conversion files
base_data_loading_path = "/local/vrg3_demo/vrg3_demo/app/engine_vdvm22/data_loading/"
with open(os.path.join(base_data_loading_path, 'cid2id.pkl'), 'rb') as f:
self.cid2id = pickle.load(f)
with open(os.path.join(base_data_loading_path, 'id2cid.pkl'), 'rb') as f:
self.id2cid = pickle.load(f)
# Create paths to all images, together with the corresponding ids and cids
self.paths = [cid2filename(cid) for cid in self.cid2id.keys()]
self.ids = list(self.cid2id.values())
self.cids = list(self.cid2id.keys())
# Reading the inverted file
with BinaryFile(invfile_path) as f:
self.engine_version = f.read_string(8)
self.num_clusters = f.read_int() # number of clusters
self.num_documents = f.read_int() # todo: confirm; appears to be the number of documents (used as the idf numerator below)
# Reading actual inverted file matrix: visual words
self.data_length_in_bytes = f.read_uint64()
self.data = f.read_bytes(self.data_length_in_bytes * sizeof_uint64_t)
sizeof_ifile_head = sizeof_uint64_t + sizeof_unsigned_int + sizeof_unsigned_char
ifile_head_bytes = f.read_bytes(self.num_clusters * sizeof_ifile_head)
self.ifile_head = np.frombuffer(ifile_head_bytes, dtype=(np.dtype([('offset', 'Q'), ('wf', 'I'), ('maxbit', 'B')])))
self.code_bits = f.read_list(4, "int") # todo: confirm; likely the compression bit widths for the inverted-file delta coding (cf. InvertedChunk's code_bits/max_bit)
if external_idf is not None:
self.idf: np.ndarray = external_idf
else:
# Document frequency: the number of documents in which each visual word appears
self.document_frequencies = f.read_list(self.num_clusters, "unsigned")
self.document_frequencies = np.array(self.document_frequencies)
self.idf: np.ndarray = np.log(self.num_documents / self.document_frequencies)
self.idf[np.isinf(self.idf)] = 0
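# Worked example (sketch): with num_documents = 600000 (the bow_600k scale), a visual word that
# appears in 6 documents gets idf = ln(600000 / 6) ≈ 11.5, a word present in every document gets
# ln(1) = 0, and the isinf fix above only matters for words with document frequency 0.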
# Inlined copy of what the database call would otherwise return
self.opt = dotdict({
"dataset" : "bigzoom",
"geometry_cfg": {
"scale_bits" : 4,
"ellipse_bits" : 12,
"bits" : [11, 13, 15],
"inverted_compression_bits": [11, 12, 14],
},
"git_tag" : "f1.1",
"version" : 4,
"vhash" : "655e52775a53eabff09ba46e8055eb15",
})
# commands = ["idf", "geom", "size", "scalebins", "invertedlist", "docnfeat", "tcorr"]
# en3_get
def get_idf(self, return_word_frequencies=False):
idf = self.idf.copy() # todo: is it necessary to make a copy?
if not return_word_frequencies:
return idf
return idf, self.get_word_frequencies()
def get_word_frequencies(self):
return self.ifile_head["wf"]
def load_data(invfiles):
loaded = {}
for dataset, engines in invfiles.items():
loaded[dataset] = {}
for engine, invfile_path in engines.items():
loaded[dataset][engine] = Engine(invfile_path)
return loaded
if __name__ == '__main__':
invfile_path = "/local/vrg3_demo/vrg3_demo/data/bow_600k/files/invfile.dat"
engine = Engine(invfile_path)
print(f"Engine version: {engine.engine_version}")
print(f"Clus {engine.num_clusters}")
print(f"Doc {engine.num_documents}")
print(f"Dlen {engine.data_length_in_bytes}")
print(f"Data read {len(engine.data)}")
import h5py
import numpy as np
import sys
from .inverted_chunk import InvertedChunk
from loading import Data
from utilities import cid2filename
class Engine:
@@ -21,6 +23,20 @@ class Engine:
self.n_vw_per_document = np.zeros(self.n_documents, dtype=np.uint32)
self.lengths = data.lengths
self.geometries_path = data.geometries_path
self.image_sizes = data.image_sizes
def image_paths(self, size=100, random=True):
size = min(size, self.n_documents)
if not random:
indices = np.arange(size)
else:
indices = np.random.choice(self.n_documents, size, replace=False)
cids = [self.id2cid[i] for i in indices]
paths = [cid2filename(cid) for cid in cids]
return paths
def get_documents_with_vw(self, vw: int):
"""
@@ -38,11 +54,16 @@ class Engine:
:param document_id: The document id.
:return: A list of geometries.
"""
with h5py.File(self.geometries_path, "r") as f:
labels = f[f"geom/{document_id}/labels"][()].squeeze()
positions = f[f"geom/{document_id}/pos"][()]
return labels, positions
try:
with h5py.File(self.geometries_path, "r") as f:
labels = f[f"geom/{document_id}/labels"][()].squeeze()
positions = f[f"geom/{document_id}/pos"][()]
return labels, positions
except KeyError:
# For some images we don't have geometries :(
print(f"No geometries for {document_id}", file=sys.stderr)
return None, None
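# Assumed HDF5 layout (sketch): one group per document id under "geom/",
#   geom/<document_id>/labels -> visual-word labels of the document's local features
#   geom/<document_id>/pos    -> the corresponding feature geometries
# e.g. list(h5py.File(self.geometries_path, "r")["geom"].keys()) lists the ids that do have geometries.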
def query_nn(self, cid: int, top_k: int = 100):
"""
......
@@ -6,16 +6,17 @@ from .spatial_verification import *
import scipy.io
import numpy as np
def search_methods(query_id, engine, bbox=[], mode="Unconstrained"):
def retrieve_query(query_id, bbox):
ranks, bboxes = make_query(query_id,
engine.visual_words, engine.geometries,
bbox,
engine.db,
engine.idf,
engine.options,
engine)
engine.visual_words, engine.geometries,
bbox,
engine.db,
engine.idf,
engine.options,
engine)
return ranks, bboxes
ranks, bboxes = retrieve_query(query_id, bbox)
@@ -27,15 +28,15 @@ def search_methods(query_id, engine, bbox=[], mode="Unconstrained"):
ranks = [ranks[i] for i in sort]
bboxes = [bboxes[i] for i in sort]
elif mode == "Zoom-out":
scores = [get_bbox_area(bb, r, engine)/get_image_area(r, engine) for r, bb in zip(ranks, bboxes)]
scores = [get_bbox_area(bb, r, engine) / get_image_area(r, engine) for r, bb in zip(ranks, bboxes)]
sort = np.argsort(scores)
ranks = [ranks[i] for i in sort]
bboxes = [bboxes[i] for i in sort]
elif mode == "Unconstrained":
pass
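# Note (assumed intent): Zoom-in and Zoom-out re-rank the spatially verified results by the ratio of
# the matched bounding-box area to the result image's area; Zoom-out sorts that ratio ascending, so
# images where the query region appears smallest come first, while Unconstrained keeps the original order.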
@@ -44,19 +45,21 @@ def search_methods(query_id, engine, bbox=[], mode="Unconstrained"):
return ranks, bboxes
def get_bbox_area(bbox, image_id, engine):
image_path_rel = engine.paths[image_id]
width, height = engine.get_img_size(image_path_rel)
x = [p['x']*width for p in bbox]
y = [p['y']*height for p in bbox]
x = [p['x'] * width for p in bbox]
y = [p['y'] * height for p in bbox]
a = (np.max(x) - np.min(x)) * (np.max(y) - np.min(y))
a = (np.max(x) - np.min(x))*(np.max(y) - np.min(y))
return a
return a
def get_image_area(image_id, engine):
image_path_rel = engine.paths[image_id]
width, height = engine.get_img_size(image_path_rel)
return width*height
\ No newline at end of file
return width * height
from .load_data import load_data, load_from_config
from .load_data import load_data
from .engine_config import EngineConfig
from .data_config import Data
from dataclasses import dataclass
from typing import Dict
from typing import Dict, List
import numpy as np
@@ -16,3 +16,4 @@ class Data:
id2cid: Dict[int, int]
lengths: np.ndarray
geometries_path: str
image_sizes: List[int]
@@ -8,7 +8,7 @@ from utilities import BinaryFile, sizeof_uint64_t, sizeof_unsigned_char, sizeof_
import yaml
def load_data(invfile_path: str, cached_lengths_path: str, geometries_path: str, cid2id_path: str, id2cid_path: str):
def load_data(invfile_path: str, cached_lengths_path: str, geometries_path: str, cid2id_path: str, id2cid_path: str, image_sizes_path: str):
with BinaryFile(invfile_path) as f:
engine_version = f.read_string(8)
n_vw = f.read_int() # number of clusters
@@ -44,6 +44,8 @@ def load_data(invfile_path: str, cached_lengths_path: str, geometries_path: str,
with open(id2cid_path, "rb") as f:
id2cid = pickle.load(f)
image_sizes = np.load(image_sizes_path)
engine_config = EngineConfig(
version=engine_version,
n_vw=n_vw,
@@ -59,20 +61,6 @@ def load_data(invfile_path: str, cached_lengths_path: str, geometries_path: str,
cid2id=cid2id,
id2cid=id2cid,
lengths=cached_lengths,
geometries_path=geometries_path
)
def load_from_config(config_path: str = "./config.yml"):
with open(config_path, "r") as f:
config = yaml.safe_load(f)
paths = config["file_paths"]
return load_data(
invfile_path=paths["inverted_file"],
cached_lengths_path=paths["cached_lengths"],
geometries_path=paths["geometries"],
cid2id_path=paths["cid2id"],
id2cid_path=paths["id2cid"],
geometries_path=geometries_path,
image_sizes=image_sizes
)
numpy
h5py
h5py==2.10.0
flask
scipy
torch
imagesize
\ No newline at end of file
imagesize
pyaml
\ No newline at end of file
@@ -2,4 +2,4 @@ from .binary_file import BinaryFile
from .data_sizes import *
from .dotdict import dotdict
from .cid2filename import cid2filename