I needed something like this but couldn't find it on the web.... so time to write it myself. The idea is to have a python routine compile a set of data (which I can't share so it just generates some date related data) and then use a webpage with javascript to present record counts for different fields.
The top line of buttons lets you select which field to show, by default it shows all fields.
The second line shows the filters that are currently active, click on one of them to remove it again or 'Clear All' to clear them all in one go.
Then there is a table with the results, click on any of the field values to create a filter for that field and value combination. Currently there is no way to have filters for multiple values of the same field.
Lessons learned:
- mixing javascript and html: it took me a while to figure out that when you pass an array through javascript to a html button,
- the inspect feature makes life so much easier...,
- in order to copy an array you can't just assign it, you need to use .slice() to make a deep copy.
Answers found on stackoverflow:
- https://stackoverflow.com/questions/1960473/unique-values-in-an-array,
- https://stackoverflow.com/questions/17428587/transposing-a-2d-array-in-javascript (not used but may simplify parts of the code),
- https://stackoverflow.com/questions/3730510/javascript-sort-array-and-return-an-array-of-indicies-that-indicates-the-positi.
Try it out: count and filter test.
And here is the code:
<html>
<head><title>count and filter test</title>
</head>
<body onload="generate_data();filterandcount('all',[],[]);">
<p id="controls"> </p>
<p id="filters"> </p>
<p id="result_table"> </p>
<script>
// additional features that could be implemented:
// 1. show subset/all info for the selected filter, count becomes a button to show this
function onlyUnique(value, index, self)
{
return self.indexOf(value) === index;
}
var records=[];
var filter_field_array=[];
var filter_value_array=[];
function generate_data()
{
// generate some data
//var records=[];
var base=[2014,1,1,0,0,0];
var range=[4,12,31,23,59,59];
var len=[4,2,2,2,2,2];
var num_records=1000;
for (var i=0; i<6; i++)
{
var t=[];
for (var j=0; j<num_records; j++)
{
var t2="0000"+(Math.floor(Math.random()*range[i])+base[i]);
t2=t2.substring(t2.length-len[i]);
t.push(t2);
}
records.push(t);
}
for (var i=0; i<5; i++)
{
var t=[];
for (var j=0; j<num_records; j++)
{
var t2=records[0][j];
for (var k=0; k<(i+1); k++)
{
t2+=records[k+1][j];
}
t.push(t2);
}
records.push(t);
}
}
function filterandcount(do_field,filter_field_array_p,filter_value_array_p)
{
//console.log(do_field);
// console.log(typeof
var filter_field_array=[];
var filter_value_array=[];
if (filter_field_array_p instanceof Array)
{
filter_field_array=filter_field_array_p.slice();
}
else
{
filter_field_array.push(filter_field_array_p);
}
if (filter_value_array_p instanceof Array)
{
filter_value_array=filter_value_array_p.slice();
}
else
{
filter_value_array.push(filter_value_array_p);
}
var field_names = ['year','month','date','hour','minute','second','y+month','y+m+day','y+m+d+hour','y-m-d-h-minute','y-m-d-h-m-second'];
var controls_string="Count: <button type='button' onclick=\"filterandcount('all',["+filter_field_array+"],["+filter_value_array+"])\">All</button>";
for (var i=0; i<field_names.length; i++)
{
controls_string+="<button type='button' onclick=\"filterandcount('"+field_names[i]+"',["+filter_field_array+"],["+filter_value_array+"])\">"+field_names[i]+"</button>";
}
document.getElementById("controls").innerHTML=controls_string;
var t_filter_value="";
if (filter_value_array.length==0)
{
t_filter_value="<none>";
}
else
{
// remove duplicate values - the user should not be doing this!
var t_filter_field_array=[];
var t_filter_value_array=[];
for (var i=0; i<filter_value_array.length; i++)
{
// only add them if we can't find them in the temp arrays
var t=0;
for (var j=0; j<t_filter_value_array.length; j++)
{
if (filter_field_array[i]==t_filter_field_array[j])
{
if (filter_value_array[i]==t_filter_value_array[j])
{
t+=1;
}
}
}
if (t==0)
{
t_filter_field_array.push(filter_field_array[i]);
t_filter_value_array.push(filter_value_array[i]);
}
}
filter_field_array=t_filter_field_array.slice();
filter_value_array=t_filter_value_array.slice();
console.log(filter_field_array);
console.log(filter_value_array);
for (var i=0; i<filter_value_array.length; i++)
{
t_filter_field_array=[];
t_filter_value_array=[];
// remove current values from arrays
for (var j=0; j<filter_value_array.length; j++)
{
if (i!=j)
{
t_filter_field_array.push(filter_field_array[j]);
t_filter_value_array.push(filter_value_array[j]);
}
}
t_filter_value+="<button type='button' onclick=\"filterandcount('"+do_field+"',["+t_filter_field_array+"],["+t_filter_value_array+"])\">";
t_filter_value+="<b>✖</b> "+filter_value_array[i]+" on "+field_names[filter_field_array[i]]+"</button>";
}
}
var filters_string="Filter: " +t_filter_value+" <button type='button' onclick=\"filterandcount('"+do_field+"',[],[])\">Clear All</button>";
document.getElementById("filters").innerHTML=filters_string;
// but we actually need to transpose the array so we can grab the unique values per array
//transpose = m => m[0].map((x,i) => m.map(x => x[i]))
//records2=transpose(records);
// uhm, we're generating the data ourselves a couple lines up - so why transpose!
// fixed!
// instead of just assigned it to records2 run any filters first
var records2=[];
if (filter_field_array.length!=0)
{
// create a set of empty arrays
for (var i=0; i<records.length; i++)
{
records2.push([]);
}
for (var i=0; i<records[0].length; i++)
{
var t=0;
for (var j=0; j<filter_field_array.length; j++)
{
if (records[filter_field_array[j]][i]==filter_value_array[j])
{
t+=1;
}
}
if (t==filter_field_array.length)
{
for (var j=0; j<records.length; j++)
{
records2[j].push(records[j][i]);
}
}
}
}
else
{
records2=records;
}
//records2=records;
// now get the unique values per sub-array
var unique_records2=[]; //initialize empty array
for (var i=0; i<records2.length; i++)
{
// not optimized: only do the relevant sub-array(s)
//if ((field_names[i]==do_field)||(do_field=="all"))
//{
unique_records2.push(records2[i].filter(onlyUnique));
//}
//else
//{
//unique_records2.push([]);
//}
}
// now we need to count the values for those
// first we populate the count array with 0's
var unique_count2=[];
for (var i=0; i<unique_records2.length; i++)
{
unique_count2.push([]);
for (var j=0; j<unique_records2[i].length; j++)
{
unique_count2[i].push(0);
}
}
// now we can just increase whenever we have a match
for (var i=0; i<records2.length; i++)
{
for (var j=0; j<records2[i].length; j++)
{
for (var k=0; k<unique_records2[i].length; k++)
{
if (records2[i][j]==unique_records2[i][k])
{
unique_count2[i][k]+=1;
}
}
}
}
// sort each fields value in descending order
// use an array with pointers so we don't end up copying lots of data
var unique_sort_order=[];
for (var i=0; i<unique_count2.length; i++)
{
var t=[];
// add an index to the array
for (var j=0; j<unique_count2[i].length; j++)
{
t[j]=[unique_count2[i][j],j];
}
t.sort(function(left, right) { return left[0] < right[0] ? -1 : 1; });
t.reverse();
var t2=[];
for (var j=0; j<t.length; j++)
{
t2.push(t[j][1]);
//t2[j]=t[j][0];
}
unique_sort_order.push(t2);
}
// now replace the table in div with the contents
table_string="<table border=1>";
for (var i=0; i<field_names.length; i++)
{
if ((field_names[i]==do_field)||(do_field=="all"))
{
var ti=filter_field_array.slice();
ti.push(i);
table_string +="<tr><td><b>"+field_names[i]+"</b></td></tr>";
for (var j=0; j<unique_records2[i].length; j++)
{
var ti2=filter_value_array.slice();
ti2.push(unique_records2[i][unique_sort_order[i][j]]);
//console.log(ti);
//console.log(ti2);
table_string +="<tr><td><button type='button' onclick=\"filterandcount('all',["+ti+"],["+ti2+"])\">"+unique_records2[i][unique_sort_order[i][j]]+"</button>";
table_string +="</td><td>"+unique_count2[i][unique_sort_order[i][j]]+"</td></tr>";
}
}
}
table_string +="</table>";
// simplified: print the array count values
document.getElementById("result_table").innerHTML = table_string;
}
</script>
</body>
</html>