"""
This notebook implements RISOTTO's GUI.
The GUI is structured as the following tree:
> VBox
    > FilterAndSearchView
    > Box
        > PaperSetView
            > VBox
                > PaperView_1
                > PaperView_2
                > ...
                > PaperView_n
                > PaperSetView.nav_widgets
"""
# The artifacts built in the cooking stage are loaded.
# NOTE(review): the three loader functions are defined outside this section;
# the views below document their results as Pandas DataFrames — confirm.
# Missing values of the papers artifact are replaced by the "N/A" placeholder.
papers_artifact = load_papers_artifact().fillna("N/A")
# Passed to PaperSetView as `papers_topics` (joined onto the papers there).
papers_topics_artifact = load_papers_topics_artifacts()
# Passed to FilterAndSearchView to build the topic/subtopic dropdown options.
topics_artifact = load_topics_artifacts()
class FilterAndSearchView:
    """
    This view is responsible for rendering the filter and search widgets.
    """

    # Constant defining how many relevant tokens are displayed in the options
    TOKENS_PER_TOPIC = 5

    # Dropdown entry meaning "do not filter on this field"
    _ALL_OPTION = ("All", None)

    def __init__(self, topics_artifact, on_filter_handler):
        """
        Args:
            - topics_artifact: a Pandas DataFrame with the tokens pseudocounts
                of each topic and subtopic.
            - on_filter_handler: a function that's called when the 'Filter'
                button is pressed. It receives as keyword arguments the
                selected topic and subtopic identifiers (`topic_id`,
                `subtopic_id`) and the search text field value (`text`).
        """
        self._topics_artifact = topics_artifact
        self._on_filter_handler = on_filter_handler
        self._topics, self._subtopics = self._get_topics(topics_artifact)

    @classmethod
    def _get_topics(cls, topics_artifact):
        """
        Args:
            - topics_artifact: a Pandas DataFrame with the tokens pseudocounts
                of each topic and subtopic. Columns whose name contains a
                "-" are subtopics of the topic named before the "-".
        Returns:
            - topics: list of tuples. Each tuple's first and second elements are
                the topic's readable name and identifier, respectively.
            - subtopics: dictionary with topic ids as key and list of tuples as
                values. Each tuple's first and second elements are
                the subtopic's readable name and identifier, respectively.
        """
        topics = []
        subtopics = defaultdict(list)
        for col_name in topics_artifact.columns:
            option = (cls._get_dropdown_name(topics_artifact[col_name]), col_name)
            if "-" not in col_name:
                topics.append(option)
            else:
                parent_topic_id = col_name.split("-")[0]
                subtopics[parent_topic_id].append(option)
        return topics, subtopics

    @classmethod
    def _get_dropdown_name(cls, series):
        """
        Args:
            - series: a Pandas Series with the tokens pseudocounts of a topic
                or subtopic
        Returns:
            - str: a readable name of the topic or subtopic, made of its
                identifier and its TOKENS_PER_TOPIC highest-count tokens
        """
        ranked_tokens = series.sort_values(ascending=False).index
        most_relevant = ", ".join(map(str, ranked_tokens[:cls.TOKENS_PER_TOPIC]))
        return f"#{series.name} ({most_relevant})"

    def _subtopic_options(self, topic_id):
        """
        Args:
            - topic_id: identifier of the selected topic, or None for 'All'
        Returns:
            - list of (label, value) options for the subtopic dropdown,
                always starting with the 'All' entry
        """
        # `.get` is used instead of indexing so that looking up an unknown
        # topic (e.g. None) does not insert an empty entry in the defaultdict.
        return [self._ALL_OPTION] + list(self._subtopics.get(topic_id, []))

    def to_widget(self):
        """
        Builds the filter form: topic and subtopic dropdowns, the search text
        box and the 'Filter' button, and wires their event handlers.
        Returns:
            - VBox with the filter and search widgets
        """
        text_line = HTML("""
<style type="text/css">
#filter-text {
position: relative;
}
#filter-text span {
background-color: white;
padding-right: 10px;
color: #7d8390;
}
#filter-text::after {
content: "";
position: absolute;
bottom: 0;
left: 0;
right: 0;
top: 1em;
border-top: 1px solid #dce0e3;
z-index: -1;
}
.filter-label {
font-weight: bold;
color: #384257;
margin-bottom: 0;
}
.filter-button {
background-color: #384257;
color: white;
font-weight: bold;
}
.border-top {
border-top: 1px solid #dce0e3;
padding-top: 1em;
margin-top: 1em;
}
</style>
<div>
<p id="filter-text">
<span>See relevant papers</span>
</p>
</div>
""")

        topics_dropdown_input = widgets.Dropdown(
            options=[self._ALL_OPTION] + self._topics,
        )
        topics_dropdown = VBox([
            HTML("<p class='filter-label'>Topic</p>"),
            topics_dropdown_input,
        ])

        subtopics_dropdown_input = widgets.Dropdown(
            options=self._subtopic_options(topics_dropdown_input.value),
        )
        subtopics_dropdown = VBox([
            HTML("<p class='filter-label'>Subtopic</p>"),
            subtopics_dropdown_input,
        ])

        search_textbox_input = widgets.Text()
        search_textbox = VBox([
            HTML("<p class='filter-label'>Search</p>"),
            search_textbox_input,
        ])

        search_button_input = widgets.Button(
            description="Filter",
        )
        search_button_input.add_class("filter-button")
        search_button = Box(
            [search_button_input],
            layout=Layout(
                display="flex",
                justify_content="center",
                margin="15px 0 0 0",
            ),
        )

        first_row = HBox(
            [
                topics_dropdown,
                subtopics_dropdown,
                search_textbox,
            ],
            layout=Layout(
                display="flex",
                justify_content="space-between",
            ),
        )
        filter_and_search_box = VBox([
            text_line,
            first_row,
            search_button,
        ])

        # Event handlers
        def handle_topic_change(change):
            # The subtopic choices depend on the selected topic
            subtopics_dropdown_input.options = self._subtopic_options(change["new"])

        def handle_search_button_click(_):
            self._on_filter_handler(
                topic_id=topics_dropdown_input.value,
                subtopic_id=subtopics_dropdown_input.value,
                text=search_textbox_input.value,
            )

        # Only changes of the selected value are relevant, so the observer is
        # registered for that trait alone instead of filtering every event.
        topics_dropdown_input.observe(handle_topic_change, names="value")
        search_button_input.on_click(handle_search_button_click)
        return filter_and_search_box
class HTMLPaperView:
    """
    This view is responsible for rendering a paper information.
    """

    def __init__(self, row, index):
        """
        Args:
            - row: a Pandas Series with the paper's data. The attributes
                `title`, `authors`, `doi`, `country`, `affiliation` and
                `publish_time` are read when rendering.
            - index: the position of the paper in the result list, displayed
                at the left of the card.
        """
        self._row = row
        self._index = index

    def render_open_box(self, row):
        """
        Renders the open/closed access badge of a paper.
        The badge is currently disabled, so nothing is rendered. The `.open`
        and `.closed` CSS classes are kept in the stylesheet for it.
        Args:
            - row: a Pandas Series with the paper's data (unused).
        Returns:
            - str: always the empty string
        """
        return ""

    def _render_html(self):
        """
        Returns:
            - str: the HTML markup (stylesheet and card) of the paper
        """
        # Imported locally: only `escape` is needed and the module name would
        # be easy to confuse with the `HTML` widget used in this notebook.
        from html import escape

        def as_text(value):
            # The paper fields come from the dataset, so characters such as
            # "<" and "&" are escaped so they render as text rather than markup.
            return escape(str(value))

        title = as_text(self._row.title)
        authors = as_text(self._row.authors)
        doi = as_text(self._row.doi)
        country = as_text(self._row.country or "N/A")
        affiliation = as_text(self._row.affiliation or "N/A")
        publish_time = as_text(self._row.publish_time)

        return f"""
<style type="text/css">
.paperContainer {{
background: #f5f5f5;
padding-top: 1em;
padding-bottom: 1em;
display: grid;
grid-template-columns: min-content auto min-content;
border-radius: 7px;
line-height: 1.5em;
}}
.paperContainer .index {{
padding-left: 1em;
font-weight: bold;
font-size: 1.2em;
}}
.paperContainer .left {{
padding-left: 1em;
padding-right: 1em;
}}
.paperContainer .title {{
font-weight: bold;
font-size: 1.2em;
}}
.paperContainer p {{
margin: 0;
margin-bottom: 0.5em;
}}
.paperContainer .bottomRow {{
display: flex;
flex-flow: row wrap;
margin-top: 0.5em;
}}
.paperContainer .bottomPair {{
margin-right: 20px;
}}
.paperContainer .labl {{
color: #acb0bb;
}}
.paperContainer .right {{
padding-left: 1em;
padding-right: 1em;
min-width: 8em;
white-space: nowrap;
text-align: center;
border-left: 1px solid #acb0bb;
}}
.paperContainer .open {{
padding: 0.5em;
font-weight: bold;
color: white;
background: #8ee27c;
}}
.paperContainer .closed {{
padding: 0.5em;
font-weight: bold;
color: white;
background: #f76370;
}}
</style>
<div class="paperContainer">
<span class="index">
{self._index}
</span>
<div class="left">
<p class="title">
{title}
</p>
<p>
{authors}
</p>
<p>
<a href="http://doi.org/{doi}">
doi.org/{doi}
</a>
</p>
<div class="bottomRow">
<span class="bottomPair">
<span class="labl"> Country: </span>
<span class="value">
{country}
</span>
</span>
<span class="bottomPair">
<span class="labl"> Affiliation: </span>
<span class="value">
{affiliation}
</span>
</span>
<span class="bottomPair">
<span class="labl"> DOI: </span>
<span class="value">
{doi}
</span>
</span>
</div>
</div>
<div class="right">
<p class="labl"> Date </p>
<p> {publish_time} </p>
{self.render_open_box(self._row)}
</div>
</div>
"""

    def to_widget(self):
        """
        Returns:
            - HTML widget with the rendered paper card
        """
        return HTML(self._render_html())
class PaperSetView:
    """
    This view is responsible for rendering a set of papers.
    Also, the view implements pagination features.
    """

    def __init__(self, papers, papers_topics, page, items_per_page, parent):
        """
        Args:
            - papers: a Pandas DataFrame with the data of the papers,
                including the `cord_uid` identifier and the PageRank scores.
            - papers_topics: a Pandas DataFrame with the association between
                papers, topics, and subtopics
            - page: first page to render (0-based).
            - items_per_page: how many items to render in each page.
            - parent: the parent widget. It's required for rerendering the
                component on updates.
        """
        self._papers = papers.join(papers_topics).sort_values(by="pagerank", ascending=False)
        self._page = page
        self._items_per_page = items_per_page
        self._parent = parent
        self._topic_id = None
        self._subtopic_id = None
        self._query_text = ""
        self._render()

    def _decrement_page(self, _):
        self._goto_page(self._page - 1)

    def _increment_page(self, _):
        self._goto_page(self._page + 1)

    def _goto_page(self, page):
        self._page = page
        self._render()

    @staticmethod
    def _last_page_index(num_items, items_per_page):
        """
        Args:
            - num_items: total number of items
            - items_per_page: how many items to display each page
        Returns:
            - int: the 0-based index of the last page. An empty result set
                still has one (empty) page.
        """
        if num_items <= 0:
            return 0
        # `num_items // items_per_page` would add an empty trailing page
        # whenever the number of items is an exact multiple of the page size.
        return (num_items - 1) // items_per_page

    @staticmethod
    def _filter_papers(papers, topic_id, subtopic_id, query_text):
        """
        Args:
            - papers: a Pandas DataFrame with the `topic`, `subtopic`,
                `title` and `abstract` columns.
            - topic_id: topic to keep, or None for every topic
            - subtopic_id: subtopic to keep, or None for every subtopic.
                It's only taken into account when a topic is selected.
            - query_text: text to look for in the title and the abstract,
                ignoring case. It's interpreted as a regular expression
                when it's a valid one, and as a literal text otherwise.
        Returns:
            - the Pandas DataFrame with the papers that match the filters
        """
        import re

        # Topic filtering
        if topic_id is not None:
            mask = papers["topic"] == topic_id
            if subtopic_id is not None:
                mask = mask & (papers["subtopic"] == subtopic_id)
            papers = papers[mask]

        # Query filtering
        if query_text:
            try:
                re.compile(query_text)
                pattern = query_text
            except re.error:
                # Text such as "(H1N1" is not a valid regular expression:
                # search for it literally instead of failing.
                pattern = re.escape(query_text)
            # The fields are joined with a newline so a query can't match
            # across the end of the title and the start of the abstract.
            searchable = papers["title"] + "\n" + papers["abstract"]
            mask = searchable.str.contains(pattern, case=False, regex=True, na=False)
            papers = papers[mask]
        return papers

    def _render(self):
        """
        This method performs the view rendering, filtering the papers
        by the selected topic and subtopic, and then by the text query.
        It's able to rerender the component on updates modifying the
        parent's children attribute.
        """
        papers = self._filter_papers(
            self._papers, self._topic_id, self._subtopic_id, self._query_text
        )
        # The current page is kept inside the valid range, since the number
        # of pages changes with the filters.
        last_page = self._last_page_index(len(papers), self._items_per_page)
        self._page = min(max(self._page, 0), last_page)
        self._papers_views = self.build_papers_views(papers, self._page, self._items_per_page)
        self._parent.children = [self.to_widget(papers)]

    @staticmethod
    def build_papers_views(papers, page, items_per_page):
        """
        Args:
            - papers: the filtered Pandas DataFrame with the papers information.
            - page: the page to render.
            - items_per_page: how many items to render in each page.
        Returns:
            - list with the current page PaperViews.
        """
        start_idx = page * items_per_page
        page_subset = papers.iloc[start_idx:start_idx + items_per_page]
        # Papers are numbered from 1 across the whole result set
        return [
            HTMLPaperView(row, position)
            for position, (_index, row) in enumerate(page_subset.iterrows(), start=start_idx + 1)
        ]

    @staticmethod
    def get_nav_widgets(page, items_per_page, num_items, decrement_handler, increment_handler, goto_handler):
        """
        Args:
            - page: current page
            - items_per_page: how many items to display each page
            - num_items: total number of items
            - decrement_handler: function that it's called when the previous page
                button is clicked
            - increment_handler: function that it's called when the next page
                button is clicked
            - goto_handler: function that it's called to go to a specific page.
                It receives the 0-based page index.
        Returns:
            - HBox with the navigation widgets
        """
        styles = HTML("""
<style type="text/css">
.nav-button {
background-color: #384257;
color: white;
font-weight: bold;
width: 3em;
}
.nav-input {
width: 5em;
}
</style>
""")
        last_page = PaperSetView._last_page_index(num_items, items_per_page)
        # The dropdown rejects a value that's not among its options
        page = min(max(page, 0), last_page)

        def make_button(icon, tooltip, on_click):
            button = widgets.Button(
                description="",
                icon=icon,
                tooltip=tooltip,
            )
            button.add_class("nav-button")
            button.on_click(on_click)
            return button

        # Pages are shown to the user starting from 1
        page_dropdown_input = widgets.Dropdown(
            value=page + 1,
            options=list(range(1, last_page + 2)),
        )
        page_dropdown_input.add_class("nav-input")

        def handle_page_change(change):
            goto_handler(change["new"] - 1)

        page_dropdown_input.observe(handle_page_change, names="value")

        nav = [styles]
        if page > 0:
            nav.append(make_button("angle-double-left", "First page", lambda _: goto_handler(0)))
            nav.append(make_button("angle-left", "Previous page", decrement_handler))
        nav.append(HTML("Page "))
        nav.append(page_dropdown_input)
        nav.append(HTML(f" of {last_page + 1}"))
        if page < last_page:
            nav.append(make_button("angle-right", "Next page", increment_handler))
            nav.append(make_button("angle-double-right", "Last page", lambda _: goto_handler(last_page)))
        return HBox(nav, layout=Layout(margin='15px auto 0 auto'))

    def on_filter_handler(self, topic_id, subtopic_id, text):
        """
        This method is invoked when the 'Filter' button is clicked.
        Args:
            - topic_id: new topic identifier
            - subtopic_id: new subtopic identifier, formatted as
                "<topic>-<subtopic>"
            - text: the search text field value
        """
        self._topic_id = int(topic_id) if topic_id is not None else None
        self._subtopic_id = int(subtopic_id.split("-")[1]) if subtopic_id is not None else None
        self._query_text = text
        # A new filter produces a new result list: show it from its first page
        self._page = 0
        self._render()

    def to_widget(self, papers):
        """
        Args:
            - papers: the filtered Pandas DataFrame with the papers information
        Returns:
            - VBox with the view widgets
        """
        # The thousands separator is a dot, e.g. "12.345 results"
        formatted_count = f"{len(papers):,}".replace(",", ".")
        num_results = Box([
            HTML(f"<p class='filter-label'>{formatted_count} results</p>")
        ])
        num_results.add_class("border-top")
        papers_widgets = [view.to_widget() for view in self._papers_views]
        nav_widgets = self.get_nav_widgets(
            page=self._page,
            items_per_page=self._items_per_page,
            num_items=len(papers),
            decrement_handler=self._decrement_page,
            increment_handler=self._increment_page,
            goto_handler=self._goto_page,
        )
        return VBox([num_results] + papers_widgets + [nav_widgets])
# Static introduction shown at the top of the GUI.
words_box = Box([HTML("""
<h1>Risotto in a few words...</h1>
<p>
RISOTTO is a research support tool that applies state of the art unsupervised NLP and ML methods to analyze research papers freely available online.
</p>
<p>
Risotto sorts the papers by relevance. For more information see our <a href="/">technical report</a>.
</p>
<h2> Hierarchical topic modeling </h2>
<p>
RISOTTO automatically models the latent topics in the COVID-19 Open Research Dataset published by Allen AI.
We use a two-step hierarchic topic modeling algorithm in order to build topics and subtopics for each topic.
Each topic and subtopic is represented with an identifier and its top-5 most relevant tokens.
</p>
<p>
We hope that these topics and subtopics help biomedical researchers to efficiently explore the most recent research advances regarding COVID-19.
</p>
""")])
# Empty container handed to PaperSetView as its parent: the view replaces the
# container's children every time it renders.
paper_set_box = Box()
# The paper list is created first because its `on_filter_handler` method is
# the callback given to the filter view below. It renders page 0 right away.
paper_set_view = PaperSetView(
papers=papers_artifact,
papers_topics=papers_topics_artifact,
page=0,
items_per_page=10,
parent=paper_set_box
)
# Pressing 'Filter' forwards the selected topic, subtopic and search text to
# the paper list.
filter_and_search_view = FilterAndSearchView(
topics_artifact=topics_artifact,
on_filter_handler=paper_set_view.on_filter_handler,
)
# Root widget: introduction, filter form and paper list stacked vertically.
box = VBox([
words_box,
filter_and_search_view.to_widget(),
paper_set_box,
], layout=Layout(margin='auto', padding='15px', max_width='1170px', min_height='calc(100vh - 218px)'))
# Bare expression as the last statement so the notebook displays the GUI.
box