%%html
<!-- Global site tag (gtag.js) - Google Analytics -->
<!-- NOTE(review): the gtag 'config' call below runs as soon as the cell is rendered; nothing visible in this cell ties it to the cookie-consent choice made in the popup further down. Confirm this is intended. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-168073501-1"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'UA-168073501-1');
</script>

<!-- Cookie consent popup: loads the cookieconsent v3 stylesheet and script from jsDelivr and initialises an "opt-out" popup whose link points to the project's COOKIES.md. -->
<link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.css" />
<script src="https://cdn.jsdelivr.net/npm/cookieconsent@3/build/cookieconsent.min.js" data-cfasync="false"></script>
<script>
window.cookieconsent.initialise({
  "palette": {
    "popup": {
      "background": "#000"
    },
    "button": {
      "background": "#f1d600"
    }
  },
  "type": "opt-out",
  "content": {
    "href": "https://github.com/Inria-Chile/risotto/blob/master/COOKIES.md"
  }
});
</script>

<!-- Page-wide styles: registers the InriaSans font from assets/InriaSans-Regular.ttf, applies it to the body and rendered HTML, and strips padding/margins from the .jp-* containers (presumably the JupyterLab/Voila notebook wrappers; confirm) so the content spans the full width. -->
<style>
    @font-face {
      font-family: "InriaSans";
      src: url("assets/InriaSans-Regular.ttf") format("truetype");
    }
    body {
      font-family: 'InriaSans';
    }
    .jp-Notebook-cell {
        padding: 0 !important;
    }
    .jp-Cell-outputWrapper {
        margin: 0 !important;
    }
    .jp-Notebook {
        width: 100% !important;
        padding: 0 !important;
    }
    .jp-RenderedHTMLCommon {
        font-family: 'InriaSans';
        width: 100% !important;
        padding: 0 !important;
        margin: 0 !important;
    }
    .jp-RenderedHTMLCommon p {
        margin: 0;
    }
</style>
<!-- Header banner: the Inria logo linking to inria.fr, followed by the project name linking to the GitHub repository. -->
<div align='left' style="margin:0;padding:2em;background:#e53411;display:flex;flex-flow:row;align-items:center">
    <a href='http://inria.fr'>
        <img style="height:50px !important;padding-right:20px;position:relative;border-right:1px solid white;"  src='https://github.com/Inria-Chile/risotto/raw/master/assets/inria-white.png' alt='Inria logo' title='Inria logo'/>
    </a>
    <a href='https://github.com/Inria-Chile/risotto' style="font-size:2em;color:white;padding-left:20px;text-decoration: none;">
        RISOTTO
    </a>
</div>
# This notebook is not intended to be used as a notebook, but rather as a GUI served with Voila
from collections import defaultdict

from ipywidgets import GridspecLayout, Layout, Label, Box, HBox, VBox, HTML, widgets, Button, Style, Text
from IPython.display import display
import itertools

from risotto.artifacts import load_papers_artifact, load_papers_topics_artifacts, load_topics_artifacts
/Users/lmarti/.pyenv/versions/3.8.2/envs/risotto/lib/python3.8/site-packages/pandas/compat/__init__.py:117: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
  warnings.warn(msg)
"""
This notebook implements RISOTTO's GUI.
The GUI is structured as the following tree:

> VBox
    > FilterAndSearchView
    > Box
        > PaperSetView
            > VBox
                > PaperView_1
                > PaperView_2
                > ...
                > PaperView_n
                > PaperSetView.nav_widgets
"""


# Load the artifacts built in the cooking stage.
# Missing values in the papers artifact are replaced with the string "N/A"
# so they can be rendered as-is by the views below.
papers_artifact = load_papers_artifact().fillna("N/A")
papers_topics_artifact = load_papers_topics_artifacts()
topics_artifact = load_topics_artifacts()


class FilterAndSearchView:
    """
    This view is responsible for rendering the filter and search widgets.
    """

    # Constant defining how many relevant tokens are displayed in the options
    TOKENS_PER_TOPIC = 5

    # First entry of both dropdowns; its value (None) means "do not filter"
    ALL_OPTION = ("All", None)

    def __init__(self, topics_artifact, on_filter_handler):
        """
        Args:
            - topics_artifact: a Pandas DataFrame with the tokens pseudocounts
                of each topic and subtopic. Column names are expected to be
                strings; subtopic columns are named "<topic>-<subtopic>".
            - on_filter_handler: a function that's called when the 'Filter'
                button is pressed. It receives as keyword arguments the
                selected topic and subtopic identifiers (`topic_id`,
                `subtopic_id`) and the search text field value (`text`).
        """
        self._topics_artifact = topics_artifact
        self._on_filter_handler = on_filter_handler
        self._topics, self._subtopics = self._get_topics(topics_artifact)

    @classmethod
    def _get_topics(cls, topics_artifact):
        """
        Args:
            - topics_artifact: a Pandas DataFrame with the tokens pseudocounts
                of each topic and subtopic.
        Returns:
            - topics: list of tuples. Each tuple's first and second elements are
                the topic's readable name and identifier, respectively.
            - subtopics: dictionary with topic ids as key and list of tuples as
                values. Each tuple's first and second elements are
                the subtopic's readable name and identifier, respectively.
        """
        topics = []
        subtopics = defaultdict(list)
        for col_name in topics_artifact.columns:
            option = (cls._get_dropdown_name(topics_artifact[col_name]), col_name)
            if "-" not in col_name:
                topics.append(option)
            else:
                # Subtopic columns are prefixed with their parent topic id
                topic_id = col_name.split("-")[0]
                subtopics[topic_id].append(option)
        return topics, subtopics

    @classmethod
    def _get_dropdown_name(cls, series):
        """
        Args:
            - series: a Pandas Series with the tokens pseudocounts of a topic
                or subtopic
        Returns:
            - str: a readable name of the topic or subtopic, made of its
                identifier and its TOKENS_PER_TOPIC highest-count tokens
        """
        prefix = f"#{series.name}"
        top_tokens = series.sort_values(ascending=False).index[:cls.TOKENS_PER_TOPIC]
        most_relevant = ", ".join(map(str, top_tokens))
        return f"{prefix} ({most_relevant})"

    def _subtopic_options(self, topic_id):
        """
        Args:
            - topic_id: identifier of the selected topic, or None for "All"
        Returns:
            - list of (label, value) tuples for the subtopic dropdown. The
                "All" entry always comes first.
        """
        # `.get` is used instead of indexing so that looking up an unknown
        # topic (e.g. None) does not insert an empty entry in the defaultdict.
        return [self.ALL_OPTION] + list(self._subtopics.get(topic_id, []))

    def to_widget(self):
        """
        Builds the filter form: topic and subtopic dropdowns, the search text
        box and the 'Filter' button, wired to `on_filter_handler`.

        Returns:
            - VBox with the filter and search widgets
        """
        text_line = HTML("""
            <style type="text/css">
                #filter-text {
                    position: relative;
                }

                #filter-text span {
                    background-color: white;
                    padding-right: 10px;
                    color: #7d8390;
                }

                #filter-text::after {
                    content: "";
                    position: absolute;
                    bottom: 0;
                    left: 0;
                    right: 0;
                    top: 1em;
                    border-top: 1px solid #dce0e3;
                    z-index: -1;
                }
                
                .filter-label {
                    font-weight: bold;
                    color: #384257;
                    margin-bottom: 0;
                }
                
                .filter-button {
                    background-color: #384257;
                    color: white;
                    font-weight: bold;
                }
                
                .border-top {
                    border-top: 1px solid #dce0e3;
                    padding-top: 1em;
                    margin-top: 1em;
                }
                
            </style>
            <div>
                <p id="filter-text">
                    <span>See relevant papers</span>
                </p>
            </div>
        """)
        topics_dropdown_input = widgets.Dropdown(
            options=[self.ALL_OPTION] + self._topics,
        )
        topics_dropdown = VBox([
            HTML("<p class='filter-label'>Topic</p>"),
            topics_dropdown_input,
        ])
        subtopics_dropdown_input = widgets.Dropdown(
            options=self._subtopic_options(topics_dropdown_input.value),
        )
        subtopics_dropdown = VBox([
            HTML("<p class='filter-label'>Subtopic</p>"),
            subtopics_dropdown_input,
        ])
        search_textbox_input = widgets.Text()
        search_textbox = VBox([
            HTML("<p class='filter-label'>Search</p>"),
            search_textbox_input,
        ])
        search_button_input = widgets.Button(
            description="Filter",
        )
        search_button_input.add_class("filter-button")
        search_button = Box([
            search_button_input
        ],
        layout=Layout(
            display="flex",
            justify_content="center",
            margin="15px 0 0 0"
        ))
        first_row = HBox([
            topics_dropdown,
            subtopics_dropdown,
            search_textbox,
            ],
            layout=Layout(
                display="flex",
                justify_content="space-between"
            )
        )
        filter_and_search_box = VBox([
            text_line,
            first_row,
            search_button,
        ])

        # Event handlers
        def handle_topic_change(change):
            # The subtopic list depends on the selected topic
            subtopics_dropdown_input.options = self._subtopic_options(change["new"])

        def handle_search_button_click(_):
            self._on_filter_handler(
                topic_id=topics_dropdown_input.value,
                subtopic_id=subtopics_dropdown_input.value,
                text=search_textbox_input.value
            )

        # Only react to changes of the selected value, not of every trait
        topics_dropdown_input.observe(handle_topic_change, names="value")
        search_button_input.on_click(handle_search_button_click)

        return filter_and_search_box
    
    
class HTMLPaperView:
    """
    This view is responsible for rendering a paper information.
    """

    def __init__(self, row, index):
        """
        Args:
            - row: a Pandas Series with the paper's data. The attributes
                `title`, `authors`, `doi`, `country`, `affiliation` and
                `publish_time` are read when rendering.
            - index: the position of the paper in the result list, displayed
                next to the paper.
        """
        self._row = row
        self._index = index

    def render_open_box(self, row):
        """
        Placeholder for an OPEN/CLOSED access badge.

        Args:
            - row: a Pandas Series with the paper's data (currently unused).
        Returns:
            - str: always the empty string, i.e. no badge is rendered. The
                `.open` / `.closed` CSS classes are kept in the stylesheet
                for when the badge is implemented.
        """
        return ""

    def _render_html(self):
        """
        Returns:
            - str: the HTML markup of the paper card. Every value coming from
                the paper's data is HTML-escaped, since it originates from an
                external dataset and must not be interpreted as markup.
        """
        # Local import: the module-level name `HTML` is the ipywidgets class,
        # so only the `escape` function is brought into scope here.
        from html import escape

        def as_text(value):
            # quote=True also makes the value safe inside attribute values
            return escape(str(value), quote=True)

        row = self._row
        index = as_text(self._index)
        title = as_text(row.title)
        authors = as_text(row.authors)
        doi = as_text(row.doi)
        country = as_text(row.country or "N/A")
        affiliation = as_text(row.affiliation or "N/A")
        publish_time = as_text(row.publish_time)
        open_box = self.render_open_box(row)

        return f"""
            <style type="text/css">
              .paperContainer {{
                background: #f5f5f5;
                padding-top: 1em;
                padding-bottom: 1em;
                display: grid;
                grid-template-columns: min-content auto min-content;
                border-radius: 7px;
                line-height: 1.5em;
              }}
              .paperContainer .index {{
                padding-left: 1em;
                font-weight: bold;
                font-size: 1.2em;
              }}
              .paperContainer .left {{
                padding-left: 1em;
                padding-right: 1em;
              }}
              .paperContainer .title {{
                font-weight: bold;
                font-size: 1.2em;
              }}
              .paperContainer p {{
                margin: 0;
                margin-bottom: 0.5em;
              }}
              .paperContainer .bottomRow {{
                display: flex;
                flex-flow: row wrap;
                margin-top: 0.5em;
              }}
              .paperContainer .bottomPair {{
                margin-right: 20px;
              }}
              .paperContainer .labl {{
                color: #acb0bb;
              }}
              .paperContainer .right {{
                padding-left: 1em;
                padding-right: 1em;
                min-width: 8em;
                white-space: nowrap;
                text-align: center;
                border-left: 1px solid #acb0bb;
              }}
              .paperContainer .open {{
                padding: 0.5em;
                font-weight: bold;
                color: white;
                background: #8ee27c;
              }}
              .paperContainer .closed {{
                padding: 0.5em;
                font-weight: bold;
                color: white;
                background: #f76370;
              }}
            </style>
            <div class="paperContainer">
              <span class="index">
                {index}
              </span>
              <div class="left">
                <p class="title">
                  {title}
                </p>
                <p>
                  {authors}
                </p>
                <p>
                  <a href="http://doi.org/{doi}">
                    doi.org/{doi}
                  </a>
                </p>
                <div class="bottomRow">
                  <span class="bottomPair">
                    <span class="labl"> Country: </span>
                    <span class="value">
                      {country}
                    </span>
                  </span>
                  <span class="bottomPair">
                    <span class="labl"> Affiliation: </span>
                    <span class="value">
                      {affiliation}
                    </span>
                  </span>
                  <span class="bottomPair">
                    <span class="labl"> DOI: </span>
                    <span class="value">
                      {doi}
                    </span>
                  </span>
                </div>
              </div>
              <div class="right">
                <p class="labl"> Date </p>
                <p> {publish_time} </p>
                {open_box}
              </div>
            </div>
            """

    def to_widget(self):
        """
        Returns:
            - HTML widget with the rendered paper card
        """
        return HTML(self._render_html())

    
class PaperSetView:
    """
    This view is responsible for rendering a set of papers.
    Also, the view implements pagination features.
    """

    def __init__(self, papers, papers_topics, page, items_per_page, parent):
        """
        Args:
            - papers: a Pandas DataFrame with the data of the papers,
                including the `cord_uid` identifier and the PageRank scores.
            - papers_topics: a Pandas DataFrame with the association between
                papers, topics, and subtopics
            - page: first page to render (zero-based).
            - items_per_page: how many items to render in each page.
            - parent: the parent widget. It's required for rerendering the
                component on updates.
        """
        self._papers = papers.join(papers_topics).sort_values(by="pagerank", ascending=False)
        self._page = page
        self._items_per_page = items_per_page
        self._parent = parent

        self._topic_id = None
        self._subtopic_id = None
        self._query_text = ""

        # The view renders itself into `parent` as soon as it is built
        self._render()

    def _decrement_page(self, _):
        self._page -= 1
        self._render()

    def _increment_page(self, _):
        self._page += 1
        self._render()

    def _goto_page(self, page):
        self._page = page
        self._render()

    @staticmethod
    def _last_page_index(num_items, items_per_page):
        """
        Args:
            - num_items: total number of items
            - items_per_page: how many items to display each page
        Returns:
            - int: zero-based index of the last non-empty page (0 when there
                are no items, so that a single empty page is still shown).
        """
        if num_items <= 0:
            return 0
        # (n - 1) // size avoids a phantom empty page when n is an exact
        # multiple of the page size (e.g. 20 items, 10 per page -> pages 0, 1)
        return (num_items - 1) // items_per_page

    @staticmethod
    def _filter_papers(papers, topic_id, subtopic_id, query_text):
        """
        Args:
            - papers: a Pandas DataFrame with `topic`, `subtopic`, `title`
                and `abstract` columns.
            - topic_id: topic to keep, or None for all topics.
            - subtopic_id: subtopic to keep, or None for all subtopics. It is
                only applied when `topic_id` is set.
            - query_text: text to search (case-insensitively) in the title
                and abstract; None or "" disables the text filter.
        Returns:
            - the filtered Pandas DataFrame
        """
        import re

        # Topic filtering
        if topic_id is not None:
            mask = papers["topic"] == topic_id
            if subtopic_id is not None:
                mask = mask & (papers["subtopic"] == subtopic_id)
            papers = papers[mask]
        # Query filtering
        if query_text:
            # The separator prevents matches spanning the end of the title
            # and the beginning of the abstract.
            searchable = papers["title"] + " " + papers["abstract"]
            # The query is interpreted as a regular expression, as before,
            # unless it's not a valid one (e.g. an unbalanced parenthesis),
            # in which case it's searched literally instead of raising.
            try:
                re.compile(query_text)
                is_regex = True
            except re.error:
                is_regex = False
            mask = searchable.str.contains(query_text, case=False, regex=is_regex, na=False)
            papers = papers[mask]
        return papers

    def _render(self):
        """
        This method performs the view rendering, filtering the papers
        by the selected topic and subtopic, and then by the text query.
        It's able to rerender the component on updates modifying the
        parent's children attribute.
        """
        papers = self._filter_papers(
            self._papers, self._topic_id, self._subtopic_id, self._query_text
        )
        # Keep the current page inside the valid range for the filtered set
        last_page = self._last_page_index(len(papers), self._items_per_page)
        self._page = min(max(self._page, 0), last_page)
        self._papers_views = self.build_papers_views(papers, self._page, self._items_per_page)
        self._parent.children = [self.to_widget(papers)]

    @staticmethod
    def build_papers_views(papers, page, items_per_page):
        """
        Args:
            - papers: the filtered Pandas DataFrame with the papers information.
            - page: the page to render.
            - items_per_page: how many items to render in each page.
        Returns:
            - list with the current page PaperViews.
        """
        start_idx, end_idx = page * items_per_page, (page + 1) * items_per_page
        page_subset = papers.iloc[start_idx:end_idx]
        # Papers are numbered by their one-based position in the full result
        return [
            HTMLPaperView(row, position)
            for position, (_index, row) in enumerate(page_subset.iterrows(), start=start_idx + 1)
        ]

    @staticmethod
    def get_nav_widgets(page, items_per_page, num_items, decrement_handler, increment_handler, goto_handler):
        """
        Args:
            - page: current page
            - items_per_page: how many items to display each page
            - num_items: total number of items
            - decrement_handler: function that it's called when the previous page
                button is clicked
            - increment_handler: function that it's called when the next page
                button is clicked
            - goto_handler: function that it's called with a zero-based page
                number to go to a specific page
        Returns:
            - HBox with the navigation widgets
        """
        styles = HTML("""
            <style type="text/css">
                .nav-button {
                    background-color: #384257;
                    color: white;
                    font-weight: bold;
                    width: 3em;
                }
                .nav-input {
                    width: 5em;
                }
            </style>
        """)
        last_page = PaperSetView._last_page_index(num_items, items_per_page)
        # The dropdown's value must be one of its options
        page = min(max(page, 0), last_page)

        def make_button(icon, tooltip, handler):
            button = widgets.Button(
                description="",
                icon=icon,
                tooltip=tooltip
            )
            button.add_class("nav-button")
            button.on_click(handler)
            return button

        page_dropdown_input = widgets.Dropdown(
            value=page + 1,
            options=list(range(1, last_page + 2)),
        )
        page_dropdown_input.add_class("nav-input")

        def handle_page_change(change):
            # Pages are displayed one-based but handled zero-based
            goto_handler(change["new"] - 1)

        page_dropdown_input.observe(handle_page_change, names="value")

        nav = [styles]
        if page > 0:
            nav.append(make_button('angle-double-left', "First page", lambda _: goto_handler(0)))
            nav.append(make_button('angle-left', "Previous page", decrement_handler))

        nav.append(HTML("Page "))
        nav.append(page_dropdown_input)
        nav.append(HTML(f" of {last_page + 1}"))

        if page < last_page:
            nav.append(make_button('angle-right', "Next page", increment_handler))
            nav.append(make_button('angle-double-right', "Last page", lambda _: goto_handler(last_page)))
        return HBox(nav, layout=Layout(margin='15px auto 0 auto'))

    def on_filter_handler(self, topic_id, subtopic_id, text):
        """
        This method is invoked when the 'Filter' button is clicked.
        Args:
            - topic_id: new topic identifier
            - subtopic_id: new subtopic identifier, formatted as
                "<topic>-<subtopic>"
            - text: the search text field value
        """
        self._topic_id = int(topic_id) if topic_id is not None else None
        self._subtopic_id = int(subtopic_id.split("-")[1]) if subtopic_id is not None else None
        self._query_text = text
        # A new filter yields a new result list: start again from its first page
        self._page = 0
        self._render()

    def to_widget(self, papers):
        """
        Args:
            - papers: the filtered Pandas DataFrame with the papers information
        Returns:
            - VBox with the view widgets
        """
        # Thousands are separated with dots, e.g. "12.345 results"
        formatted_count = f"{len(papers):,}".replace(",", ".")
        num_results = Box([
            HTML(f"<p class='filter-label'>{formatted_count} results</p>")
        ])
        num_results.add_class("border-top")
        papers_widgets = [view.to_widget() for view in self._papers_views]
        nav_widgets = self.get_nav_widgets(
            page=self._page,
            items_per_page=self._items_per_page,
            num_items=len(papers),
            decrement_handler=self._decrement_page,
            increment_handler=self._increment_page,
            goto_handler=self._goto_page
        )
        box = VBox([num_results] + papers_widgets + [nav_widgets])
        return box

# Introductory text displayed at the top of the GUI
words_box = Box([HTML("""
    <h1>Risotto in a few words...</h1>

    <p>
        RISOTTO is a research support tool that applies state of the art unsupervised NLP and ML methods to analyze research papers freely available online.
    </p>
    
    
    <p>
        Risotto sorts the papers by relevance. For more information see our <a href="/">technical report</a>.
    </p>

    <h2> Hierarchical topic modeling </h2>

    <p>
    RISOTTO automatically models the latent topics in the COVID-19 Open Research Dataset published by Allen AI.
    We use a two-step hierarchic topic modeling algorithm in order to build topics and subtopics for each topic.
    Each topic and subtopic is represented with an identifier and its top-5 most relevant tokens.
    </p>
    <p>
    We hope that these topics and subtopics help biomedical researchers to efficiently explore the most recent research advances regarding COVID-19.
    </p>
""")])
# Empty container: the PaperSetView fills it (and refills it on every update)
# by assigning its `children` attribute.
paper_set_box = Box()
# Building the view already renders the first page into `paper_set_box`
paper_set_view = PaperSetView(
    papers=papers_artifact,
    papers_topics=papers_topics_artifact,
    page=0,
    items_per_page=10,
    parent=paper_set_box
)
# The filter widgets notify the paper set view when 'Filter' is clicked
filter_and_search_view = FilterAndSearchView(
    topics_artifact=topics_artifact,
    on_filter_handler=paper_set_view.on_filter_handler,
)
# Root of the GUI: introduction, filter form and paper list, stacked vertically
box = VBox([
    words_box,
    filter_and_search_view.to_widget(),
    paper_set_box,
], layout=Layout(margin='auto', padding='15px', max_width='1170px', min_height='calc(100vh - 218px)'))
# Bare expression; presumably the last statement of the notebook cell so that
# the widget is displayed as the cell's output (confirm).
box
%%html
<!-- Footer banner: same markup as the header banner (Inria logo linking to inria.fr, project name linking to the GitHub repository) with a dark background and a smaller logo. -->
<div align='left' style="margin:0;padding:2em;background-color:#384257;display:flex;flex-flow:row;align-items:center">
    <a href='http://inria.fr'>
        <img style="height:40px !important;padding-right:20px;position:relative;border-right:1px solid white;"  src='https://github.com/Inria-Chile/risotto/raw/master/assets/inria-white.png' alt='Inria logo' title='Inria logo'/>
    </a>
    <a href='https://github.com/Inria-Chile/risotto' style="font-size:2em;color:white;padding-left:20px;text-decoration: none;">
        RISOTTO
    </a>
</div>