Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| import re | |
| import datetime | |
| from urllib import request | |
| from lxml import etree | |
# Site root prepended to the relative ``href`` of a paper link, keyed by the
# lowercase venue identifier used throughout this file.
url_prefix_mapping = {
    'acl': 'https://aclanthology.org',
    'emnlp': 'https://aclanthology.org',
    'naacl': 'https://aclanthology.org',
    'tacl': 'https://aclanthology.org',
    'nips': 'https://papers.nips.cc',
    # ICML proceedings are hosted by PMLR, not on the NeurIPS site.
    'icml': 'https://proceedings.mlr.press',
    'iclr': 'https://iclr.cc',
}
# PMLR volume identifier of each ICML edition, keyed by (venue, year).
# Every edition has its own volume; 2024 previously reused the 2021 volume.
mlr_mapping = {
    ('icml', 2015): 'v37',
    ('icml', 2016): 'v48',
    ('icml', 2017): 'v70',
    ('icml', 2018): 'v80',
    ('icml', 2019): 'v97',
    ('icml', 2020): 'v119',
    ('icml', 2021): 'v139',
    ('icml', 2022): 'v162',
    ('icml', 2023): 'v202',
    ('icml', 2024): 'v235',
    ('icml', 2025): 'v267',
}


def get_paper_home(venue, year):
    """Return the URL of the page listing all papers of *venue* in *year*.

    Args:
        venue: Lowercase venue identifier ('acl', 'emnlp', 'naacl', 'nips',
            'icml' or 'iclr').
        year: Publication year; anything accepted by ``int()``.

    Returns:
        The listing-page URL as a string, or ``None`` when the venue is not
        handled here (for example 'tacl') or when no PMLR volume is known
        for the requested ICML year.
    """
    # Normalise first so a float such as 2020.0 cannot leak into the URL.
    year = int(year)
    if venue in ('acl', 'emnlp', 'naacl'):
        return f'https://aclanthology.org/events/{venue}-{year}'
    if venue == 'nips':
        return f'https://papers.{venue}.cc/paper_files/paper/{year}'
    if venue == 'icml':
        # Look the volume up defensively: an unknown year must not raise.
        volume = mlr_mapping.get((venue, year))
        if volume is None:
            return None
        return f'https://proceedings.mlr.press/{volume}'
    if venue == 'iclr':
        return f'https://iclr.cc/Downloads/{year}'
    return None
def check_keywords(ele, keywords):
    """Tell whether the text of *ele* matches any of the keyword patterns.

    Args:
        ele: Element exposing ``itertext()``; all of its text nodes are
            concatenated before matching.
        keywords: Iterable of regular-expression strings.

    Returns:
        ``True`` if at least one pattern is found in the element text,
        ``False`` otherwise (including when *keywords* is empty).

    Raises:
        re.error: If a keyword is not a valid regular expression.
    """
    text = ''.join(ele.itertext())
    # Match case-insensitively via the flag rather than lowercasing only the
    # text: a keyword typed with capitals ("BERT") would otherwise never hit.
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in keywords)
def check_keywords_icml(ele, keywords):
    """Tell whether the title of a PMLR paper entry matches any keyword.

    Only the text inside the entry's ``<p class="title">`` descendant is
    searched.

    Args:
        ele: Element for one paper entry, exposing ``find()``.
        keywords: Iterable of regular-expression strings.

    Returns:
        ``True`` if at least one pattern is found in the title, ``False``
        otherwise, including when the entry has no title paragraph.

    Raises:
        re.error: If a keyword is not a valid regular expression.
    """
    title = ele.find('.//p[@class="title"]')
    if title is None:
        # Malformed entry: nothing to match instead of an AttributeError.
        return False
    text = ''.join(title.itertext())
    # Case-insensitive so that keywords containing capitals can match.
    return any(re.search(pattern, text, re.IGNORECASE) for pattern in keywords)
# UI venue group -> venue identifiers scraped for it, in search order.
_VENUE_GROUPS = (
    ("NeurIPS/ICLR/ICML", ('nips', 'iclr', 'icml')),
    ("*ACL", ('acl', 'emnlp', 'naacl', 'tacl')),
    # NOTE(review): there is no CVPR/ECCV/ICCV scraper in this file; this
    # group has always mapped to the ML venues. The mapping is kept, but
    # venues are de-duplicated below so nothing is fetched twice.
    ("CVPR/ECCV/ICCV", ('nips', 'iclr', 'icml')),
)

# XPath selecting one paper entry on each venue's listing page.
_PAPER_XPATHS = {
    'acl': ".//a[@class='align-middle']",
    'emnlp': ".//a[@class='align-middle']",
    'naacl': ".//a[@class='align-middle']",
    'tacl': ".//a[@class='align-middle']",
    'iclr': ".//a[@class='Poster']",
    'nips': ".//a[@title='paper title']",
    'icml': ".//div[@class='paper']",
}


def _fetch_html_tree(url, timeout=30):
    """Download *url* and parse it as HTML; return ``None`` on any failure."""
    if not url:
        return None
    try:
        # The context manager closes the connection even if read() fails.
        with request.urlopen(url, timeout=timeout) as response:
            html = response.read().decode('utf-8', errors='replace')
        return etree.fromstring(html, etree.HTMLParser())
    except Exception as exc:
        # Best effort: one unreachable or unparsable page must not abort the
        # whole search, but the reason is reported instead of being hidden.
        print(f'Skipping {url}: {exc}')
        return None


def search(keywords, venues, min_year, max_year):
    """Scrape venue listing pages and collect links to matching papers.

    Args:
        keywords: Comma-separated regular expressions; a paper matches when
            any of them is found in its title text.
        venues: Collection of selected UI venue groups ("NeurIPS/ICLR/ICML",
            "*ACL", "CVPR/ECCV/ICCV").
        min_year: First year to search (inclusive).
        max_year: Last year to search (inclusive).

    Returns:
        A list of single-element rows ``[paper_url]``, one per matching
        paper. Empty when no usable keyword or venue is given.
    """
    patterns = [part.strip() for part in keywords.split(",")]
    # A blank pattern matches every title, so blanks are dropped.
    patterns = [pattern for pattern in patterns if pattern]
    if not patterns:
        return []

    # Sliders may deliver floats; the upper bound is meant to be inclusive.
    years = range(int(min_year), int(max_year) + 1)

    search_venues = []
    for group, members in _VENUE_GROUPS:
        if group in (venues or ()):
            for member in members:
                if member not in search_venues:
                    search_venues.append(member)

    results = []
    for venue in search_venues:
        for year in years:
            print(venue, year)
            try:
                paper_home = get_paper_home(venue, year)
            except KeyError:
                # No proceedings volume is known for this venue/year.
                paper_home = None
            tree = _fetch_html_tree(paper_home)
            if tree is None:
                continue

            url_prefix = url_prefix_mapping[venue]
            elements = tree.findall(_PAPER_XPATHS[venue])
            for element in elements:
                if venue == 'icml':
                    if not check_keywords_icml(element, patterns):
                        continue
                    # PMLR entries carry an absolute link in their first
                    # anchor under <p class="links">.
                    links = element.find('.//p[@class="links"]')
                    anchor = links.find('a') if links is not None else None
                    paper_url = anchor.get('href') if anchor is not None else None
                    if paper_url:
                        results.append([paper_url])
                else:
                    if not check_keywords(element, patterns):
                        continue
                    href = element.get('href')
                    if href:
                        results.append([url_prefix + href])
            print(len(elements))
            print()
    return results
| """ | |
| The UI below is built with gr.Blocks. For information on how to customize Gradio interfaces, see the Gradio docs: https://www.gradio.app/docs | |
| """ | |
# Calendar year at import time (local clock); used as the upper bound of the
# year sliders.
current_year = datetime.date.today().year
| # demo = gr.Interface( | |
| # search, | |
| # inputs=[ | |
| # gr.Textbox(lines=2, placeholder="Keywords of the paper title. Supports ReGex."), | |
| # gr.CheckboxGroup(["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"], label="Choose Venues to Search", value=["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"]), | |
| # gr.Slider(minimum=2020, maximum=current_year, value=[2020, current_year], label="Year Range", step=1) | |
| # ], | |
| # outputs=gr.DataFrame(headers=["Paper Link", "Title", "Authors"]) | |
| # ) | |
| def test_search(keywords, venues, min_year, max_year): | |
| return [["https://example.com"], ["https://anotherexample.com"]] | |
# Venue groups offered in the checkbox group; all are ticked by default.
_venue_choices = ["NeurIPS/ICLR/ICML", "*ACL", "CVPR/ECCV/ICCV"]

with gr.Blocks() as demo:
    # Two columns side by side: narrow inputs on the left, wide results on
    # the right.
    with gr.Row():
        with gr.Column(scale=1):
            # Free-text keywords, split on commas by the search callback.
            textbox = gr.Textbox(
                lines=2,
                label="Enter comma-separated keywords",
                placeholder="Enter keywords, separated by commas...",
            )
            # Which venue groups to scrape.
            checkbox = gr.CheckboxGroup(
                choices=_venue_choices,
                value=list(_venue_choices),
                label="Choose Venues to Search",
                type="value",
            )
            # Lower and upper bound of the publication year.
            min_year_slider = gr.Slider(
                label="Select Min Year",
                minimum=2015,
                maximum=current_year,
                value=2020,
                step=1,
            )
            max_year_slider = gr.Slider(
                label="Select Max Year",
                minimum=2015,
                maximum=current_year,
                value=current_year,
                step=1,
            )
            submit_button = gr.Button("Search")
        with gr.Column(scale=3):
            # One row per matching paper; only the link column is filled.
            output_table = gr.DataFrame(
                headers=["Paper Link"],
                label="Results",
            )

    # Run the search when the button is pressed and show its rows.
    submit_button.click(
        fn=search,
        inputs=[textbox, checkbox, min_year_slider, max_year_slider],
        outputs=output_table,
    )

if __name__ == "__main__":
    demo.launch()