Skip to content

Commit

Permalink
NN-527 Add URL scheme validation to ensure http or https
Browse files Browse the repository at this point in the history
- Parsed the URL using urlparse to check the scheme.
- Added validation that raises a ValueError if the URL scheme is not http or https.
- Ensures only valid URL schemes are accepted, enhancing the robustness of the code.
  • Loading branch information
dyusuf committed Jul 26, 2024
1 parent c3e135c commit a3f74d5
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
8 changes: 7 additions & 1 deletion backend/src/util/data_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import re
import time
from urllib.parse import urlparse
from urllib.request import urlopen


Expand Down Expand Up @@ -104,9 +105,14 @@ def parse_compound_line(line):
def get(url, timeout=10, wait=1):
if type(wait) is int and wait > 0:
time.sleep(wait)

parsed_url = urlparse(url)
if parsed_url.scheme not in ["http", "https"]:
raise ValueError("URL scheme must be http or https")

try:
return (
urlopen(url, timeout=timeout)
urlopen(url, timeout=timeout) # nosec
.read()
.decode(encoding="utf-8", errors="ignore")
)
Expand Down
8 changes: 7 additions & 1 deletion scraping/KEGG/url.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import time
from urllib.parse import urlparse
from urllib.request import urlopen


def get(url, timeout=10, wait=1):
if type(wait) is int and wait > 0:
time.sleep(wait)

parsed_url = urlparse(url)
if parsed_url.scheme not in ["http", "https"]:
raise ValueError("URL scheme must be http or https")

try:
return (
urlopen(url, timeout=timeout)
urlopen(url, timeout=timeout) # nosec
.read()
.decode(encoding="utf-8", errors="ignore")
)
Expand Down

0 comments on commit a3f74d5

Please sign in to comment.