|
@@ -4,13 +4,14 @@ import os
|
|
|
from enum import Enum
|
|
from enum import Enum
|
|
|
from urllib.parse import urlparse
|
|
from urllib.parse import urlparse
|
|
|
from uuid import uuid4
|
|
from uuid import uuid4
|
|
|
|
|
+from typing import Union
|
|
|
|
|
|
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
import requests
|
|
import requests
|
|
|
-from pandera.typing import DataFrame
|
|
|
|
|
from pandera.errors import SchemaError
|
|
from pandera.errors import SchemaError
|
|
|
|
|
+from pandera.typing import DataFrame
|
|
|
|
|
|
|
|
-from .validation import ContactSchema, CreneauSchema, CreneauDataSchema
|
|
|
|
|
|
|
+from .validation import ContactSchema, CreneauSchema, CreneauDataSchema, GsheetData
|
|
|
|
|
|
|
|
planning_gid = "1001381542"
|
|
planning_gid = "1001381542"
|
|
|
creneau_gid = "1884137958"
|
|
creneau_gid = "1884137958"
|
|
@@ -25,6 +26,16 @@ class ParserState(Enum):
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_csv_row(raw_data: str, separator: str = ",", escape: str = '"') -> list[str]:
|
|
def split_csv_row(raw_data: str, separator: str = ",", escape: str = '"') -> list[str]:
|
|
|
|
|
+ """Split a csv row into its individual values
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ raw_data (str): raw csv row to split
|
|
|
|
|
+ separator (str, optional): column separator. Defaults to ",".
|
|
|
|
|
+ escape (str, optional): escaping character. Defaults to '"'.
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ list[str]: list of values in the csv row
|
|
|
|
|
+ """
|
|
|
state: ParserState = ParserState.STARTING_VALUE
|
|
state: ParserState = ParserState.STARTING_VALUE
|
|
|
arr = []
|
|
arr = []
|
|
|
current_item = ""
|
|
current_item = ""
|
|
@@ -70,6 +81,17 @@ class ParsingError(Exception):
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_doc_uid(url: str) -> str:
|
|
def extract_doc_uid(url: str) -> str:
|
|
|
|
|
+ """Extract the uid of a gsheet from its url
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ url (str): url of google sheet to extract uid from
|
|
|
|
|
+
|
|
|
|
|
+ Raises:
|
|
|
|
|
+ InvalidUrlError: if the url does not correspond to a gsheet url
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ str: uid of the google sheet
|
|
|
|
|
+ """
|
|
|
res = urlparse(url)
|
|
res = urlparse(url)
|
|
|
if res.netloc != "docs.google.com":
|
|
if res.netloc != "docs.google.com":
|
|
|
raise InvalidUrlError("Invalid netloc")
|
|
raise InvalidUrlError("Invalid netloc")
|
|
@@ -82,15 +104,15 @@ def extract_doc_uid(url: str) -> str:
|
|
|
return doc_id
|
|
return doc_id
|
|
|
|
|
|
|
|
|
|
|
|
|
-def build_sheet_url(doc_id, sheet_id):
|
|
|
|
|
|
|
+def build_sheet_url(doc_id: str, sheet_id: str):
|
|
|
return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
|
|
return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
|
|
|
|
|
|
|
|
|
|
|
|
|
-def downloadAndSave(doc_ui, sheet_gid, fname):
|
|
|
|
|
|
|
+def downloadAndSave(doc_ui: str, sheet_gid: str, file: Union[str, bytes, os.PathLike]):
|
|
|
url = build_sheet_url(doc_ui, sheet_gid)
|
|
url = build_sheet_url(doc_ui, sheet_gid)
|
|
|
- print("Downloading " + fname)
|
|
|
|
|
|
|
+ print("Downloading " + str(file))
|
|
|
rep = requests.get(url)
|
|
rep = requests.get(url)
|
|
|
- with open(fname, "wb") as f:
|
|
|
|
|
|
|
+ with open(file, "wb") as f:
|
|
|
f.write(rep.content)
|
|
f.write(rep.content)
|
|
|
|
|
|
|
|
|
|
|
|
@@ -120,7 +142,9 @@ def getCreneauDataFrame(csv_filename: str) -> DataFrame[CreneauDataSchema]:
|
|
|
|
|
|
|
|
|
|
|
|
|
def getPlanningDataFrame(
|
|
def getPlanningDataFrame(
|
|
|
- csv_filename, starting_date, skip_column=3
|
|
|
|
|
|
|
+ csv_filename: Union[str, bytes, os.PathLike],
|
|
|
|
|
+ starting_date: datetime.datetime,
|
|
|
|
|
+ skip_column: int = 3,
|
|
|
) -> DataFrame[CreneauSchema]:
|
|
) -> DataFrame[CreneauSchema]:
|
|
|
list_creneau = []
|
|
list_creneau = []
|
|
|
with io.open(csv_filename, "r", encoding="utf-8") as f:
|
|
with io.open(csv_filename, "r", encoding="utf-8") as f:
|
|
@@ -213,13 +237,7 @@ def getPlanningDataFrame(
|
|
|
return CreneauSchema.validate(df)
|
|
return CreneauSchema.validate(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
-def parseGsheet(
|
|
|
|
|
- doc_uuid: str, saturday_date: datetime.datetime
|
|
|
|
|
-) -> tuple[
|
|
|
|
|
- DataFrame[ContactSchema],
|
|
|
|
|
- DataFrame[CreneauDataSchema],
|
|
|
|
|
- DataFrame[CreneauSchema],
|
|
|
|
|
-]:
|
|
|
|
|
|
|
+def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime) -> GsheetData:
|
|
|
suffix = "_2023"
|
|
suffix = "_2023"
|
|
|
fname_planning = f"./planning{suffix}.csv"
|
|
fname_planning = f"./planning{suffix}.csv"
|
|
|
fname_creneau = f"./creneau{suffix}.csv"
|
|
fname_creneau = f"./creneau{suffix}.csv"
|
|
@@ -247,5 +265,4 @@ def parseGsheet(
|
|
|
os.remove(fname_planning)
|
|
os.remove(fname_planning)
|
|
|
os.remove(fname_creneau)
|
|
os.remove(fname_creneau)
|
|
|
os.remove(fname_contact)
|
|
os.remove(fname_contact)
|
|
|
-
|
|
|
|
|
- return df_contact, df_creneau, df_planning
|
|
|
|
|
|
|
+ return GsheetData(df_contact, df_creneau, df_planning)
|