|
|
@@ -7,6 +7,10 @@ from uuid import uuid4
|
|
|
|
|
|
import pandas as pd
|
|
|
import requests
|
|
|
+from pandera.typing import DataFrame
|
|
|
+from pandera.errors import SchemaError
|
|
|
+
|
|
|
+from .validation import ContactSchema, CreneauSchema, CreneauDataSchema
|
|
|
|
|
|
planning_gid = "1001381542"
|
|
|
creneau_gid = "1884137958"
|
|
|
@@ -61,6 +65,10 @@ class InvalidUrlError(Exception):
|
|
|
pass
|
|
|
|
|
|
|
|
|
class ParsingError(Exception):
    """Signal that a downloaded sheet could not be parsed into valid data.

    ``parseGsheet`` raises this in place of pandera's ``SchemaError`` so that
    callers receive a single, human-readable error describing which sheet
    (volunteers, slot descriptions or planning) holds the bad data.
    """
|
|
|
+
|
|
|
+
|
|
|
def extract_doc_uid(url: str) -> str:
|
|
|
res = urlparse(url)
|
|
|
if res.netloc != "docs.google.com":
|
|
|
@@ -86,7 +94,9 @@ def downloadAndSave(doc_ui, sheet_gid, fname):
|
|
|
f.write(rep.content)
|
|
|
|
|
|
|
|
|
-def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
|
|
|
+def getContactDataFrame(
|
|
|
+ csv_filename: str, skiprows: int = 2
|
|
|
+) -> DataFrame[ContactSchema]:
|
|
|
df_contact = pd.read_csv(csv_filename, skiprows=skiprows)
|
|
|
column_to_drop = [name for name in df_contact.columns if "Unnamed" in name]
|
|
|
df_contact.drop(column_to_drop, axis=1, inplace=True)
|
|
|
@@ -94,14 +104,19 @@ def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
|
|
|
# Filter out empty name
|
|
|
df_contact = df_contact[~df_contact.Nom.isnull()]
|
|
|
df_contact.reset_index()
|
|
|
- return df_contact
|
|
|
+ # coerce SMS to boolean
|
|
|
+ df_contact["SMS"] = df_contact["SMS"] == "Oui"
|
|
|
+ # create unique contact key
|
|
|
+ df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
|
|
|
+ return ContactSchema.validate(df_contact)
|
|
|
|
|
|
|
|
|
def getCreneauDataFrame(csv_filename: str) -> DataFrame[CreneauDataSchema]:
    """Load the slot-description CSV and return it as a validated frame.

    The header row of the CSV is discarded and the columns are renamed
    positionally to ``title``, ``lieu``, ``description``, ``responsable`` and
    ``tags``. Missing values in ``tags`` and ``lieu`` are replaced by empty
    strings before the frame is checked against ``CreneauDataSchema``.

    Args:
        csv_filename: Path of the CSV export to read.

    Returns:
        The frame returned by ``CreneauDataSchema.validate``.

    Raises:
        ValueError: If the CSV does not contain exactly five columns (raised
            by pandas when the column names are assigned).
        SchemaError: If the content does not satisfy ``CreneauDataSchema``.
    """
    df_creneau = pd.read_csv(csv_filename)
    # Columns are identified by position, not by the header text in the sheet.
    df_creneau.columns = ["title", "lieu", "description", "responsable", "tags"]
    # A column that is entirely empty in the CSV is read as float64 NaN.
    # Writing "" into it through a masked ``.loc`` assignment is an
    # incompatible-dtype setitem (deprecated since pandas 2.1); a non-inplace
    # ``fillna`` builds new object columns instead and has no such limitation.
    df_creneau = df_creneau.fillna({"tags": "", "lieu": ""})
    return CreneauDataSchema.validate(df_creneau)
|
|
|
|
|
|
|
|
|
def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
|
|
|
@@ -164,7 +179,7 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
|
|
|
row[j] == "" or row[j] != current_benevole_name
|
|
|
):
|
|
|
new_creneau = {
|
|
|
- "id": uuid4(),
|
|
|
+ "id": str(uuid4()),
|
|
|
"template_id": row[0],
|
|
|
"nom": row[1],
|
|
|
"benevole_nom": current_benevole_name,
|
|
|
@@ -182,7 +197,7 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
|
|
|
current_time["end"] = column_to_dates[j]["end"]
|
|
|
if current_benevole_name != "":
|
|
|
new_creneau = {
|
|
|
- "id": uuid4(),
|
|
|
+ "id": str(uuid4()),
|
|
|
"template_id": row[0],
|
|
|
"nom": row[1],
|
|
|
"benevole_nom": current_benevole_name,
|
|
|
@@ -192,7 +207,8 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
|
|
|
list_creneau.append(new_creneau)
|
|
|
|
|
|
print(f"{len(list_creneau)} créneaux trouvés")
|
|
|
- return pd.DataFrame.from_dict(list_creneau)
|
|
|
+ df = pd.DataFrame.from_dict(list_creneau)
|
|
|
+ return CreneauSchema.validate(df)
|
|
|
|
|
|
|
|
|
def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime):
|
|
|
@@ -204,11 +220,20 @@ def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime):
|
|
|
downloadAndSave(doc_uuid, creneau_gid, fname_creneau)
|
|
|
downloadAndSave(doc_uuid, benevole_gid, fname_contact)
|
|
|
|
|
|
- df_contact = getContactDataFrame(fname_contact)
|
|
|
- df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
|
|
|
-
|
|
|
- df_creneau = getCreneauDataFrame(fname_creneau)
|
|
|
- df_planning = getPlanningDataFrame(fname_planning, saturday_date)
|
|
|
+ try:
|
|
|
+ df_contact = getContactDataFrame(fname_contact)
|
|
|
+ except SchemaError as exc:
|
|
|
+ msg = "Donnée erronée sur les bénévoles\n" + str(exc)
|
|
|
+ raise ParsingError(msg)
|
|
|
+
|
|
|
+ try:
|
|
|
+ df_creneau = getCreneauDataFrame(fname_creneau)
|
|
|
+ except SchemaError as exc:
|
|
|
+ raise ParsingError("Donnée erronée des description des créneaux\n" + str(exc))
|
|
|
+ try:
|
|
|
+ df_planning = getPlanningDataFrame(fname_planning, saturday_date)
|
|
|
+ except SchemaError as exc:
|
|
|
+ raise ParsingError("Donnée erronée des créneaux\n" + str(exc))
|
|
|
|
|
|
os.remove(fname_planning)
|
|
|
os.remove(fname_creneau)
|