Kaynağa Gözat

improve type hints for gsheet

Clovis at parent 1 yıl önce
ebeveyn
işleme
3093f009bd

+ 4 - 3
app/api/endpoints/project.py

@@ -125,13 +125,13 @@ async def update_project_from_gsheet(
         p.volunteers = []
     # Parse the gsheets
     try:
-        df_contact, df_creneau, df_planning = parseGsheet(doc_id, gsheet.satursday_date)
+        data = parseGsheet(doc_id, gsheet.satursday_date)
     except ParsingError as exc:
         raise HTTPException(status_code=422, detail=str(exc))
 
     # Create the volunteer list
     volunteer_map: dict[str, Volunteer] = {}
-    for _, row in df_contact.iterrows():
+    for _, row in data.contact.iterrows():
         volunteer = Volunteer(
             project_id=project_id,
             name=row["Prénom"],
@@ -147,7 +147,7 @@ async def update_project_from_gsheet(
     template_map = {}
     tags_map = {}
 
-    for _, row in df_creneau.iterrows():
+    for _, row in data.creneauData.iterrows():
         template = SlotTemplate(
             project_id=project_id,
             title=row.title,
@@ -169,6 +169,7 @@ async def update_project_from_gsheet(
         template_map[template.title] = template
         session.add(template)
 
+    df_planning = data.planning
     # group planning entry per same name and timing
     date_format = "%Y/%m/%d %H:%M"
     df_planning["key"] = (

+ 33 - 16
app/importData/gsheet.py

@@ -4,13 +4,14 @@ import os
 from enum import Enum
 from urllib.parse import urlparse
 from uuid import uuid4
+from typing import Union
 
 import pandas as pd
 import requests
-from pandera.typing import DataFrame
 from pandera.errors import SchemaError
+from pandera.typing import DataFrame
 
-from .validation import ContactSchema, CreneauSchema, CreneauDataSchema
+from .validation import ContactSchema, CreneauSchema, CreneauDataSchema, GsheetData
 
 planning_gid = "1001381542"
 creneau_gid = "1884137958"
@@ -25,6 +26,16 @@ class ParserState(Enum):
 
 
 def split_csv_row(raw_data: str, separator: str = ",", escape: str = '"') -> list[str]:
+    """Split a csv row into the different value
+
+    Args:
+        raw_data (str): data
+        separator (str, optional): column separator. Defaults to ",".
+        escape (str, optional): excaping character . Defaults to '"'.
+
+    Returns:
+        list[str]: list of value in the csv row
+    """
     state: ParserState = ParserState.STARTING_VALUE
     arr = []
     current_item = ""
@@ -70,6 +81,17 @@ class ParsingError(Exception):
 
 
 def extract_doc_uid(url: str) -> str:
+    """Extract the uid of a gsheet from its url
+
+    Args:
+        url (str): url of google sheet to extract uid from
+
+    Raises:
+        InvalidUrlError: if the url does not correspond to a gsheet url
+
+    Returns:
+        str: uuid of the google sheet
+    """
     res = urlparse(url)
     if res.netloc != "docs.google.com":
         raise InvalidUrlError("Invalid netloc")
@@ -82,15 +104,15 @@ def extract_doc_uid(url: str) -> str:
     return doc_id
 
 
-def build_sheet_url(doc_id, sheet_id):
+def build_sheet_url(doc_id: str, sheet_id: str):
     return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
 
 
-def downloadAndSave(doc_ui, sheet_gid, fname):
+def downloadAndSave(doc_ui: str, sheet_gid: str, file: Union[str, bytes, os.PathLike]):
     url = build_sheet_url(doc_ui, sheet_gid)
-    print("Downloading " + fname)
+    print("Downloading " + str(file))
     rep = requests.get(url)
-    with open(fname, "wb") as f:
+    with open(file, "wb") as f:
         f.write(rep.content)
 
 
@@ -120,7 +142,9 @@ def getCreneauDataFrame(csv_filename: str) -> DataFrame[CreneauDataSchema]:
 
 
 def getPlanningDataFrame(
-    csv_filename, starting_date, skip_column=3
+    csv_filename: Union[str, bytes, os.PathLike],
+    starting_date: datetime.datetime,
+    skip_column: int = 3,
 ) -> DataFrame[CreneauSchema]:
     list_creneau = []
     with io.open(csv_filename, "r", encoding="utf-8") as f:
@@ -213,13 +237,7 @@ def getPlanningDataFrame(
     return CreneauSchema.validate(df)
 
 
-def parseGsheet(
-    doc_uuid: str, saturday_date: datetime.datetime
-) -> tuple[
-    DataFrame[ContactSchema],
-    DataFrame[CreneauDataSchema],
-    DataFrame[CreneauSchema],
-]:
+def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime) -> GsheetData:
     suffix = "_2023"
     fname_planning = f"./planning{suffix}.csv"
     fname_creneau = f"./creneau{suffix}.csv"
@@ -247,5 +265,4 @@ def parseGsheet(
     os.remove(fname_planning)
     os.remove(fname_creneau)
     os.remove(fname_contact)
-
-    return df_contact, df_creneau, df_planning
+    return GsheetData(df_contact, df_creneau, df_planning)

+ 17 - 1
app/importData/validation.py

@@ -3,7 +3,7 @@ from typing import Optional
 import pandas as pd
 import pandera as pa
 
-from pandera.typing import Series
+from pandera.typing import Series, DataFrame
 
 
 class ContactSchema(pa.DataFrameModel):
@@ -35,3 +35,19 @@ class CreneauSchema(pa.DataFrameModel):
     end: Series[pd.DatetimeTZDtype] = pa.Field(
         dtype_kwargs={"unit": "ns", "tz": "UTC"}, coerce=True
     )
+
+
+class GsheetData:
+    contact: DataFrame[ContactSchema]
+    creneauData: DataFrame[CreneauDataSchema]
+    planning: DataFrame[CreneauSchema]
+
+    def __init__(
+        self,
+        contact: DataFrame[ContactSchema],
+        creneauData: DataFrame[CreneauDataSchema],
+        planning: DataFrame[CreneauSchema],
+    ):
+        self.contact = contact
+        self.creneauData = creneauData
+        self.planning = planning