gsheet.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. import datetime
  2. import io
  3. import os
  4. from enum import Enum
  5. from uuid import uuid4
  6. import pandas as pd
  7. import requests
  8. planning_gid = "1001381542"
  9. creneau_gid = "1884137958"
  10. benevole_gid = "82247394"
  11. class ParserState(Enum):
  12. STARTING_VALUE = 0
  13. READING_VALUE = 1
  14. ESCAPING = 2
  15. CLOSING_ESCAPE = 3
  16. def split_csv_row(raw_data: str, separator: str = ",", escape: str = '"') -> list[str]:
  17. state: ParserState = ParserState.STARTING_VALUE
  18. arr = []
  19. current_item = ""
  20. for c in raw_data:
  21. if state == ParserState.STARTING_VALUE:
  22. if c == escape:
  23. state = ParserState.ESCAPING
  24. elif c == separator:
  25. arr.append("")
  26. else:
  27. state = ParserState.READING_VALUE
  28. current_item = c
  29. elif state == ParserState.READING_VALUE:
  30. if c == separator:
  31. state = ParserState.STARTING_VALUE
  32. arr.append(current_item)
  33. current_item = ""
  34. else:
  35. current_item += c
  36. elif state == ParserState.ESCAPING:
  37. if c == escape:
  38. state = ParserState.CLOSING_ESCAPE
  39. else:
  40. current_item += c
  41. elif state == ParserState.CLOSING_ESCAPE:
  42. if c == escape:
  43. state = ParserState.ESCAPING
  44. current_item += c
  45. else:
  46. state = ParserState.READING_VALUE
  47. arr.append(current_item)
  48. current_item = ""
  49. arr.append(current_item)
  50. return arr
  51. def build_sheet_url(doc_id, sheet_id):
  52. return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
  53. def downloadAndSave(doc_ui, sheet_gid, fname):
  54. url = build_sheet_url(doc_ui, sheet_gid)
  55. print("Downloading " + fname)
  56. rep = requests.get(url)
  57. with open(fname, "wb") as f:
  58. f.write(rep.content)
  59. def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
  60. df_contact = pd.read_csv(csv_filename, skiprows=skiprows)
  61. column_to_drop = [name for name in df_contact.columns if "Unnamed" in name]
  62. df_contact.drop(column_to_drop, axis=1, inplace=True)
  63. # Filter out empty name
  64. df_contact = df_contact[~df_contact.Nom.isnull()]
  65. df_contact.reset_index()
  66. return df_contact
  67. def getCreneauDataFrame(csv_filename: str) -> pd.DataFrame:
  68. df_creneau = pd.read_csv(csv_filename)
  69. df_creneau.columns = ["nom", "lieu", "description", "responsable"]
  70. return df_creneau
  71. def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
  72. list_creneau = []
  73. with io.open(csv_filename, "r", encoding="utf-8") as f:
  74. datas = [split_csv_row(s.replace("\n", "")) for s in f.readlines()]
  75. def getDate(day: str) -> datetime.datetime:
  76. s = day.lower()
  77. if s.startswith("jeudi"):
  78. return starting_date
  79. elif s.startswith("vendredi"):
  80. return starting_date + datetime.timedelta(days=1)
  81. elif s.startswith("samedi"):
  82. return starting_date + datetime.timedelta(days=2)
  83. elif s.startswith("dimanche"):
  84. return starting_date + datetime.timedelta(days=3)
  85. elif s.startswith("lundi"):
  86. return starting_date + datetime.timedelta(days=4)
  87. raise KeyError("This day is not valid : " + s)
  88. def getTime(time_str: str) -> datetime.timedelta:
  89. l = time_str.split("h")
  90. hours = int(l[0])
  91. if hours < 5:
  92. hours += 24
  93. if len(l) > 1 and l[1] != "":
  94. return datetime.timedelta(hours=hours, minutes=int(l[1]))
  95. else:
  96. return datetime.timedelta(hours=hours)
  97. def getStartEnd(time_str: str) -> tuple:
  98. l = time_str.split("-")
  99. if len(l) == 2:
  100. return getTime(l[0]), getTime(l[1])
  101. else:
  102. start = getTime(time_str.split("+")[0])
  103. return start, start + datetime.timedelta(hours=1)
  104. # Parse headers
  105. headers = datas[skip_column : skip_column + 2]
  106. days_str = headers[0]
  107. hours = headers[1]
  108. column_to_dates: dict[int, dict[str, datetime.datetime]] = {}
  109. assert len(hours) == len(hours)
  110. for i in range(skip_column, len(days_str)):
  111. day = getDate(days_str[i])
  112. start, end = getStartEnd(hours[i])
  113. column_to_dates[i] = {"start": day + start, "end": day + end}
  114. list_creneau: list[dict] = []
  115. for i in range(5, len(datas)):
  116. row = datas[i]
  117. current_benevole_name = ""
  118. current_time: dict[str, datetime.datetime] = {}
  119. for j in range(skip_column, len(row)):
  120. if (current_benevole_name != "") and (row[j] == "" or row[j] != current_benevole_name):
  121. new_creneau = {
  122. "id": uuid4(),
  123. "description_id": row[0],
  124. "nom": row[1],
  125. "benevole_nom": current_benevole_name,
  126. "ligne": i + 1,
  127. **current_time,
  128. }
  129. list_creneau.append(new_creneau)
  130. current_benevole_name = ""
  131. current_time = {}
  132. if row[j] != "":
  133. current_benevole_name = row[j]
  134. if len(current_time.keys()) == 0:
  135. current_time = column_to_dates[j].copy()
  136. else:
  137. current_time["end"] = column_to_dates[j]["end"]
  138. if current_benevole_name != "":
  139. new_creneau = {
  140. "id": uuid4(),
  141. "description_id": row[0],
  142. "nom": row[1],
  143. "benevole_nom": current_benevole_name,
  144. "ligne": i + 1,
  145. **current_time,
  146. }
  147. list_creneau.append(new_creneau)
  148. print(f"{len(list_creneau)} créneaux trouvés")
  149. return pd.DataFrame.from_dict(list_creneau)
  150. def parseGsheet(doc_uuid="1Ueuw8VVAmnx5k_t8tjHwLIuYwQJrKcwNbibbaqWRWAs"):
  151. suffix = "_2023"
  152. fname_planning = f"./planning{suffix}.csv"
  153. fname_creneau = f"./creneau{suffix}.csv"
  154. fname_contact = f"./benevole{suffix}.csv"
  155. downloadAndSave(doc_uuid, planning_gid, fname_planning)
  156. downloadAndSave(doc_uuid, creneau_gid, fname_creneau)
  157. downloadAndSave(doc_uuid, benevole_gid, fname_contact)
  158. df_contact = getContactDataFrame(fname_contact)
  159. df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
  160. df_creneau = getCreneauDataFrame(fname_creneau)
  161. df_planning = getPlanningDataFrame(fname_planning, datetime.datetime(2023, 9, 14))
  162. os.remove(fname_planning)
  163. os.remove(fname_creneau)
  164. os.remove(fname_contact)
  165. return df_contact, df_creneau, df_planning